25 changes: 22 additions & 3 deletions .github/workflows/ci.yml
@@ -100,6 +100,25 @@ jobs:
steps:
- run: echo "Build skipped — no code changes detected"

# ---------------------------------------------------------------------------
# Python lint — pinned ruff format check on every PR/push.
# Lightweight (~1 min); does not depend on init/build.
# ---------------------------------------------------------------------------
lint-python:
name: Lint Python (ruff format)
if: github.event_name != 'schedule'
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- name: Checkout
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

- name: Install ruff
run: pip install ruff==0.15.12

- name: Check formatting
run: ruff format --check .

# ---------------------------------------------------------------------------
# CodeQL — scheduled + push only (not on PRs to avoid blocking releases)
# ---------------------------------------------------------------------------
@@ -201,19 +220,19 @@ jobs:
# ---------------------------------------------------------------------------
ci-ok:
name: CI OK
needs: [init, changes, build]
needs: [init, changes, build, lint-python]
if: always()
runs-on: ubuntu-latest
steps:
- name: Verify results
run: |
if [[ "${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}" == "true" ]]; then
echo "❌ One or more CI jobs failed or were cancelled."
echo "Results: init=${{ needs.init.result }}, changes=${{ needs.changes.result }}, build=${{ needs.build.result }}"
echo "Results: init=${{ needs.init.result }}, changes=${{ needs.changes.result }}, build=${{ needs.build.result }}, lint-python=${{ needs.lint-python.result }}"
exit 1
fi
echo "✅ All CI checks passed (or were correctly skipped)."
echo "Results: init=${{ needs.init.result }}, changes=${{ needs.changes.result }}, build=${{ needs.build.result }}"
echo "Results: init=${{ needs.init.result }}, changes=${{ needs.changes.result }}, build=${{ needs.build.result }}, lint-python=${{ needs.lint-python.result }}"

# ---------------------------------------------------------------------------
# Discord — update PR embed with final check counts after CI completes.
30 changes: 29 additions & 1 deletion CONTRIBUTING.md
@@ -133,7 +133,7 @@ We follow [Conventional Commits](https://www.conventionalcommits.org/):

- Follow PEP 8 style guidelines
- Use type hints where appropriate
- Use single quotes for strings (as configured in ruff)
- Use single quotes for strings (as configured in ruff — see [Formatting](#formatting) below)
- Add docstrings to all public functions and classes
- Include MIT license header in new files

@@ -145,6 +145,34 @@ We follow [Conventional Commits](https://www.conventionalcommits.org/):
- Add JSDoc comments to public APIs
- Include MIT license header in new files

## Formatting

Python files in this repo are formatted with [ruff](https://docs.astral.sh/ruff/). The configuration lives in `[tool.ruff]` in `pyproject.toml`.

### Install ruff

```bash
pip install ruff==0.15.12
```

The version is pinned to match what CI runs — using a different version may produce different formatting and fail the format check.
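
One way to confirm that your local install matches the pin before formatting (plain shell commands, nothing project-specific assumed):

```bash
# Confirm the locally installed ruff matches the version pinned in CI
ruff --version
pip show ruff
```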

### Format and check

```bash
# Reformat all Python files in place
ruff format .

# Check formatting without modifying files (what CI runs)
ruff format --check .
```

### CI enforcement

The `lint-python` job in `.github/workflows/ci.yml` runs `ruff format --check` on every PR. If it fails, run `ruff format .` locally, commit the result, and push.
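
A minimal sketch of that local fix loop (the commit message below is only an example; follow the Conventional Commits format described above):

```bash
# Reformat, inspect what the formatter changed, then commit and push
ruff format .
git diff
git add -u
git commit -m "style: apply ruff format"
git push
```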

> Ruff is the formatter only at this point — full lint enforcement (`ruff check`) is tracked separately while existing violations are addressed.
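
If you want to preview lint findings ahead of that, `ruff check` can already be run locally; it is optional and does not gate CI today:

```bash
# Optional: preview lint diagnostics locally (not yet enforced in CI)
ruff check .
```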

## Testing

### Running Tests
31 changes: 8 additions & 23 deletions nodes/src/nodes/accessibility_describe/accessibility_vision.py
@@ -67,7 +67,7 @@

# Spatial format prompt modifiers
SPATIAL_PROMPTS = {
'clock': '\n\nUse clock positions for spatial references (12 o\'clock = straight ahead).',
'clock': "\n\nUse clock positions for spatial references (12 o'clock = straight ahead).",
'relative': '\n\nUse relative directions (left, right, ahead, behind) for spatial references.',
'both': '\n\nUse both clock positions and relative directions for spatial references.',
}
@@ -98,31 +98,18 @@ def __init__(self, provider: str, connConfig: dict[str, Any], bag: dict[str, Any
spatial_format = config.get('accessibility.spatialFormat', 'clock')

# Build system prompt with config modifiers
self._system_prompt = (
config.get('accessibility.systemPrompt')
or config.get('systemPrompt')
or DEFAULT_SYSTEM_PROMPT
)
self._system_prompt = config.get('accessibility.systemPrompt') or config.get('systemPrompt') or DEFAULT_SYSTEM_PROMPT
self._system_prompt += HAZARD_PROMPTS.get(hazard_priority, '')
self._system_prompt += SPATIAL_PROMPTS.get(spatial_format, '')

self._prompt = (
config.get('accessibility.prompt')
or config.get('prompt')
or DEFAULT_PROMPT
)
self._prompt = config.get('accessibility.prompt') or config.get('prompt') or DEFAULT_PROMPT

if not api_key:
raise ValueError(
'Missing Google AI API key. Get one at https://aistudio.google.com/apikey'
)
raise ValueError('Missing Google AI API key. Get one at https://aistudio.google.com/apikey')

# Validate the API key format
if api_key.startswith('sk-'):
raise ValueError(
'Invalid API key format. This appears to be an OpenAI key. '
'Please provide a Google AI API key.'
)
raise ValueError('Invalid API key format. This appears to be an OpenAI key. Please provide a Google AI API key.')

try:
self._client = genai.Client(api_key=api_key)
@@ -157,7 +144,7 @@ def _format_user_error(self, error_msg: str) -> str:
if any(phrase in error_lower for phrase in ['invalid input', 'bad request', '400']):
return 'Invalid input. Please check your image format and prompt.'
if any(phrase in error_lower for phrase in ['model not found', 'unavailable', 'not supported']):
return f'Model \'{self._model}\' is currently unavailable. Please try a different model.'
return f"Model '{self._model}' is currently unavailable. Please try a different model."
if any(phrase in error_lower for phrase in ['timeout', 'timed out']):
return 'Request timed out. Please try again.'
if any(phrase in error_lower for phrase in ['content policy', 'safety', 'blocked']):
@@ -199,9 +186,7 @@ def chat(self, question: Question) -> Answer:
mime_type = header.split(':')[1].split(';')[0]
image_bytes = base64.b64decode(b64_data)
except (ValueError, IndexError, base64.binascii.Error) as e:
raise ValueError(
'Malformed image data URL. Expected format: data:<mime>;base64,<data>'
) from e
raise ValueError('Malformed image data URL. Expected format: data:<mime>;base64,<data>') from e

# Build request contents once (deterministic, no need to rebuild per retry)
contents = [
@@ -233,7 +218,7 @@ def chat(self, question: Question) -> Answer:
except Exception as e:
last_error = e
if attempt < max_retries and self._shouldRetry(e):
delay = base_delay * (2 ** attempt)
delay = base_delay * (2**attempt)
time.sleep(delay)
continue
break
1 change: 1 addition & 0 deletions nodes/src/nodes/agent_rocketride/IGlobal.py
@@ -42,6 +42,7 @@ def beginGlobal(self) -> None:
config handling is needed here.
"""
from .rocketride_agent import RocketRideDriver

self.agent = RocketRideDriver(self)

def endGlobal(self) -> None:
13 changes: 4 additions & 9 deletions nodes/src/nodes/anonymize/IInstance.py
@@ -27,15 +27,10 @@

class IInstance(IInstanceBase):
IGlobal: IGlobal # Reference to a global context providing recognizer functionality

# Default PII labels for zero-shot NER when no classifications provided
DEFAULT_PII_LABELS = [
'person', 'name', 'email', 'phone number', 'address',
'social security number', 'credit card number', 'date of birth',
'organization', 'company', 'location', 'ip address',
'bank account', 'passport number', 'driver license'
]

DEFAULT_PII_LABELS = ['person', 'name', 'email', 'phone number', 'address', 'social security number', 'credit card number', 'date of birth', 'organization', 'company', 'location', 'ip address', 'bank account', 'passport number', 'driver license']

#
# Current object context properties
#
@@ -59,7 +54,7 @@ def closing(self):
if not self.has_classifications:
# No classifications received - anonymize with default PII labels
self.target_object_text = self.IGlobal.recognizer.process(self.target_object_text, self.DEFAULT_PII_LABELS)

# Resume the writeText lane
self.instance.writeText(self.target_object_text)

1 change: 1 addition & 0 deletions nodes/src/nodes/anonymize/anonymize.py
@@ -21,6 +21,7 @@
# SOFTWARE.
# =============================================================================


def anonymize(text: str, matches, anonymize_char: str = '*') -> str:
"""Replace specified segments with a sequence of anonymization characters.

25 changes: 11 additions & 14 deletions nodes/src/nodes/anonymize/glinerRecognizer.py
@@ -36,7 +36,7 @@ class GliNERRecognizer:
def __init__(self, provider: str, connConfig: Dict[str, Any], bag: Dict[str, Any]):
"""
Initialize the GLiNER Recognizer.

Uses ai.common.models.GLiNER which automatically routes to model server
if --modelserver flag is present, otherwise runs locally.
"""
@@ -50,7 +50,7 @@ def __init__(self, provider: str, connConfig: Dict[str, Any], bag: Dict[str, Any
enginePath = expand('%execPath%')
rule_file_path = os.path.join(enginePath, 'nucleuz', 'rulePack.dat')
self.ruleParser = RuleParser(rule_file_path)

# Use ai.common.models.GLiNER - auto-detects local vs model server mode
self.model = GLiNER(self.model_name)

@@ -182,34 +182,34 @@ def process_chunk(chunk_idx):
def process(self, text: str, labels: list, existing_matches: list = None) -> str:
"""
Core anonymization method - detects entities using GLiNER and masks them.

Args:
text: The text to anonymize
labels: Entity labels to detect
existing_matches: Optional list of (offset, length) tuples from classifications

Returns:
Anonymized text with detected entities replaced by anonymize_char
"""
if not text:
return text

# Run NER prediction
ner_results = self.predict(text, labels)
ner_matches = self.convert_ner_results_to_matches(ner_results)

debug(f'Anonymize: Detected {len(ner_results)} entities')

# Combine with existing matches (from classifications)
all_matches = list(existing_matches or []) + ner_matches

if not all_matches:
debug('Anonymize: No entities to mask')
return text

# Sort by offset and apply masking
all_matches_sorted = sorted(all_matches, key=lambda x: x[0])

return _anonymize(text, all_matches_sorted, self.anonymize_char)

def handleClassifications(self, classifications: dict, target_object_text: str, classificationPolicy: any, classificationRules: any):
@@ -234,9 +234,6 @@ def handleClassifications(self, classifications: dict, target_object_text: str,
labels = self.ruleParser.get_rules_names(unique_id_refs) + rules

# Extract existing matches from classifications (offset, length tuples)
existing_matches = list(
(m['offset'], m['length'])
for m in ((m.get('location', {}).get('inChars') or m) for m in text_matches)
)
existing_matches = list((m['offset'], m['length']) for m in ((m.get('location', {}).get('inChars') or m) for m in text_matches))

return self.process(target_object_text, labels, existing_matches)
5 changes: 1 addition & 4 deletions nodes/src/nodes/audio_transcribe/IGlobal.py
@@ -66,10 +66,7 @@ def transcribe(self, audio: Any) -> List[SimpleNamespace]:
)

segments = result.get('$segments') or []
return [
SimpleNamespace(text=s.get('text', ''), start=s.get('start', 0.0), end=s.get('end', 0.0))
for s in segments
]
return [SimpleNamespace(text=s.get('text', ''), start=s.get('start', 0.0), end=s.get('end', 0.0)) for s in segments]

def _audio_to_pcm_bytes(self, audio: Any) -> bytes:
"""Convert audio (bytes or float32 numpy) to PCM int16 bytes (16 kHz mono)."""
1 change: 1 addition & 0 deletions nodes/src/nodes/audio_tts/IGlobal.py
@@ -80,6 +80,7 @@ def _ensure_spacy_en_model() -> None:
"""Install ``en_core_web_sm`` matching the installed spaCy version (GitHub wheel)."""
try:
import en_core_web_sm # noqa: F401

return
except ImportError:
pass
6 changes: 3 additions & 3 deletions nodes/src/nodes/db_mysql/IGlobal.py
@@ -40,11 +40,11 @@ def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]:
# Config.getNodeConfig() strips the node namespace prefix before returning;
# keys are unprefixed here by design (e.g. 'host', not 'mysql.host').
return {
'host': config.get('host', 'localhost').strip(),
'user': config.get('user', 'root').strip(),
'host': config.get('host', 'localhost').strip(),
'user': config.get('user', 'root').strip(),
'password': config.get('password', ''), # Do not strip — whitespace is valid in passwords
'database': config.get('database', 'database').strip(),
'table': config.get('table', 'table').strip(),
'table': config.get('table', 'table').strip(),
}

def _build_connection_url(self, params: Dict[str, str]) -> str:
6 changes: 3 additions & 3 deletions nodes/src/nodes/db_postgres/IGlobal.py
@@ -40,11 +40,11 @@ def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]:
# Config.getNodeConfig() strips the node namespace prefix before returning;
# keys are unprefixed here by design (e.g. 'host', not 'postgresdb.host').
return {
'host': config.get('host', 'localhost').strip(),
'user': config.get('user', 'postgres').strip(),
'host': config.get('host', 'localhost').strip(),
'user': config.get('user', 'postgres').strip(),
'password': config.get('password', ''), # Do not strip — whitespace is valid in passwords
'database': config.get('database', 'postgres').strip(),
'table': config.get('table', 'table').strip(),
'table': config.get('table', 'table').strip(),
}

def _build_connection_url(self, params: Dict[str, str]) -> str:
2 changes: 1 addition & 1 deletion nodes/src/nodes/index_search/IEndpoint.py
@@ -27,9 +27,9 @@
Defines the pipeline endpoint; configuration and lifecycle are handled by
IGlobal and IInstance.
"""

from ai.common.transform import IEndpointTransform


class IEndpoint(IEndpointTransform):
"""Endpoint for index_search (Elasticsearch and OpenSearch). No extra config."""
