Commits (45)
f4f7aa5  Fix typo in pipeline source node description (Tejeshyewale, Apr 26, 2026)
b22e844  docs: improve README clarity and deployment instructions (Tejeshyewale, Apr 26, 2026)
319fca1  Update README.md (Tejeshyewale, Apr 26, 2026)
7ce10e3  Create README.md (Tejeshyewale, Apr 28, 2026)
90fac4d  Create code.py (Tejeshyewale, Apr 28, 2026)
b0598a2  Create IGlobal.py (Tejeshyewale, Apr 28, 2026)
ca2b870  Create IInstance.py (Tejeshyewale, Apr 28, 2026)
5862bd8  Create services.json (Tejeshyewale, Apr 28, 2026)
b33eff4  Create requirements.txt (Tejeshyewale, Apr 28, 2026)
be13299  Update code.py (Tejeshyewale, Apr 28, 2026)
4d88f21  Add files via upload (Tejeshyewale, Apr 28, 2026)
48a784f  Rename IInstance.py to Instance.py (Tejeshyewale, Apr 28, 2026)
7e6963d  Update services.json (Tejeshyewale, Apr 28, 2026)
5952843  Update requirements.txt (Tejeshyewale, Apr 28, 2026)
c667348  Update README.md (Tejeshyewale, Apr 28, 2026)
12c00e2  Update requirements.txt (Tejeshyewale, Apr 28, 2026)
c587ada  Update code.py (Tejeshyewale, Apr 28, 2026)
bb482f0  Merge branch 'rocketride-org:develop' into develop (Tejeshyewale, Apr 30, 2026)
8e676e5  Fix PreProcessor structure and improve error handling (Tejeshyewale, Apr 30, 2026)
50a0bd4  Fix PreProcessor node structure and improve error handling (Tejeshyewale, Apr 30, 2026)
96cf781  Update IGlobal.py (Tejeshyewale, Apr 30, 2026)
92593be  Update Instance.py (Tejeshyewale, Apr 30, 2026)
fe1f418  Update code.py (Tejeshyewale, Apr 30, 2026)
89504f9  Update code.py (Tejeshyewale, Apr 30, 2026)
dc7f679  Update IGlobal.py (Tejeshyewale, Apr 30, 2026)
4c9d406  Update Instance.py (Tejeshyewale, Apr 30, 2026)
c39113d  Update services.json (Tejeshyewale, Apr 30, 2026)
47ad4f9  Update code.py (Tejeshyewale, Apr 30, 2026)
3e8e46b  Update IGlobal.py (Tejeshyewale, Apr 30, 2026)
5399865  Update Instance.py (Tejeshyewale, Apr 30, 2026)
43c725d  Update README.md (Tejeshyewale, Apr 30, 2026)
5602c71  Merge branch 'rocketride-org:develop' into develop (Tejeshyewale, Apr 30, 2026)
9eaba7e  Update IGlobal.py (Tejeshyewale, May 1, 2026)
efa13f6  Update Instance.py (Tejeshyewale, May 1, 2026)
66e54c3  Update code.py (Tejeshyewale, May 1, 2026)
cc8840e  Create __init__.py (Tejeshyewale, May 1, 2026)
1f33c2f  Update requirements.txt (Tejeshyewale, May 1, 2026)
4d29752  Update services.json (Tejeshyewale, May 1, 2026)
9ad5458  Rename Instance.py to IInstance.py (Tejeshyewale, May 1, 2026)
282a102  Delete nodes/src/nodes/ml_sklearn/model.pkl (Tejeshyewale, May 1, 2026)
718b5ce  Merge branch 'rocketride-org:develop' into develop (Tejeshyewale, May 1, 2026)
46d6428  Merge branch 'develop' into develop (kwit75, May 1, 2026)
83abe32  Merge branch 'develop' into develop (kwit75, May 1, 2026)
3d91916  Merge branch 'rocketride-org:develop' into develop (Tejeshyewale, May 2, 2026)
4fe5fe9  feat(ai): add structured logging to task engine lifecycle (Tejeshyewale, May 3, 2026)
50 changes: 50 additions & 0 deletions nodes/src/nodes/ml_sklearn/IGlobal.py
@@ -0,0 +1,50 @@
# =============================================================================
# MIT License
# Copyright (c) 2026 RocketRide Contributors
# =============================================================================

# ------------------------------------------------------------------------------
# This class controls the data shared between all threads for the task
# ------------------------------------------------------------------------------
import os

from rocketlib import IGlobalBase, OPEN_MODE, warning
from ai.common.config import Config


class IGlobal(IGlobalBase):
    """Global state for the ml_sklearn node — holds the loaded sklearn model."""

    preprocessor: object = None  # The sklearn model/pipeline instance

    def validateConfig(self):
        """Validate that scikit-learn and numpy are available."""
        try:
            from depends import depends

            requirements = os.path.dirname(os.path.realpath(__file__)) + '/requirements.txt'
            depends(requirements)
        except Exception as e:  # noqa: BLE001
            warning(str(e))
Review comment on lines +22 to +28 (Contributor):

🧹 Nitpick | 🔵 Trivial | ⚡ Quick win

Requirements path is duplicated across validateConfig and beginGlobal.

The expression os.path.dirname(os.path.realpath(__file__)) + '/requirements.txt' appears verbatim in both methods. Extract it to a class-level constant to avoid divergence on future renames.

♻️ Proposed refactor
 class IGlobal(IGlobalBase):
     """Global state for the ml_sklearn node — holds the loaded sklearn model."""

     preprocessor: object = None  # The sklearn model/pipeline instance
+    _REQUIREMENTS = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'requirements.txt')

     def validateConfig(self):
         """Validate that scikit-learn and numpy are available."""
         try:
             from depends import depends
-            requirements = os.path.dirname(os.path.realpath(__file__)) + '/requirements.txt'
-            depends(requirements)
+            depends(self._REQUIREMENTS)
         except Exception as e:  # noqa: BLE001
             warning(str(e))

     def beginGlobal(self):
         """Load the sklearn model at runtime startup."""
         if self.IEndpoint.endpoint.openMode == OPEN_MODE.CONFIG:
             pass
         else:
             from depends import depends
-            requirements = os.path.dirname(os.path.realpath(__file__)) + '/requirements.txt'
-            depends(requirements)
+            depends(self._REQUIREMENTS)

Also applies to: 39-40

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@nodes/src/nodes/ml_sklearn/IGlobal.py` around lines 22-28: the duplicated
requirements path expression used in beginGlobal and validateConfig should be
extracted to a single class-level constant (e.g. REQUIREMENTS_PATH) so both
methods reference that constant; update the class (in IGlobal) to define
REQUIREMENTS_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)),
'requirements.txt') and replace the inline expressions in beginGlobal and
validateConfig with that constant, keeping the existing try/except and
depends(requirements) call semantics.


    def beginGlobal(self):
        """Load the sklearn model at runtime startup."""
        if self.IEndpoint.endpoint.openMode == OPEN_MODE.CONFIG:
            # Config mode: don't load the model, we'll only be called
            # to configure the service definition.
            pass
        else:
            from depends import depends

            requirements = os.path.dirname(os.path.realpath(__file__)) + '/requirements.txt'
            depends(requirements)

            # Deferred import — only after deps are installed
            from .code import PreProcessor

            config = Config.getNodeConfig(self.glb.logicalType, self.glb.connConfig)
            self.preprocessor = PreProcessor(config)

    def endGlobal(self):
        """Release the sklearn model."""
        self.preprocessor = None
45 changes: 45 additions & 0 deletions nodes/src/nodes/ml_sklearn/IInstance.py
@@ -0,0 +1,45 @@
# =============================================================================
# MIT License
# Copyright (c) 2026 RocketRide Contributors
# =============================================================================

# ------------------------------------------------------------------------------
# This class controls the data for each thread of the task
# ------------------------------------------------------------------------------
import copy

from rocketlib import IInstanceBase, Entry

from .IGlobal import IGlobal


class IInstance(IInstanceBase):
    """Per-thread instance for the ml_sklearn node."""

    IGlobal: IGlobal

    def open(self, obj: Entry):
        """Called before each new pipeline object — nothing to reset for this node."""
        pass

    def writeAnswers(self, question):
        """
        Receive a question from upstream, run sklearn inference on its text,
        and forward the result to the answers output lane.

        The question is deep-copied to prevent mutation in fan-out pipelines.
        """
        if self.IGlobal.preprocessor is None:
            raise RuntimeError('sklearn PreProcessor not initialized')

        question = copy.deepcopy(question)

        # Get the text to process
        text = question.text if hasattr(question, 'text') else str(question)

        # Run inference
        result = self.IGlobal.preprocessor.process(text)

        # Write result back to the question object and forward downstream
        question.text = result
        self.instance.writeAnswers(question)
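
For context on the deepcopy above: in a fan-out pipeline the same object may be handed to several downstream nodes, so mutating question.text in place would leak this node's output into sibling branches. A minimal sketch of the hazard, using a hypothetical stand-in for Entry (not the real rocketlib class):

import copy


class FakeEntry:  # hypothetical stand-in for rocketlib's Entry
    def __init__(self, text: str) -> None:
        self.text = text


shared = FakeEntry('250')
branch_a = shared                  # fan-out without a copy: same object
branch_b = copy.deepcopy(shared)   # what writeAnswers does before mutating

branch_a.text = '3.5'              # one branch mutates in place...
print(shared.text)                 # '3.5': the sibling reference sees it
print(branch_b.text)               # '250': the deep copy stays isolated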
19 changes: 19 additions & 0 deletions nodes/src/nodes/ml_sklearn/README.md
@@ -0,0 +1,19 @@
# ML Sklearn Prediction Node

This node performs predictions using a trained scikit-learn model.

## Input

- text (number as string)
Review comment on line 7 (Contributor):

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Test fixture in services.json contradicts the documented input format.

The README specifies the input as text (number as string), but the test fixture in services.json supplies "hello world" — a non-numeric string. These should be consistent so the test fixture exercises the documented contract. Consider aligning the fixture to a numeric string (e.g., "250") to match the stated input type.

Also applies to: 44-49

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@nodes/src/nodes/ml_sklearn/README.md` at line 7: the README entry that
documents the node input as "text (number as string)" conflicts with the test
fixture in services.json, which currently supplies "hello world"; update the
test fixture(s) in services.json to provide a numeric string (for example
"250") so the fixture matches the documented input contract described in
README.md (also fix the other instances noted around lines 44-49).
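
For illustration, a sketch of the aligned fixture in services.json (the value "250" follows the reviewer's example and is not a required constant):

"test": {
    "answers": [
        {
            "text": "250"
        }
    ]
}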


## Output

- text (predicted value as string)

## Example

Input:
250

Output:
3.5
4 changes: 4 additions & 0 deletions nodes/src/nodes/ml_sklearn/__init__.py
@@ -0,0 +1,4 @@
from .IGlobal import IGlobal
from .IInstance import IInstance

__all__ = ['IGlobal', 'IInstance']
47 changes: 47 additions & 0 deletions nodes/src/nodes/ml_sklearn/code.py
@@ -0,0 +1,47 @@
# =============================================================================
# MIT License
# Copyright (c) 2026 RocketRide Contributors
# =============================================================================

# ------------------------------------------------------------------------------
# PreProcessor: sklearn-based text inference class
# All heavy imports are deferred — this file is imported only after
# depends() has installed requirements.txt in beginGlobal().
# ------------------------------------------------------------------------------


class PreProcessor:
    """Wraps a scikit-learn model/pipeline for text inference."""

    def __init__(self, config: dict):
        """
        Initialize the sklearn model.

        In a real deployment, you'd load a pickled model from a path
        specified in config. This stub returns text unchanged so the
        node is CI-safe without a pre-trained model artifact.
        """
        # Example: load a real model like this:
        #   import joblib
        #   model_path = config.get('model_path', '')
        #   self._model = joblib.load(model_path)
        self._model = None  # Replace with actual model loading
Review comment on lines +16 to +28 (Contributor):

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

config is not stored — silently drops runtime configuration; also missing -> None annotation.

Two related problems:

  1. Unused config (ARG002): The parameter is received and ignored — self._config = config is never set. The commented-out loading example works because config is in scope within __init__, but if any future method (e.g., a reload helper) needs it, it won't be accessible. At minimum, rename the parameter to _config to signal intentional non-use in the stub, or store it.
  2. Missing -> None annotation (ANN204): Ruff flags this; per the project's ruff lint requirement for nodes/**/*.py, __init__ should be annotated with -> None.
🛠️ Proposed fix
-    def __init__(self, config: dict):
+    def __init__(self, config: dict) -> None:
         """
         Initialize the sklearn model.
         ...
         """
+        self._config = config  # Retained for real model loading
         # Example: load a real model like this:
         # import joblib
-        # model_path = config.get('model_path', '')
+        # model_path = self._config.get('model_path', '')
         # self._model = joblib.load(model_path)
         self._model = None  # Replace with actual model loading
🧰 Tools
🪛 Ruff (0.15.12)

[warning] 16-16: Missing return type annotation for special method __init__

Add return type annotation: None

(ANN204)


[warning] 16-16: Unused method argument: config

(ARG002)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@nodes/src/nodes/ml_sklearn/code.py` around lines 16-28: the __init__
currently accepts config but doesn't store it and lacks a return type; update
the constructor signature to include the return annotation (def __init__(self,
config: dict) -> None:) and either store the config on the instance
(self._config = config) so future methods (e.g., reload helpers) can access it,
or rename the parameter to _config to signal intentional non-use; ensure
existing usage of self._model remains unchanged (self._model = None) after
adding the annotation and storing/renaming the config.


    def process(self, text: str) -> str:
        """
        Run sklearn inference on input text and return processed text.

        Args:
            text: The input string to process.

        Returns:
            The processed string. Currently passes through unchanged.
        """
        if self._model is None:
            # Pass-through when no model is loaded (safe for CI)
            return text

        # Example with a real model:
        #   prediction = self._model.predict([text])
        #   return str(prediction[0])
        return text
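
For reference, a hedged sketch of what this class could look like once a pickled artifact ships, expanding the commented joblib example above (the model_path config key and the single-item predict batch are assumptions, not part of this PR):

import joblib


class PreProcessor:
    """Sketch only: assumes config carries a valid 'model_path'."""

    def __init__(self, config: dict) -> None:
        self._config = config  # retained, per the review suggestion above
        # Load the pickled sklearn pipeline from the configured path.
        self._model = joblib.load(config['model_path'])

    def process(self, text: str) -> str:
        # sklearn estimators expect a batch, so wrap the single input.
        prediction = self._model.predict([text])
        return str(prediction[0])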
2 changes: 2 additions & 0 deletions nodes/src/nodes/ml_sklearn/requirements.txt
@@ -0,0 +1,2 @@
scikit-learn>=1.0.0,<2.0.0
numpy>=1.21.0,<3.0.0
52 changes: 52 additions & 0 deletions nodes/src/nodes/ml_sklearn/services.json
@@ -0,0 +1,52 @@
{
    "ml_sklearn": {
        "name": "ML sklearn",
        "description": "Applies a trained scikit-learn model to process text through the pipeline.",
        "icon": "python.svg",
        "group": "preprocessor",
        "color": "#f97316",
        "runtime": "python",
        "pipe": {
            "lanes": {
                "answers": {
                    "in": true,
                    "out": true
                }
            }
        },
Review comment on lines +9 to +16 (Contributor):

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Description: Check what lane schema is used by other existing nodes to confirm
# whether ml_sklearn's pipe.lanes schema matches or deviates from the pattern.

rg -l '"pipe"' nodes/src/nodes --include='services.json' -n

echo "--- agent_langchain services.json ---"
fd 'services.json' nodes/src/nodes/agent_langchain --exec cat {}

echo "--- search_exa services.json ---"
fd 'services.json' nodes/src/nodes/search_exa --exec cat {}

echo "--- ml_sklearn services.json ---"
fd 'services.json' nodes/src/nodes/ml_sklearn --exec cat {}



🏁 Script executed:

# Check if any other nodes use the "pipe" pattern
echo "=== Checking for 'pipe' usage in all services.json files ==="
find nodes/src/nodes -name 'services.json' -type f | while read file; do
  if grep -q '"pipe"' "$file"; then
    echo "Found 'pipe' in: $file"
  fi
done

echo ""
echo "=== Total services.json files in codebase ==="
find nodes/src/nodes -name 'services.json' -type f | wc -l

echo ""
echo "=== List all services.json file locations ==="
find nodes/src/nodes -name 'services.json' -type f



🏁 Script executed:

# Search for service registry or parser code that validates/processes lanes
echo "=== Searching for service registry or parser related code ==="
fd --type f '(registry|parser|service)' | grep -E '\.(ts|js|py)$' | head -20

echo ""
echo "=== Searching in src directory for registry/parser patterns ==="
find src -type f \( -name '*registry*' -o -name '*parser*' -o -name '*service*' \) 2>/dev/null | head -20



🏁 Script executed:

# Examine pipelineParser.ts to understand how it processes lanes
echo "=== Checking pipelineParser.ts ==="
wc -l apps/vscode/src/shared/util/pipelineParser.ts

# Check first portion to understand structure
head -100 apps/vscode/src/shared/util/pipelineParser.ts



🏁 Script executed:

# Check registry.js for schema handling
echo "=== Checking registry.js ==="
cat scripts/lib/registry.js | head -200



🏁 Script executed:

# Sample a standard node to confirm pattern
echo "=== Standard node: accessibility_describe ==="
cat nodes/src/nodes/accessibility_describe/services.json | head -50



🏁 Script executed:

# Search for Python code that loads/parses services.json
echo "=== Searching for services.json loading in Python ==="
rg 'services\.json' packages/client-python packages/common --type py -B 2 -A 2 | head -100

echo ""
echo "=== Searching for lane/pipe parsing logic ==="
rg '(lane|pipe)' packages/ai/src/ai/modules/services/ --type py -B 2 -A 2 | head -150



🏁 Script executed:

# Broader search for services.json loading
echo "=== Searching for services.json file operations ==="
rg 'services\.json' --type py --type ts --type js -B 3 -A 3 | head -200

echo ""
echo "=== Searching for 'lanes' key handling ==="
rg '"lanes"' --type py --type ts --type js -B 2 -A 2 | head -150



🏁 Script executed:

# Check the actual service loader in packages/ai
echo "=== Checking services.py in packages/ai ==="
find packages/ai -name 'services.py' -o -name '*service*.py' | head -10



🏁 Script executed:

# Examine the actual services module that loads and processes lanes
echo "=== Checking packages/ai/src/ai/modules/services/services.py ==="
wc -l packages/ai/src/ai/modules/services/services.py

# Check first 150 lines for import and lane handling
head -150 packages/ai/src/ai/modules/services/services.py

echo ""
echo "=== Look for 'lanes' key access ==="
grep -n 'lanes' packages/ai/src/ai/modules/services/services.py | head -20



🏁 Script executed:

# Search for ml_sklearn usage in tests or code
echo "=== Searching for ml_sklearn references ==="
rg 'ml_sklearn' --type py --type ts --type js -B 2 -A 2 | head -100

echo ""
echo "=== Checking if there's lane schema validation ==="
rg '(lane.*schema|schema.*lane|validate.*lane)' --type py --type ts --type js -i | head -50

echo ""
echo "=== Look for any documentation on lane format ==="
find . -name '*.md' -type f -exec grep -l 'lane' {} \; | head -10



🏁 Script executed:

# Check if ml_sklearn has any test or implementation files
echo "=== ml_sklearn implementation files ==="
find nodes/src/nodes/ml_sklearn -type f

echo ""
echo "=== Check test files for ml_sklearn ==="
find . -name '*test*' -type f | xargs grep -l 'ml_sklearn' 2>/dev/null | head -5



🏁 Script executed:

# Examine ml_sklearn's implementation to understand how it expects to receive input
echo "=== ml_sklearn code.py ==="
cat nodes/src/nodes/ml_sklearn/code.py

echo ""
echo "=== ml_sklearn README ==="
cat nodes/src/nodes/ml_sklearn/README.md



🏁 Script executed:

# Check documentation on lane format and expected schema
echo "=== Checking ROCKETRIDE_COMPONENT_REFERENCE for lane schema ==="
grep -A 20 -B 5 'lane' docs/agents/ROCKETRIDE_COMPONENT_REFERENCE.md | head -100



Lanes schema must follow the established pattern — replace nested pipe structure with top-level lanes object.

The ml_sklearn node is the only one (out of 71) that nests lanes under "pipe" and uses boolean in/out properties. All other nodes, including agent_langchain and search_exa, declare lanes at the top level with array values:

"lanes": {
  "answers": ["output_lane"]
}

Additionally, all other nodes include an "input" array for routing configuration, which ml_sklearn lacks entirely. According to RocketRide documentation, the lanes object is "the definitive reference for data flow" and is expected at the top level with keys mapping to output lane arrays. The current structure will not be parsed correctly by the service registry, causing pipeline wiring failures at runtime.

Diff showing required changes
"pipe": {
    "lanes": {
        "answers": {
            "in": true,
            "out": true
        }
    }
},

Should be:

"lanes": {
    "answers": ["answers"]
},
"input": [
    {
        "lane": "answers",
        "output": [{ "lane": "answers" }]
    }
],
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@nodes/src/nodes/ml_sklearn/services.json` around lines 9-16: the JSON uses
a nested "pipe" -> "lanes" object with boolean "in"/"out" for the "answers"
lane; replace that with a top-level "lanes" object where "answers" maps to an
array (e.g., "answers": ["answers"]) and add a top-level "input" array that
routes from lane "answers" to output lane "answers" per the other services'
schema; specifically, remove the "pipe" block and add the top-level "lanes" and
"input" keys for the ml_sklearn service so the registry can parse "answers"
correctly.

"preconfig": {
"default": {
"object": "default",
"properties": []
}
},
"profiles": {
"ml_sklearn.default": {
"object": "default",
"properties": []
}
},
"fields": [],
"shape": {
"inputs": [
{
"name": "answers",
"type": "answers"
}
],
"outputs": [
{
"name": "answers",
"type": "answers"
}
]
},
"test": {
"answers": [
{
"text": "hello world"
}
]
}
}
}
31 changes: 31 additions & 0 deletions packages/ai/src/ai/modules/task/task_engine.py
@@ -61,6 +61,10 @@
from .types import LAUNCH_TYPE
from .task_conn import TaskConn
from .task_metrics import TaskMetrics
from .task_logger import get_task_logger

# Module-level structured logger for task lifecycle events
_logger = get_task_logger(__name__)


if TYPE_CHECKING:
@@ -843,6 +847,15 @@ async def _terminated(self) -> None:
            apikey=task_apikey,
        )

        _logger.info(
            'Task terminated',
            extra={
                'task_id': self.id,
                'step': 'termination',
                'exit_code': self._status.exitCode,
                'final_state': self._status.state,
            },
        )
        self.debug_message('Resource cleanup completed successfully')

    def _on_metrics_updated(self) -> None:
@@ -1470,6 +1483,10 @@ async def start_task(self) -> None:

        # Set our current state
        self._status.state = TASK_STATE.STARTING.value
        _logger.info(
            'Task starting',
            extra={'task_id': self.id, 'step': 'start'},
        )

        # Resolve any ${...} in the pipeline
        self._pipeline = self._resolve_pipeline(self._pipeline)
@@ -1586,6 +1603,15 @@ async def start_task(self) -> None:
            env=subprocess_env,
        )

        _logger.info(
            'Subprocess created',
            extra={
                'task_id': self.id,
                'step': 'subprocess',
                'pid': self._engine_process.pid,
            },
        )

        # Initialize stdio interface
        try:
            self._debug_stdio = Task.TaskDbgStdio(
@@ -1661,6 +1687,11 @@ async def start_task(self) -> None:

        except Exception as e:
            await self._terminated()
            _logger.error(
                'Task startup failed',
                extra={'task_id': self.id, 'step': 'error', 'error': str(e)},
                exc_info=True,
            )
            self.debug_message(f'Task startup failed: {e}')
            raise
Review comment on lines 1688 to 1696 (Contributor):

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Error log fires after _terminated() — "Task terminated" will always precede "Task startup failed" in the log stream, inverting the causal order.

Inside _terminated() (line 850), the termination event is logged unconditionally. Because await self._terminated() runs first (line 1689), every startup failure will produce log entries in this order:

{"step": "termination", "message": "Task terminated", ...}   ← logged inside _terminated()
{"step": "error",       "message": "Task startup failed", ...} ← logged after _terminated() returns

A developer querying for the root cause of a startup failure will see the termination message first and have to work backwards. Moving the error log before the _terminated() call restores the expected causal sequence.

Additionally, Ruff G201 flags the exc_info=True form — _logger.exception() is the idiomatic spelling and drops the redundant kwarg.

🐛 Proposed fix — log the error first, then clean up, use exception()
         except Exception as e:
-            await self._terminated()
-            _logger.error(
+            _logger.exception(
                 'Task startup failed',
-                extra={'task_id': self.id, 'step': 'error', 'error': str(e)},
-                exc_info=True,
+                extra={'task_id': self.id, 'step': 'error', 'error': str(e)},
             )
+            await self._terminated()
             self.debug_message(f'Task startup failed: {e}')
             raise
🧰 Tools
🪛 Ruff (0.15.12)

[warning] 1690-1690: Logging .exception(...) should be used instead of .error(..., exc_info=True)

(G201)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@packages/ai/src/ai/modules/task/task_engine.py` around lines 1688-1696:
the exception handling in the task startup path logs termination before the
error (the call to await self._terminated() happens before logging) and uses
_logger.error(..., exc_info=True) instead of the idiomatic _logger.exception;
update the except block in the task startup code so it first logs the failure
with _logger.exception('Task startup failed', extra={...}) and calls
self.debug_message(...) (or includes the same context), and only after logging
awaits self._terminated(); keep the final raise to re-raise the exception.


45 changes: 45 additions & 0 deletions packages/ai/src/ai/modules/task/task_logger.py
@@ -0,0 +1,45 @@
from __future__ import annotations

import json
import logging
import time
from typing import Any


class _StructuredFormatter(logging.Formatter):
    _RESERVED = frozenset(logging.LogRecord(
        '', 0, '', 0, '', (), None
    ).__dict__.keys()) | {'message', 'asctime'}

    def format(self, record: logging.LogRecord) -> str:
        record.message = record.getMessage()

        payload = {
            'timestamp': time.strftime(
                '%Y-%m-%dT%H:%M:%SZ', time.gmtime(record.created)
            ),
            'level': record.levelname,
            'logger': record.name,
            'message': record.message,
        }

        for key, value in record.__dict__.items():
            if key not in self._RESERVED:
                payload[key] = value

        if record.exc_info:
            payload['exception'] = self.formatException(record.exc_info)

        return json.dumps(payload, default=str)


def get_task_logger(name: str) -> logging.Logger:
    logger = logging.getLogger(name)

    if not logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(_StructuredFormatter())
        logger.addHandler(handler)
        logger.propagate = False

    return logger
Review comment on lines +36 to +45 (Contributor):

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

get_task_logger never sets a log level — all _logger.info() calls will be silently dropped in default configurations.

Python's effective-level resolution always traverses the logger hierarchy regardless of propagate. With propagate = False, records don't bubble up to parent handlers, but level filtering still uses getEffectiveLevel(), which walks up to the root until it finds a non-NOTSET level. The root logger's default level is WARNING, so every _logger.info(...) call added by this PR will be filtered out before it ever reaches the StreamHandler.

🐛 Proposed fix — set level so records reach the handler
 def get_task_logger(name: str) -> logging.Logger:
     logger = logging.getLogger(name)

     if not logger.handlers:
         handler = logging.StreamHandler()
         handler.setFormatter(_StructuredFormatter())
         logger.addHandler(handler)
-        logger.propagate = False

+    logger.setLevel(logging.DEBUG)   # pass everything through; let the handler/app filter
+    logger.propagate = False         # always disable propagation, not just on first setup

     return logger

Moving propagate = False outside the guard is also necessary: if the logger already has handlers (e.g. attached by a test framework or earlier setup code), the guard body is skipped and propagate stays True, causing duplicate output to parent handlers.
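
A quick interpreter check of the level-resolution claim, using only the standard library:

import logging

root = logging.getLogger()
root.setLevel(logging.WARNING)           # the stdlib default for the root logger

child = logging.getLogger('task.engine')
child.propagate = False                  # blocks handler dispatch, not level lookup

print(child.getEffectiveLevel())         # 30 (WARNING), inherited from root
print(child.isEnabledFor(logging.INFO))  # False: INFO records never reach handlers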

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@packages/ai/src/ai/modules/task/task_logger.py` around lines 36-45: the
get_task_logger function currently never sets a logger level and only sets
propagate=False inside the "if not logger.handlers" guard, causing INFO
messages to be dropped and duplicate output in some setups; fix by explicitly
setting the logger level (e.g. logger.setLevel(logging.INFO) or
logger.setLevel(logging.DEBUG) as appropriate) so records pass level filtering,
move logger.propagate = False outside the handlers-creation guard so it is
always applied, and optionally ensure the StreamHandler has an appropriate
level (handler.setLevel, or leave NOTSET) so the handler receives the records;
make these changes in get_task_logger.
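
For a concrete picture of what the formatter emits, a small usage sketch (the logger name, task_id value, and import path are illustrative assumptions):

import logging

from task_logger import get_task_logger  # assumes the module is importable by this name

logger = get_task_logger('ai.task')
logger.setLevel(logging.INFO)  # needed today, per the level-filtering finding above
logger.info('Task starting', extra={'task_id': 'abc123', 'step': 'start'})

# One JSON line on stderr, roughly:
# {"timestamp": "2026-05-03T12:00:00Z", "level": "INFO", "logger": "ai.task",
#  "message": "Task starting", "task_id": "abc123", "step": "start"}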
