class IGlobal(IGlobalBase):
    """Global state for the ml_sklearn node.

    Owns the ``PreProcessor`` wrapper that is created in ``beginGlobal`` and
    released in ``endGlobal``.
    """

    # Set by beginGlobal(); remains None in CONFIG mode and after endGlobal().
    preprocessor: object = None

    @staticmethod
    def _loadRequirements():
        """Pass this node's ``requirements.txt`` to ``depends``.

        Shared by ``validateConfig`` and ``beginGlobal`` so the path logic
        lives in one place. Any exception raised by ``depends`` propagates to
        the caller, which decides whether it is fatal.
        """
        # Imported lazily (as the original code did) so importing this module
        # does not itself require `depends`.
        from depends import depends

        node_dir = os.path.dirname(os.path.realpath(__file__))
        depends(os.path.join(node_dir, 'requirements.txt'))

    def validateConfig(self):
        """Check the node's requirements; report problems as a warning only."""
        try:
            self._loadRequirements()
        except Exception as e:
            # Best effort: validation must not abort, so surface the message.
            warning(str(e))

    def beginGlobal(self):
        """Build the ``PreProcessor`` at runtime startup.

        Does nothing when the endpoint is opened in ``OPEN_MODE.CONFIG``.
        Unlike ``validateConfig``, a failure while loading requirements is not
        caught here and propagates to the caller.
        """
        if self.IEndpoint.endpoint.openMode == OPEN_MODE.CONFIG:
            return

        self._loadRequirements()

        # Imported only after the requirements step, matching the original order.
        from .code import PreProcessor

        config = Config.getNodeConfig(self.glb.logicalType, self.glb.connConfig)
        self.preprocessor = PreProcessor(config)

    def endGlobal(self):
        """Drop the reference to the ``PreProcessor``."""
        self.preprocessor = None
class IInstance(IInstanceBase):
    """Per-instance handler that runs answer text through the shared preprocessor."""

    IGlobal: IGlobal

    def open(self, obj: Entry):
        """No per-object setup is performed."""
        pass

    def _process(self, text: str) -> str:
        """Return ``text`` transformed by the global preprocessor.

        When no preprocessor has been created (``IGlobal.preprocessor`` is
        ``None``) the text is returned unchanged.
        """
        preprocessor = self.IGlobal.preprocessor
        if preprocessor is None:
            return text
        return preprocessor.process(text)

    def writeAnswers(self, answer: Answer):
        """Process the answer text and forward a modified copy downstream.

        The incoming ``answer`` is deep-copied first so the caller's object is
        never mutated. A ``None`` answer is ignored: previously it reached
        ``answer.setText(...)`` and raised ``AttributeError``.
        """
        if answer is None:
            # Nothing to transform or forward.
            return

        processed = copy.deepcopy(answer)
        # Keep the original truthiness check: a falsy answer is treated as empty text.
        text = processed.getText() if processed else ''
        processed.setText(self._process(text))
        self.instance.writeAnswers(processed)
class PreProcessor:
    """Wraps a scikit-learn model/pipeline for text inference."""

    def __init__(self, config: dict):
        """Load the model named by ``config['model_path']``, if any.

        Args:
            config: Node configuration. Only the ``model_path`` key is read;
                a missing or empty value means "no model".

        A path that is set but does not point to a regular file is logged as a
        warning and treated as "no model", so ``process`` passes text through.
        Errors raised by ``joblib.load`` itself are not caught.
        """
        self._model = None

        model_path = config.get('model_path', '')
        if not model_path:
            return

        # isfile (not exists): a directory must not be handed to joblib.load.
        if not os.path.isfile(model_path):
            import logging

            logging.getLogger(__name__).warning(
                'ml_sklearn: model file %r not found; input will be passed through unchanged',
                model_path,
            )
            return

        # Imported lazily so this module can be imported without joblib when
        # no model is configured.
        import joblib

        # SECURITY: joblib.load unpickles the file and can execute arbitrary
        # code. model_path must only ever point at a trusted model file.
        self._model = joblib.load(model_path)

    def process(self, text: str) -> str:
        """Run inference on ``text`` and return the prediction as a string.

        ``text`` is parsed with ``float`` and passed to the model as a single
        ``[[value]]`` row; the first element of the prediction is returned via
        ``str``.

        Returns ``text`` unchanged when no model is loaded, when ``text`` is
        not parseable as a float, or when inference fails (the failure is
        logged).
        """
        if self._model is None:
            return text

        try:
            value = float(text)
        except (ValueError, TypeError):
            # Non-numeric input is an expected case: pass it through untouched.
            return text

        try:
            prediction = self._model.predict([[value]])
            return str(prediction[0])
        except Exception:
            # Best-effort fallback is kept, but the failure is no longer silent.
            import logging

            logging.getLogger(__name__).exception(
                'ml_sklearn: inference failed; returning input unchanged'
            )
            return text
b/nodes/src/nodes/ml_sklearn/services.json @@ -0,0 +1,72 @@ +{ + "title": "ML Sklearn Predict", + "protocol": "preprocessor_ml_sklearn://", + "classType": ["preprocessor"], + "capabilities": [], + "register": "filter", + "node": "python", + "path": "nodes.ml_sklearn", + "prefix": "Preprocessor", + "description": ["Applies a trained scikit-learn model to input text. ", + "Loads a pickled model from a configurable path and runs inference, ", + "returning the prediction as output. Falls back to original input ", + "if no model is found or inference fails."], + "icon": "preprocessor-code.svg", + "documentation": "https://docs.rocketride.org", + "tile": [], + "lanes": { + "answers": ["answers"] + }, + "input": [ + { + "lane": "answers", + "min": 0, + "output": [ + { + "lane": "answers" + } + ] + } + ], + "preconfig": { + "default": "default", + "profiles": { + "default": { + "model_path": "" + } + } + }, + "test": { + "profiles": ["default"], + "cases": [ + { + "answers": { + "answers": [{"text": "hello world"}] + }, + "expect": { + "answers": { + "notEmpty": true + } + } + } + ] + }, + "fields": { + "ml_sklearn.model_path": { + "type": "string", + "title": "Model file path (.pkl)", + "default": "" + }, + "ml_sklearn.default": { + "object": "default", + "properties": ["ml_sklearn.model_path"] + } + }, + "shape": [ + { + "section": "Pipe", + "title": "ML Sklearn", + "properties": ["ml_sklearn.model_path"] + } + ] +} \ No newline at end of file diff --git a/nodes/src/nodes/ml_sklearn/test_ml_sklearn.py b/nodes/src/nodes/ml_sklearn/test_ml_sklearn.py new file mode 100644 index 000000000..e988514f8 --- /dev/null +++ b/nodes/src/nodes/ml_sklearn/test_ml_sklearn.py @@ -0,0 +1,76 @@ +# ============================================================================= +# Standalone test for ml_sklearn — no pytest needed +# Run: python nodes/src/nodes/ml_sklearn/test_ml_sklearn.py +# ============================================================================= +import os +import sys 
import importlib.util
import tempfile

import joblib
import numpy as np
from sklearn.linear_model import LinearRegression


def _load_preprocessor():
    """Import ``PreProcessor`` directly from the sibling ``code.py`` file."""
    if '__file__' in globals():
        code_path = os.path.join(os.path.dirname(__file__), 'code.py')
    else:
        # No __file__ available: fall back to the repo-relative path.
        code_path = 'nodes/src/nodes/ml_sklearn/code.py'

    # Load under the name 'mlcode', not 'code': the name 'code' would clash
    # (presumably with the standard-library `code` module).
    spec = importlib.util.spec_from_file_location('mlcode', code_path)
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    return mod.PreProcessor


def main() -> int:
    """Run the standalone checks and return a process exit code (0 = all passed)."""
    PreProcessor = _load_preprocessor()

    passed = 0
    failed = 0

    def check(name, condition):
        """Print PASS/FAIL for one check and update the counters."""
        nonlocal passed, failed
        if condition:
            print(f' PASS {name}')
            passed += 1
        else:
            print(f' FAIL {name}')
            failed += 1

    # The temporary directory (and the model inside it) is removed on exit,
    # instead of being leaked as with tempfile.mkdtemp().
    with tempfile.TemporaryDirectory() as tmp:
        # ---- Build a temp model: y = 2x + 1 ----
        model_path = os.path.join(tmp, 'model.pkl')
        X = np.array([[1.0], [2.0], [3.0], [4.0], [5.0]])
        y = np.array([3.0, 5.0, 7.0, 9.0, 11.0])
        model = LinearRegression()
        model.fit(X, y)
        joblib.dump(model, model_path)

        print('\nRunning ml_sklearn tests...\n')

        # Test 1: Normal prediction
        pp = PreProcessor({'model_path': model_path})
        result = pp.process('4.0')
        check('predict(4.0) ≈ 9.0', abs(float(result) - 9.0) < 0.1)

        # Test 2: predict(6.0)
        result2 = pp.process('6.0')
        check('predict(6.0) ≈ 13.0', abs(float(result2) - 13.0) < 0.1)

        # Test 3: No model fallback
        pp2 = PreProcessor({'model_path': ''})
        result3 = pp2.process('hello world')
        check('no model → returns input unchanged', result3 == 'hello world')

        # Test 4: Bad input fallback
        pp3 = PreProcessor({'model_path': model_path})
        result4 = pp3.process('not a number')
        check('bad input → returns input unchanged', result4 == 'not a number')

    # ---- Summary ----
    print(f'\n{passed} passed, {failed} failed')
    if failed == 0:
        print('All tests passed! ✅')
        return 0
    print('Some tests failed ❌')
    return 1


if __name__ == '__main__':
    sys.exit(main())