Commit 32178db

Commit message: continue to flow
1 parent d6fb9bf commit 32178db

4 files changed: 158 additions & 27 deletions


src/unitxt/ccc_inference.py

Lines changed: 40 additions & 4 deletions
@@ -17,11 +17,25 @@
 class ServerManager:
     def __init__(self):
         self.shutdown_flag = False
-        self.inactivity_timeout = 60
+        self.inactivity_timeout = 6000
         self.monitor_thread = threading.Thread(target=self.monitor_activity, daemon=True)
-        self.monitor_thread.start()
+
         self.last_request_time = time.time()
         self.shutdown_flag = False
+        self.configuration = None
+        self.workers_status = {}
+
+    def set_configuration(self, configuration):
+        self.configuration = configuration
+
+    def get_configuration(self):
+        return self.configuration
+
+    def register_worker(self, id):
+        self.workers_status[id] = {"status": "registered"}
+
+    def start_monitoring(self):
+        self.monitor_thread.start()
 
     def update_last_request_time(self):
         self.last_request_time = time.time()
@@ -48,32 +62,54 @@ def shutdown_server(self):
         time.sleep(1)
         os._exit(0)  # This immediately stops the program
 
+
 server_manager = ServerManager()
 
+
 @app.before_request
 def update_activity():
     server_manager.update_last_request_time()
 
+
 @app.route("/isup", methods=["GET"])
 def isup():
     return jsonify({"status": "up"}), 200
 
+
 @app.route("/version", methods=["GET"])
 def version():
     return jsonify({"version": "1.0.0"}), 200
 
+
 @app.route("/infer", methods=["POST"])
 def infer():
-    data = request.json.get("dataset", [])
-    predictions = [f"Processed: {item}" for item in data]
+    data = request.json
+    predictions = [0.202 for item in data]
     return jsonify(predictions)
 
+
+@app.route("/set_configuration", methods=["POST"])
+def set_configuration():
+    configuration = request.json
+    server_manager.set_configuration(configuration)
+    return jsonify({"message": "configuration has been set"})
+
+
+@app.route("/register", methods=["POST"])
+def register():
+    id = request.json
+    server_manager.register_worker(id)
+    return jsonify(server_manager.get_configuration())
+
+
 @app.route("/shutdown", methods=["POST"])
 def shutdown():
     app.logger.info("Received shutdown request")
     server_manager.shutdown_server()
     return jsonify({"message": "Shutting down server..."}), 200
 
+
 if __name__ == "__main__":
+    server_manager.start_monitoring()
    app.logger.info("Server started on port {PORT}")
    app.run(host="0.0.0.0", port=PORT)
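
Taken together, the additions turn the Flask stub into a small coordination server: a client stores generation parameters via /set_configuration, workers POST their id to /register and get that configuration back, and /infer is still a placeholder that returns the constant 0.202 once per posted item. A few behavioral details also change: the activity monitor is now started explicitly from the __main__ block via start_monitoring() rather than inside __init__, the inactivity timeout grows from 60 to 6000 seconds, and the startup log line is a plain string rather than an f-string, so it logs the literal text {PORT}. Below is a minimal client-side sketch of the new round trip; the localhost:8000 base URL is an assumption for illustration (the server's actual PORT is parsed elsewhere in the script):

import requests

BASE = "http://localhost:8000"  # assumed host/port, for illustration only

# Store generation parameters for workers to pick up later.
requests.post(f"{BASE}/set_configuration", json={"max_new_tokens": 13}, timeout=5)

# A worker announces itself and receives the stored configuration back.
config = requests.post(f"{BASE}/register", json="worker-1", timeout=5).json()
print(config)  # {'max_new_tokens': 13}

# The /infer stub returns 0.202 once per item in the posted list.
preds = requests.post(f"{BASE}/infer", json=["q1", "q2"], timeout=5).json()
print(preds)  # [0.202, 0.202]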

src/unitxt/ccc_worker.py

Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
+import argparse
+
+import requests
+
+server_url = "http://localhost/"
+
+
+def post(endpoint, data):
+    # print(f"{server_url}{endpoint}")
+    response = requests.post(f"{server_url}{endpoint}", json=data)
+    if response.status_code == 200:
+        return response.json()
+    raise RuntimeError("Failed to post from to server:", response.status_code, response.text)
+
+
+def main(**kwargs):
+    worker_id = kwargs["id"]
+    # get configuration and create actual inference engine
+    configuration = post("register", worker_id)
+    configuration["1"] = 2
+    #print(configuration)
+    finish = False
+    while not finish:
+        # get batch from server
+        batch = None
+        if not batch:
+            finish = True
+            continue
+        # create predictions for batch
+        # return predictions to server
+
+
+
+
+# should be --kwargs key1=value1 key2=value2 key3=value3
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Generic argument parser")
+
+    # Accept arbitrary key-value pairs as arguments
+    parser.add_argument("--kwargs", nargs="+", help="Pass key=value pairs", default=[])
+
+    args = parser.parse_args()
+
+    # Convert key=value pairs to a dictionary
+    kwargs_dict = {}
+    for item in args.kwargs:
+        if "=" in item:
+            key, value = item.split("=", 1)
+            kwargs_dict[key] = value
+
+    main(**kwargs_dict)
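
The new worker registers with the server, receives the shared configuration, and then loops over batches; batch retrieval is still stubbed (batch is always None, so the loop exits on its first pass). Its generic --kwargs interface converts key=value tokens into a dictionary. A self-contained sketch of that parsing, using a hypothetical worker id and batch size:

import argparse

parser = argparse.ArgumentParser(description="Generic argument parser")
parser.add_argument("--kwargs", nargs="+", help="Pass key=value pairs", default=[])

# Equivalent to invoking: python ccc_worker.py --kwargs id=worker-1 batch_size=8
args = parser.parse_args(["--kwargs", "id=worker-1", "batch_size=8"])

# Mirrors the worker's parsing loop; values remain strings.
kwargs_dict = dict(item.split("=", 1) for item in args.kwargs if "=" in item)
print(kwargs_dict)  # {'id': 'worker-1', 'batch_size': '8'}

One thing to watch: server_url is hardcoded to http://localhost/ (implicitly port 80), while the engine launches the server with a --port argument (default "8000"), so the worker will only reach the server once the URL includes the actual port.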

src/unitxt/inference.py

Lines changed: 27 additions & 10 deletions
@@ -3358,12 +3358,18 @@ class CCCInferenceEngine(
     ccc_path: str
     ccc_python: str
     server_port: str = "8000"
+    num_of_workers: int = 10
 
-    def prepare_engine(self):
+    def post(self, endpoint, data=None):
+        response = requests.post(f"{self.server_url}/{endpoint}", json=data, timeout=5)
+        if response.status_code == 200:
+            return response.json()
+        raise RuntimeError("Failed to post from to server:", response.status_code, response.text)
+
+    def start_ccc_server(self):
         import paramiko
         server_file = "ccc_inference.py"
         local_server_path = os.path.dirname(os.path.abspath(__file__))
-        self.server_url = f"http://{self.ccc_host}:{self.server_port}"
 
         ssh = paramiko.SSHClient()
         ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
@@ -3372,8 +3378,9 @@ def prepare_engine(self):
         sftp = ssh.open_sftp()
         sftp.put(os.path.join(local_server_path, server_file), os.path.join(self.ccc_path, server_file))
         sftp.close()
-        ssh.exec_command(f"cd {self.ccc_path} && nohup {self.ccc_python} {server_file} --port {self.server_port} > server.log 2>&1 &")
-        time.sleep(2)  # Wait 2 seconds before checking
+        ssh.exec_command(
+            f"cd {self.ccc_path} && nohup {self.ccc_python} {server_file} --port {self.server_port} > server.log 2>&1 &")
+        time.sleep(1)
         try:
             response = requests.get(f"{self.server_url}/isup", timeout=5)
             if response.status_code == 200:
@@ -3394,14 +3401,23 @@ def prepare_engine(self):
         except requests.RequestException as err:
             raise RuntimeError(f"Failed to start ccc server. Response: {server_log_content}") from err
 
-        get_logger().info("OK")
-        get_logger().info(111)
-        self.shutdown_server()
-        get_logger().info(222)
+    def prepare_engine(self):
+        self.server_url = f"http://{self.ccc_host}:{self.server_port}"
+        if "localhost" in self.ccc_host:
+            response = requests.get(f"{self.server_url}/isup", timeout=5)
+            if response.status_code == 200:
+                get_logger().info("Server is up and running!")
+            else:
+                raise RuntimeError("server is down!")
+        else:
+            self.start_ccc_server()
+        self.post("set_configuration", data=self.to_dict([HFGenerationParamsMixin]))
 
     def shutdown_server(self):
+        if "localhost" in self.ccc_host:
+            return
         try:
-            requests.post(f"{self.server_url}/shutdown", timeout=5)
+            self.post("shutdown")
         except:
             pass
 
@@ -3414,4 +3430,5 @@ def _infer(
         dataset: Union[List[Dict[str, Any]], Dataset],
         return_meta_data: bool = False,
     ) -> Union[List[str], List[TextGenerationInferenceOutput]]:
-        pass
+        messages = [self.to_messages(instance) for instance in dataset]
+        return self.post("infer", data=messages)
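
After this refactor, prepare_engine only computes the server URL and branches: when ccc_host contains "localhost" it expects an already-running server and merely checks /isup, otherwise start_ccc_server copies ccc_inference.py over SFTP and launches it through SSH. Either way, the engine then pushes its HF generation parameters to /set_configuration, and _infer now POSTs the converted messages to /infer (shutdown_server likewise becomes a no-op for a localhost server, so a shared local server is not killed by one client). A sketch of the localhost path; the ccc_user/ccc_path/ccc_python values below are placeholders, unused on this branch but still required fields, and a ccc_inference.py server is assumed to be listening on localhost:8000:

from unitxt import load_dataset
from unitxt.inference import CCCInferenceEngine

dataset = load_dataset(card="cards.openbook_qa", split="test")

# Placeholder ccc_* values; the localhost branch only needs a server
# already listening on http://localhost:8000.
engine = CCCInferenceEngine(
    max_new_tokens=13,
    ccc_host="localhost",
    ccc_user="someuser",
    ccc_path="/tmp/inference_server",
    ccc_python="python",
)
predictions = engine.infer(dataset)  # per-instance messages POSTed to /infer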

test_ccc.py

Lines changed: 40 additions & 13 deletions
@@ -1,7 +1,38 @@
+import hashlib
+import json
+import os
+import time
+
+import joblib
 import unitxt
+from unitxt import load_dataset
 from unitxt.inference import CCCInferenceEngine
 from unitxt.logging_utils import set_verbosity
 
+
+def get_cache_filename(cache_dir="cache", **kwargs):
+    """Generate a unique filename for caching based on function arguments."""
+    os.makedirs(cache_dir, exist_ok=True)
+    hash_key = hashlib.md5(json.dumps(kwargs, sort_keys=True).encode()).hexdigest()
+    return os.path.join(cache_dir, f"dataset_{hash_key}.pkl")
+
+
+def load_dataset_cached(**kwargs):
+    """Load dataset with disk caching."""
+    cache_file = get_cache_filename(**kwargs)
+
+    if os.path.exists(cache_file):
+        # print("Loading from cache...")
+        return joblib.load(cache_file)
+
+    # print("Loading dataset from source...")
+    data = load_dataset(**kwargs)  # Your actual function call
+    # print("Saving to cache...")
+    joblib.dump(data, cache_file)
+
+    return data
+
+
 if __name__ == "__main__":
     set_verbosity("debug")
     unitxt.settings.allow_unverified_code = True
@@ -12,20 +43,15 @@
         "metrics.llm_as_judge.direct.rits.llama3_1_70b[context_fields=[question],"
         f"criteria=metrics.llm_as_judge.direct.criteria.{criterion}]"
     ]
-    # dataset = load_dataset(card="cards.openbook_qa",
-    #                        metrics=metrics,
-    #                        split='test')
-    # #dataset = dataset.select(range(10))
+    dataset = load_dataset_cached(card="cards.openbook_qa", metrics=metrics, split="test")
+    #dataset = dataset.select(range(10))
     inference_model = CCCInferenceEngine(max_new_tokens=13,
                                          ccc_host="cccxl013.pok.ibm.com",
+                                         #ccc_host="localhost",
                                          ccc_user="eladv",
                                          ccc_path="/u/eladv/fusion/inference_server",
                                          ccc_python="/dccstor/fuse/envs/fm-eval/bin/python")
-    # model="watsonx/meta-llama/llama-3-2-1b-instruct",
-    # max_tokens=256,
-    # use_cache=True
-    # )
-    #
+
     # def my_wrapper(original_method):
    #     random.seed(int(time.time()))
    #     async def wrapped(*args, **kwargs):
@@ -45,10 +71,11 @@
     #
     # inference_model._infer_instance = my_wrapper(inference_model._infer_instance)
 
-    # start_time = time.time()
-    # predictions = inference_model.infer(dataset)
-    # end_time = time.time()
-    #
+    start_time = time.time()
+    predictions = inference_model.infer(dataset)
+    end_time = time.time()
+
+    # print(f"len predictions: {len(predictions)} first 10 predictions: {predictions[:10]}")
    # print(f"predictions contains {predictions.count(None)} Nones")
    #
    # mode = 'validate'
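
The test script now wraps unitxt's load_dataset in a small disk cache: the call's kwargs are JSON-serialized with sorted keys, MD5-hashed into a filename, and the loaded dataset is persisted with joblib so repeated runs skip the expensive load. A minimal sketch of the key derivation (the kwargs shown are illustrative, and they must be JSON-serializable for the hashing to work):

import hashlib
import json
import os

# Same derivation as get_cache_filename: identical kwargs -> identical file.
kwargs = {"card": "cards.openbook_qa", "split": "test"}
hash_key = hashlib.md5(json.dumps(kwargs, sort_keys=True).encode()).hexdigest()
print(os.path.join("cache", f"dataset_{hash_key}.pkl"))

Since the key is derived only from the kwargs, changes elsewhere (a new unitxt version, an edited card definition) do not invalidate old entries; stale files under cache/ have to be removed by hand.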
