Feat/parallel #159

Open · wants to merge 5 commits into base: development
12 changes: 12 additions & 0 deletions PARALLEL.md
@@ -0,0 +1,12 @@
Run with

```bash
# hangs
python -m carps.run +optimizer/smac20=blackbox +problem/BBOB=cfg_2_1_2_0 task.n_workers=4

# API needs to be adjusted
python -m carps.run +optimizer/optuna=blackbox +problem/BBOB=cfg_2_1_2_0 task.n_workers=4

# works
python -m carps.run +optimizer/randomsearch=config +problem/BBOB=cfg_2_1_2_0 task.n_workers=4
```
41 changes: 40 additions & 1 deletion carps/benchmarks/problem.py
@@ -2,12 +2,15 @@

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING
from dataclasses import asdict
from carps.utils.trials import TrialInfo, TrialValue
from ConfigSpace import Configuration

if TYPE_CHECKING:
    from ConfigSpace import ConfigurationSpace

    from carps.loggers.abstract_logger import AbstractLogger
    from carps.utils.trials import TrialInfo, TrialValue



class Problem(ABC):
@@ -86,3 +89,39 @@ def evaluate(self, trial_info: TrialInfo) -> TrialValue:
            )

        return trial_value

    def parallel_evaluate(
        self,
        eval_config: dict,
        fidels: dict[str, int | float] | None = None,
        trial_info: TrialInfo | None = None,
        obj_keys: list[str] | None = None,
        **kwargs,
    ) -> dict[str, float]:
        """Evaluate a configuration through the interface expected by benchmark_simulator.

        Returns a dict mapping each objective key to its cost, plus the measured "runtime".
        """
        assert obj_keys, (
            "obj_keys must be specified, usually during instantiation of "
            "carps.benchmarks.wrapper.ParallelProblemWrapper"
        )

        if trial_info is None:
            trial_info = TrialInfo(
                config=Configuration(values=eval_config, configuration_space=self.configspace),
                budget=list(fidels.values())[0] if fidels else None,
            )

        trial_value = self._evaluate(trial_info=trial_info)
        self.n_function_calls += 1
        if trial_info.normalized_budget is not None:
            self.n_trials += trial_info.normalized_budget
        else:
            self.n_trials += 1

        for logger in self.loggers:
            logger.log_trial(
                n_trials=self.n_trials,
                n_function_calls=self.n_function_calls,
                trial_info=trial_info,
                trial_value=trial_value,
            )

        cost = trial_value.cost
        if not isinstance(cost, list):
            cost = [cost]
        cost_dict = dict(zip(obj_keys, cost, strict=False))

        return {
            **cost_dict,
            "runtime": max(trial_value.time, trial_value.virtual_time),
        }

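For orientation, here is a minimal sketch of how `parallel_evaluate` is meant to be called (illustrative only, not part of the diff; `problem` is assumed to be an instantiated carps `Problem`, and the objective key `"quality"` is a hypothetical placeholder for whatever `cfg.task.objectives` contains):

```python
# Illustrative sketch. Assumes `problem` is an instantiated carps Problem;
# the objective key "quality" stands in for cfg.task.objectives.
config = problem.configspace.sample_configuration()

result = problem.parallel_evaluate(
    eval_config=dict(config),   # plain dict of hyperparameter values
    fidels=None,                # no fidelity -> TrialInfo.budget stays None
    obj_keys=["quality"],       # normally injected via ParallelProblemWrapper / ObjectiveFuncWrapper
)

# `result` maps each objective key to its cost and adds the measured runtime, e.g.
# {"quality": 0.42, "runtime": 1.3}
```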
129 changes: 129 additions & 0 deletions carps/benchmarks/wrapper.py
@@ -0,0 +1,129 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any

from benchmark_simulator import AbstractAskTellOptimizer, ObjectiveFuncWrapper
from ConfigSpace import Configuration

from carps.utils.trials import TrialInfo, TrialValue

if TYPE_CHECKING:
    from carps.optimizers.optimizer import Optimizer


class ParallelProblemWrapper(ObjectiveFuncWrapper):
    def __call__(self, trial_info: TrialInfo) -> TrialValue:
        config = trial_info.config
        eval_config = dict(config)
        budget = trial_info.budget
        fidels = {self.fidel_keys[0]: budget} if budget else None
        output = super().__call__(eval_config, fidels=fidels, trial_info=trial_info, obj_keys=self.obj_keys)

        time = None
        if "runtime" in self.obj_keys:
            time = output["runtime"]

        if len(self.obj_keys) > 1:
            cost = [output[k] for k in self.obj_keys if k != "runtime"]
        else:
            cost = output[self.obj_keys[0]]

        return TrialValue(cost=cost, time=time)


class OptimizerParallelWrapper(AbstractAskTellOptimizer):
    def __init__(self, optimizer: Optimizer):
        self.optimizer = optimizer

        super().__init__()

        if self.optimizer.solver is None:
            self.optimizer.setup_optimizer()

        # We record the entire trial info produced during ask so that no information is lost
        # when the result comes back from benchmark_simulator in tell.
        # NOTE: this does not cover the case where one configuration runs on multiple seeds and instances!
        self.history: dict[Configuration, TrialInfo] = {}

    def ask(self) -> tuple[dict[str, Any], dict[str, int | float] | None, int | None]:
        """The ask method to sample a configuration using an optimizer.

        Args:
            None

        Returns:
            (eval_config, fidels, config_id) (tuple[dict[str, Any], dict[str, int | float] | None, int | None]):
                * eval_config (dict[str, Any]):
                    The configuration to evaluate.
                    The key is the hyperparameter name and its value is the corresponding hyperparameter value.
                    For example, when returning {"alpha": 0.1, "beta": 0.3}, the objective function evaluates
                    the hyperparameter configuration with alpha=0.1 and beta=0.3.
                * fidels (dict[str, int | float] | None):
                    The fidelity parameters to be used for the evaluation of the objective function.
                    If not multi-fidelity optimization, simply return None.
                * config_id (int | None):
                    The identifier of the configuration if needed for continual learning.
                    Not used at all when continual_max_fidel=None.
                    As we internally use a hash of eval_config, it may be unstable if eval_config contains floats.
                    However, even if config_id is not provided, our simulator works without errors,
                    although we cannot guarantee that our simulator recognizes the same configs if a user's optimizer
                    slightly changes the content of eval_config.
        """
        trial_info = self.optimizer.ask()
        eval_config = dict(trial_info.config)
        fidels = {self.optimizer.task.fidelity_type: trial_info.budget} if trial_info.budget else None
        config_id = None
        self.history[trial_info.config] = trial_info
        return eval_config, fidels, config_id

    def tell(
        self,
        eval_config: dict[str, Any],
        results: dict[str, float],
        *,
        fidels: dict[str, int | float] | None = None,
        config_id: int | None = None,
    ) -> None:
        """The tell method to register a tuple of configuration, fidelity, and results to an optimizer.

        Args:
            eval_config (dict[str, Any]):
                The configuration to be used in the objective function.
            results (dict[str, float]):
                The dict of the return values from the objective function.
            fidels (dict[str, int | float] | None):
                The fidelities to be used in the objective function. Typically the training epoch in deep learning.
                If None, we assume that no fidelity is used.
            config_id (int | None):
                The identifier of the configuration if needed for continual learning.
                Not used at all when continual_max_fidel=None.
                As we internally use a hash of eval_config, it may be unstable if eval_config contains floats.
                However, even if config_id is not provided, our simulator works without errors,
                although we cannot guarantee that our simulator recognizes the same configs if a user's optimizer
                slightly changes the content of eval_config.

        Returns:
            None
        """
        config = Configuration(values=eval_config, configuration_space=self.optimizer.problem.configspace)
        trial_info_ask = self.history.pop(config)
        trial_info = TrialInfo(
            config=config,
            budget=next(iter(fidels.values())) if fidels else None,
            instance=trial_info_ask.instance,
            seed=trial_info_ask.seed,
            name=trial_info_ask.name,
            checkpoint=trial_info_ask.checkpoint,
        )
        time = None
        if "runtime" in results:
            time = results["runtime"]
            del results["runtime"]
        cost = list(results.values())
        if len(cost) == 1:
            cost = cost[0]

        trial_value = TrialValue(cost=cost, time=time)
        self.optimizer.tell(trial_info=trial_info, trial_value=trial_value)
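To make the division of labour clearer: `OptimizerParallelWrapper` adapts a carps `Optimizer` to the ask/tell interface driven by benchmark_simulator, while `parallel_evaluate` plays the role of the objective function. Conceptually the simulation amounts to the loop sketched below (illustrative only — the real `ObjectiveFuncWrapper.simulate` runs `n_workers` workers and orders results by simulated runtime; `optimizer` and `problem` are assumed to come from `make_optimizer`/`make_problem`, and `"quality"` is a hypothetical objective key):

```python
# Conceptual sketch only -- benchmark_simulator's actual loop schedules multiple workers
# by simulated runtime. Assumes `optimizer` and `problem` already exist; "quality" is hypothetical.
from functools import partial

opt = OptimizerParallelWrapper(optimizer=optimizer)
obj_func = partial(problem.parallel_evaluate, obj_keys=["quality"])

for _ in range(10):  # cfg.task.n_trials in the PR
    eval_config, fidels, config_id = opt.ask()          # sample from the wrapped optimizer
    results = obj_func(eval_config, fidels=fidels)       # {"quality": ..., "runtime": ...}
    opt.tell(eval_config, results, fidels=fidels, config_id=config_id)
```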
64 changes: 51 additions & 13 deletions carps/utils/running.py
@@ -11,6 +11,11 @@
)

from carps.utils.exceptions import NotSupportedError
from benchmark_simulator import ObjectiveFuncWrapper
from carps.benchmarks.wrapper import OptimizerParallelWrapper, ParallelProblemWrapper

from functools import partial

if TYPE_CHECKING:
from py_experimenter.result_processor import ResultProcessor
@@ -47,7 +52,17 @@ def make_problem(cfg: DictConfig, result_processor: ResultProcessor | None = Non
kwargs = {}
logger = instantiate(logger)(**kwargs)
loggers.append(logger)
    return instantiate(problem_cfg, loggers=loggers)

    problem = instantiate(problem_cfg, loggers=loggers)
    if cfg.task.n_workers > 1:
        problem.evaluate = ParallelProblemWrapper(
            obj_func=problem.parallel_evaluate,
            obj_keys=[*list(cfg.task.objectives), "runtime"],
            fidel_keys=[cfg.task.fidelity_type] if cfg.task.fidelity_type else None,
            n_workers=cfg.task.n_workers,
            ask_and_tell=False,
        )
    return problem


def make_optimizer(cfg: DictConfig, problem: Problem) -> Optimizer:
@@ -99,15 +114,38 @@ def optimize(cfg: DictConfig, result_processor: ResultProcessor | None = None) -
    problem = make_problem(cfg=cfg, result_processor=result_processor)
    inspect(problem)

    optimizer = make_optimizer(cfg=cfg, problem=problem)
    inspect(optimizer)

    try:
        inc_tuple = optimizer.run()
        printr("Solution found: ", inc_tuple)
    except NotSupportedError:
        print("Not supported. Skipping.")
    except Exception as e:
        print("Something went wrong:")
        print(e)
        raise e
    if cfg.task.n_workers > 1:
        cfg_copy = cfg.copy()
        cfg_copy.task.n_workers = 1
        optimizer = make_optimizer(cfg=cfg_copy, problem=problem)
        inspect(optimizer)
        opt = OptimizerParallelWrapper(optimizer=optimizer)
        obj_fun = partial(problem.parallel_evaluate, obj_keys=optimizer.task.objectives)
        worker = ObjectiveFuncWrapper(
            save_dir_name="tmp",
            ask_and_tell=True,
            n_workers=cfg.task.n_workers,
            obj_func=obj_fun,
            n_actual_evals_in_opt=cfg.task.n_trials + cfg.task.n_workers,  # TODO check if trial for simulator means the same as in carps
            n_evals=cfg.task.n_trials,
            seed=cfg.seed,
            fidel_keys=None,
            obj_keys=optimizer.task.objectives,
            # allow_parallel_sampling=True,
            expensive_sampler=True,
        )
        worker.simulate(opt)

    else:
        optimizer = make_optimizer(cfg=cfg, problem=problem)
        inspect(optimizer)

        try:
            inc_tuple = optimizer.run()
            printr("Solution found: ", inc_tuple)
        except NotSupportedError:
            print("Not supported. Skipping.")
        except Exception as e:
            print("Something went wrong:")
            print(e)
            raise e
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -29,7 +29,7 @@ dependencies = [
"typing_extensions",
"pymoo",
"GitPython",
"mlcroissant",
"mfhpo-simulator",
]

requires-python = ">=3.9"
3 changes: 2 additions & 1 deletion requirements.txt
@@ -10,4 +10,5 @@ dataclasses-json
pymysql
cryptography
domdf_python_tools
py-experimenter>=1.4.1
py-experimenter>=1.4.1
mfhpo-simulator