From d72bc719f39a9d8d5476abb2f00856fdfbc303c9 Mon Sep 17 00:00:00 2001 From: mwever Date: Mon, 10 Feb 2025 09:14:56 +0100 Subject: [PATCH] Revert "Revert "Version 1.3.3"" --- .github/workflows/docs.yml | 3 + CHANGELOG.md | 15 ++ Makefile | 2 + README.md | 4 +- deepcave/layouts/general.py | 60 +++++-- deepcave/runs/converters/amltk.py | 23 +++ deepcave/runs/converters/bohb.py | 21 +++ deepcave/runs/converters/dataframe.py | 22 +++ deepcave/runs/converters/deepcave.py | 21 +++ deepcave/runs/converters/optuna.py | 22 +++ deepcave/runs/converters/smac3v1.py | 23 +++ deepcave/runs/converters/smac3v2.py | 235 ++++++++++++++++++-------- deepcave/runs/handler.py | 20 +++ deepcave/runs/run.py | 19 +++ docs/converters/bohb.rst | 8 +- docs/converters/custom_converters.rst | 19 ++- docs/converters/deepcave.rst | 8 +- docs/converters/optuna.rst | 3 + docs/converters/smac.rst | 11 +- docs/installation.rst | 3 + requirements.txt | 4 +- 21 files changed, 448 insertions(+), 98 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 2db6efef..560a142a 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -26,6 +26,9 @@ env: jobs: build-and-deploy: runs-on: ubuntu-latest + permissions: + contents: write + actions: read defaults: run: shell: bash # Default to using bash on all diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a8cfbdd..8d170491 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,18 @@ +# Version 1.3.4 + +## Installation +- Numpy will be installed via conda instead of pip to fix linkage + +# Version 1.3.3 + +## Converters +- Add for each converter a method to check whether a run is valid. +- Change rst docs of converters. + +## General Layout +- Fix navigation and button problems in general layout. +- Change general layout to be more intuitive. 
+ # Version 1.3.2 ## Features diff --git a/Makefile b/Makefile index c150b3c0..92a1bdd5 100644 --- a/Makefile +++ b/Makefile @@ -45,7 +45,9 @@ FLAKE8 ?= flake8 install: $(PIP) install -e . +# Fix numpy as version 2.1.0 will drop support for Python 3.9 install-dev: + conda install -y numpy=2.0.1 $(PIP) install -e ".[dev]" pre-commit install diff --git a/README.md b/README.md index 3354fcc5..41493d6f 100644 --- a/README.md +++ b/README.md @@ -69,8 +69,8 @@ DeepCAVE comes with some pre-evaluated runs to get a feeling for what DeepCAVE c If you cloned the repository from GitHub via `git clone https://github.com/automl/DeepCAVE.git`, you can try out some examples by exploring the `logs` directory inside the DeepCAVE dashboard. -For example, if you navigate to `logs/DeepCAVE`, you can view the run `mnist_pytorch` if you hit -the `+` button left to it. +For example, if you navigate to `logs/DeepCAVE/mnist_pytorch`, you can view its runs if you hit +the `+` button left to them. ## Features diff --git a/deepcave/layouts/general.py b/deepcave/layouts/general.py index 20c03da9..3191892b 100644 --- a/deepcave/layouts/general.py +++ b/deepcave/layouts/general.py @@ -142,8 +142,10 @@ def callback(run_paths: List[str]): # Add text to go to parent directory new_element = html.Div( [ - dbc.Button( - "+", id={"type": "general-dynamic-add-run", "index": -1}, disabled=True + html.I( + className="fas fa-folder-open fa-lg", + id={"type": "general-dynamic-add-run", "index": -1}, + style={"pointer-events": "none"}, ), dbc.Button( "..", @@ -161,23 +163,45 @@ def callback(run_paths: List[str]): for i, run_path in enumerate(run_paths): run_name = run_handler.get_run_name(run_path) - new_element = html.Div( - [ - dbc.Button("+", id={"type": "general-dynamic-add-run", "index": i}), - dbc.Button( - run_name, - id={"type": "general-dynamic-change-directory", "index": i}, - color="link", - ), - dcc.Store( - id={"type": "general-dynamic-available-run-path", "index": i}, - data=run_path, - ), - ], 
- className="mb-1", - ) - children.append(new_element) + is_run = run_handler.is_run(run_path) + # Differentiate between run and directory for visibility and usability reasons + if is_run: + new_element = html.Div( + [ + dbc.Button( + "+", id={"type": "general-dynamic-add-run", "index": i}, size="sm" + ), + dbc.Button( + run_name, + id={"type": "general-dynamic-change-directory", "index": i}, + color="light", + disabled=True, + ), + dcc.Store( + id={"type": "general-dynamic-available-run-path", "index": i}, + data=run_path, + ), + ], + className="mb-1", + ) + else: + new_element = html.Div( + [ + html.I(className="fas fa-folder fa-lg"), + dbc.Button( + run_name, + id={"type": "general-dynamic-change-directory", "index": i}, + color="link", + ), + dcc.Store( + id={"type": "general-dynamic-available-run-path", "index": i}, + data=run_path, + ), + ], + className="mb-1", + ) + children.append(new_element) if len(children) == 0: return html.Div("No runs found.") diff --git a/deepcave/runs/converters/amltk.py b/deepcave/runs/converters/amltk.py index 3f1becef..56017c26 100644 --- a/deepcave/runs/converters/amltk.py +++ b/deepcave/runs/converters/amltk.py @@ -24,6 +24,7 @@ from typing import Optional, Sequence, Union +import os import re from pathlib import Path @@ -215,3 +216,25 @@ def from_path(cls, path: Union[Path, str]) -> "AMLTKRun": ) return run + + @classmethod + def is_valid_run(cls, path_name: str) -> bool: + """ + Check whether the path name belongs to a valid amltk run. + + Parameters + ---------- + path_name: str + The path to check. + + Returns + ------- + bool + True if path is valid run. + False otherwise. 
+ """ + if os.path.isfile(path_name + "/history.parquet") and os.path.isfile( + path_name + "/configspace.json" + ): + return True + return False diff --git a/deepcave/runs/converters/bohb.py b/deepcave/runs/converters/bohb.py index 08b2f37c..4a39f69d 100644 --- a/deepcave/runs/converters/bohb.py +++ b/deepcave/runs/converters/bohb.py @@ -25,6 +25,7 @@ from typing import Any, Dict, Union +import os from pathlib import Path from ConfigSpace.configuration_space import ConfigurationSpace @@ -177,3 +178,23 @@ def from_path(cls, path: Union[Path, str]) -> "BOHBRun": ) return run + + @classmethod + def is_valid_run(cls, path_name: str) -> bool: + """ + Check whether the path name belongs to a valid bohb run. + + Parameters + ---------- + path_name: str + The path to check. + + Returns + ------- + bool + True if path is valid run. + False otherwise. + """ + if os.path.isfile(path_name + "/configspace.json"): + return True + return False diff --git a/deepcave/runs/converters/dataframe.py b/deepcave/runs/converters/dataframe.py index 11e34048..7af0fb73 100644 --- a/deepcave/runs/converters/dataframe.py +++ b/deepcave/runs/converters/dataframe.py @@ -385,3 +385,25 @@ def _extract_additional( additional = data.drop(hyperparameters + costs_metrics + budgets + seeds + meta) additional = dict(additional) return {key: value if pd.notna(value) else None for key, value in additional.items()} + + @classmethod + def is_valid_run(cls, path_name: str) -> bool: + """ + Check whether the path name belongs to a valid dataframe run. + + Parameters + ---------- + path_name: str + The path to check. + + Returns + ------- + bool + True if path is valid run. + False otherwise. 
+ """ + if os.path.isfile(path_name + "/trials.csv") and os.path.isfile( + path_name + "/configspace.csv" + ): + return True + return False diff --git a/deepcave/runs/converters/deepcave.py b/deepcave/runs/converters/deepcave.py index 9fedc619..3e3c6324 100644 --- a/deepcave/runs/converters/deepcave.py +++ b/deepcave/runs/converters/deepcave.py @@ -24,6 +24,7 @@ from typing import Union +import os from pathlib import Path from deepcave.runs.run import Run @@ -92,3 +93,23 @@ def from_path(cls, path: Path) -> "DeepCAVERun": The DeepCAVE run. """ return DeepCAVERun(path.stem, path=Path(path)) + + @classmethod + def is_valid_run(cls, path_name: str) -> bool: + """ + Check whether the path name belongs to a valid deepcave run. + + Parameters + ---------- + path_name: str + The path to check. + + Returns + ------- + bool + True if path is valid run. + False otherwise. + """ + if os.path.isfile(path_name + "/history.jsonl"): + return True + return False diff --git a/deepcave/runs/converters/optuna.py b/deepcave/runs/converters/optuna.py index 2d570744..bce2159a 100644 --- a/deepcave/runs/converters/optuna.py +++ b/deepcave/runs/converters/optuna.py @@ -285,3 +285,25 @@ def from_path(cls, path: Union[Path, str]) -> "OptunaRun": ) return run + + @classmethod + def is_valid_run(cls, path_name: str) -> bool: + """ + Check whether the path name belongs to a valid optuna run. + + Parameters + ---------- + path_name: str + The path to check. + + Returns + ------- + bool + True if path is valid run. + False otherwise. 
+ """ + path = Path(path_name) + pickle_files = list(path.glob("*.pkl")) + if len(pickle_files) != 1: + return False + return True diff --git a/deepcave/runs/converters/smac3v1.py b/deepcave/runs/converters/smac3v1.py index 21afe20c..c90142da 100644 --- a/deepcave/runs/converters/smac3v1.py +++ b/deepcave/runs/converters/smac3v1.py @@ -27,6 +27,7 @@ from typing import Optional, Union import json +import os from pathlib import Path import numpy as np @@ -217,3 +218,25 @@ def from_path(cls, path: Union[Path, str]) -> "SMAC3v1Run": ) return run + + @classmethod + def is_valid_run(cls, path_name: str) -> bool: + """ + Check whether the path name belongs to a valid smac3v1 run. + + Parameters + ---------- + path_name: str + The path to check. + + Returns + ------- + bool + True if path is valid run. + False otherwise. + """ + if os.path.isfile(path_name + "/runhistory.json") and os.path.isfile( + path_name + "/configspace.json" + ): + return True + return False diff --git a/deepcave/runs/converters/smac3v2.py b/deepcave/runs/converters/smac3v2.py index 0c933526..42a903db 100644 --- a/deepcave/runs/converters/smac3v2.py +++ b/deepcave/runs/converters/smac3v2.py @@ -25,9 +25,10 @@ - SMAC3v2Run: Define a SMAC3v2 run object. 
""" -from typing import Union +from typing import Dict, List, Optional, Union import json +import os from pathlib import Path import numpy as np @@ -144,75 +145,177 @@ def from_path(cls, path: Union[Path, str]) -> "SMAC3v2Run": config_origins = all_data["config_origins"] configs = all_data["configs"] - instance_ids = [] + instance_ids: List[int] = [] first_starttime = None - for ( - config_id, - instance_id, - seed, - budget, - cost, - time, - status, - starttime, - endtime, - additional_info, - ) in data: - if instance_id not in instance_ids: - instance_ids += [instance_id] - - if len(instance_ids) > 1: - raise RuntimeError("Instances are not supported.") - - config_id = str(config_id) - config = configs[config_id] - - if first_starttime is None: - first_starttime = starttime - - starttime = starttime - first_starttime - endtime = endtime - first_starttime - - if status == 0: - # still running - continue - elif status == 1: - status = Status.SUCCESS - elif status == 3: - status = Status.TIMEOUT - elif status == 4: - status = Status.MEMORYOUT - else: - status = Status.CRASHED - - if status != Status.SUCCESS: - # Costs which failed, should not be included - cost = [None] * len(cost) if isinstance(cost, list) else None - time = None - else: - time = endtime - starttime - - # Round budget - if budget: - budget = np.round(budget, 2) - else: - budget = 0.0 - - origin = None - if config_id in config_origins: - origin = config_origins[config_id] - - run.add( - costs=cost + [time] if isinstance(cost, list) else [cost, time], - config=config, - budget=budget, - seed=seed, - start_time=starttime, - end_time=endtime, - status=status, - origin=origin, - additional=additional_info, - ) + if isinstance(data, list): + import warnings + + warnings.warn( + "The runhistory.json file is in an outdated format.", + DeprecationWarning, + stacklevel=2, # Adjusts the stack level to point to the caller. 
+ ) + for ( + config_id, + instance_id, + seed, + budget, + cost, + time, + status, + starttime, + endtime, + additional_info, + ) in data: + # Remember the first start time here; the helper only rebinds its own parameter. + if first_starttime is None: + first_starttime = starttime + run_dict = run._process_data_entry( + str(config_id), + instance_id, + seed, + budget, + cost, + time, + status, + starttime, + endtime, + additional_info, + first_starttime, + instance_ids, + configs, + config_origins, + ) + if run_dict is not None: + run.add(**run_dict) + elif isinstance(data, dict): + for config_id, config_data in data.items(): + instance_id = config_data["instance"] + seed = config_data["seed"] + budget = config_data["budget"] + cost = config_data["cost"] + time = config_data["time"] + status = config_data["status"] + starttime = config_data["starttime"] + endtime = config_data["endtime"] + additional_info = config_data["additional_info"] + # Remember the first start time here; the helper only rebinds its own parameter. + if first_starttime is None: + first_starttime = starttime + run_dict = run._process_data_entry( + config_id, + instance_id, + seed, + budget, + cost, + time, + status, + starttime, + endtime, + additional_info, + first_starttime, + instance_ids, + configs, + config_origins, + ) + if run_dict is not None: + run.add(**run_dict) + else: + raise RuntimeError("Data in runhistory.json is not in a valid format.") return run + + def _process_data_entry( + self, + config_id: str, + instance_id: int, + seed: int, + budget: Optional[float], + cost: Optional[Union[List[Union[float, None]], float]], + time: Optional[float], + status: int, + starttime: float, + endtime: float, + additional_info: Optional[Dict], + first_starttime: Optional[float], + instance_ids: List[int], + configs: Dict, + config_origins: Dict[str, str], + ) -> Optional[Dict]: + if instance_id not in instance_ids: + instance_ids += [instance_id] + + if len(instance_ids) > 1: + raise 
RuntimeError("Instances are not supported.") + + config = configs[config_id] + + if first_starttime is None: + first_starttime = starttime + + starttime = starttime - first_starttime + endtime = endtime - first_starttime + + if status == 0: + # still running + return None + elif status == 1: + 
status = Status.SUCCESS + elif status == 3: + status = Status.TIMEOUT + elif status == 4: + status = Status.MEMORYOUT + else: + status = Status.CRASHED + + if status != Status.SUCCESS: + # Costs which failed, should not be included + cost = [None] * len(cost) if isinstance(cost, list) else None + time = None + else: + time = endtime - starttime + + # Round budget + if budget: + budget = np.round(budget, 2) + else: + budget = 0.0 + + origin = None + if config_id in config_origins: + origin = config_origins[config_id] + + return { + "costs": cost + [time] if isinstance(cost, list) else [cost, time], + "config": config, + "budget": budget, + "seed": seed, + "start_time": starttime, + "end_time": endtime, + "status": status, + "origin": origin, + "additional": additional_info, + } + + @classmethod + def is_valid_run(cls, path_name: str) -> bool: + """ + Check whether the path name belongs to a valid smac3v2 run. + + Parameters + ---------- + path_name: str + The path to check. + + Returns + ------- + bool + True if path is valid run. + False otherwise. + """ + if os.path.isfile(path_name + "/runhistory.json") and os.path.isfile( + path_name + "/configspace.json" + ): + return True + return False diff --git a/deepcave/runs/handler.py b/deepcave/runs/handler.py index 27387348..6b8057c7 100644 --- a/deepcave/runs/handler.py +++ b/deepcave/runs/handler.py @@ -523,3 +523,23 @@ def get_runs(self, include_groups: bool = False) -> List[AbstractRun]: runs += list(self.groups.values()) return runs + + def is_run(self, path_name: str) -> bool: + """ + Check whether the path name belongs to a valid run. + + Parameters + ---------- + path_name: str + The path to check. + + Returns + ------- + bool + True if path is valid run. + False otherwise. 
+ """ + for run_class in self.available_run_classes: + if run_class.is_valid_run(path_name): + return True + return False diff --git a/deepcave/runs/run.py b/deepcave/runs/run.py index 18e3475c..912db646 100644 --- a/deepcave/runs/run.py +++ b/deepcave/runs/run.py @@ -139,6 +139,25 @@ def from_path(cls, path: Path) -> "Run": """ pass + @classmethod + @abstractmethod + def is_valid_run(cls, path_name: str) -> bool: + """ + Check whether the path name belongs to a valid run. + + Parameters + ---------- + path_name: str + The path to check. + + Returns + ------- + bool + True if path is valid run. + False otherwise. + """ + pass + @property def id(self) -> str: """ diff --git a/docs/converters/bohb.rst b/docs/converters/bohb.rst index 0f289929..681da493 100644 --- a/docs/converters/bohb.rst +++ b/docs/converters/bohb.rst @@ -5,4 +5,10 @@ To load an BOHB run into DeepCAVE, it is necessary to install HpBandSter, e.g. v .. code-block:: bash - pip install deepcave[bohb] \ No newline at end of file + pip install deepcave[bohb] + +To be able to load it correctly into DeepCAVE, you need to save the configuration space as a ConfigSpace JSON file: + + .. code-block:: python + + "configspace.json" \ No newline at end of file diff --git a/docs/converters/custom_converters.rst b/docs/converters/custom_converters.rst index 015892ca..d8820a6a 100644 --- a/docs/converters/custom_converters.rst +++ b/docs/converters/custom_converters.rst @@ -27,23 +27,31 @@ Step 1: Create the converter class @property def hash(self) -> str: """ - Returns a unique hash for the run (e.g., hashing the trial history). + Return a unique hash for the run (e.g., hashing the trial history). """ pass @property def latest_change(self) -> float: """ - Returns the timestamp of the latest change. + Return the timestamp of the latest change. """ pass @classmethod def from_path(cls, path: str) -> 'Run': """ - Returns a Run object from a given path. + Return a Run object from a given path. 
""" pass + + @classmethod + def is_valid_run(cls, path: str) -> bool: + """ + Check if the path belongs to a valid run. + """ + pass + Step 2: Register your converter ------------------------------- @@ -63,7 +71,10 @@ Step 3: Implement the methods Inside this method, you have to provide a configspace, objectives, and meta data, as well as the trials, which have to be added to the newly created run object. -Step 4: Document your converter +4. **is_valid_run**: This method should return True if the given path belongs to a valid run. Valid means that the given run + directory contains all files that are necessary for further processing (e.g. a configspace.json). + +Step 5: Document your converter ------------------------------- To document your converter, create a corresponding `.rst` file under `docs/converters` and add it to diff --git a/docs/converters/deepcave.rst b/docs/converters/deepcave.rst index e89ad522..09d2e27e 100644 --- a/docs/converters/deepcave.rst +++ b/docs/converters/deepcave.rst @@ -2,4 +2,10 @@ DeepCAVE --------- You can incorporate DeepCAVE into your AutoML tool directly to save your results in the native -DeepCAVE format and then analyze it in DeepCAVE. Please see the :ref:`examples` on this. \ No newline at end of file +DeepCAVE format and then analyze it in DeepCAVE. Please see the :ref:`examples` on this. + +To be able to load it correctly into DeepCAVE, your run needs to at least contain a history JSONL file: + + .. code-block:: python + + "history.jsonl" \ No newline at end of file diff --git a/docs/converters/optuna.rst b/docs/converters/optuna.rst index 89467972..932a30f5 100644 --- a/docs/converters/optuna.rst +++ b/docs/converters/optuna.rst @@ -14,6 +14,9 @@ consider when running Optuna: Replace ```` with the directory where you want to store the Pickle file. +.. note:: + There has to be exactly one Pickle file. + 2. 
Optional: If you want to display the names of your objectives in DeepCAVE, you can set metric names in Optuna using the following command: .. code-block:: python diff --git a/docs/converters/smac.rst b/docs/converters/smac.rst index dabb3937..7d0e7e92 100644 --- a/docs/converters/smac.rst +++ b/docs/converters/smac.rst @@ -5,4 +5,13 @@ For the SMAC framework, we support loading runs created with the older SMAC vers runs created with the newer SMAC version >= 2.0.0. As Auto-Sklearn and Auto-PyTorch rely on SMAC for hyperparameter optimization, the SMAC -converter also allows to load runs created with these tools. \ No newline at end of file +converter also allows to load runs created with these tools. + +To be able to load it correctly into DeepCAVE, you need to save the configuration space as a ConfigSpace JSON file, +as well as the run history: + + .. code-block:: python + + "configspace.json" + "runhistory.json" + diff --git a/docs/installation.rst b/docs/installation.rst index f5bc4ead..d50f9f79 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -52,6 +52,9 @@ If you want to contribute to DeepCAVE, you can clone it from GitHub and install While it is generally expected to function correctly on MacOS, some issues may arise due to compatibility with Swig. Specifically, users may encounter problems with the Parallel Coordinates and Importance Plugin on MacOS. + NumPy currently needs to be installed with conda (as opposed to pip) to ensure it being linked to openblas + and not the Mac specific accelerate. The latter, if used, will result in a plugin failure for + Configuration Footprint. Currently, DeepCAVE cannot be run on Windows due to its reliance on a bash script for starting services such as Redis, workers, and the webserver. 
diff --git a/requirements.txt b/requirements.txt index c4f6fe31..f710af33 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ setuptools==68.2.2 absl-py>=1.0.0 jsonlines>=3.0.0 pandas>=1.3.4 -numpy==2.0.1 # Fix numpy as version 2.1.0 will drop support for Python 3.9 +# numpy needs to be installed via conda instead of pip matplotlib==3.9.0 seaborn>=0.13.0 pyyaml>=6.0.1 @@ -24,7 +24,7 @@ dash==2.0.0 dash-extensions==0.0.71 dash-bootstrap-components==1.0.3 redis>=4.1.4 -rq>=1.10.1 +rq==1.10.1 # Pinned due to https://github.com/plotly/dash/issues/1992 # Pinning might be removed for dash>2.3.0 werkzeug==2.0.3