diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6bfe58ff..2a8cfbdd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,8 @@
+# Version 1.3.2
+
+## Features
+- Allow Importance and ablation path analysis for multi-objective runs.
+
# Version 1.3.1
## Quality of Life
diff --git a/Makefile b/Makefile
index 3afee1db..c150b3c0 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
# are usually completed in github actions.
SHELL := /bin/bash
-VERSION := 1.3.1
+VERSION := 1.3.2
NAME := DeepCAVE
PACKAGE_NAME := deepcave
diff --git a/deepcave/__init__.py b/deepcave/__init__.py
index 2e7d2bfe..57a89b70 100644
--- a/deepcave/__init__.py
+++ b/deepcave/__init__.py
@@ -44,7 +44,7 @@
"Source Code": "https://github.com/automl/deepcave",
}
copyright = f"Copyright {datetime.date.today().strftime('%Y')}, {author}"
-version = "1.3.1"
+version = "1.3.2"
_exec_file = sys.argv[0]
_exec_files = ["server.py", "worker.py", "sphinx-build"]
diff --git a/deepcave/evaluators/ablation.py b/deepcave/evaluators/ablation.py
index da14b7ba..251cc830 100644
--- a/deepcave/evaluators/ablation.py
+++ b/deepcave/evaluators/ablation.py
@@ -245,7 +245,7 @@ def _ablation(
max_hp_difference = -np.inf
for hp in hp_it:
- if hp in hp in incumbent_config.keys() and hp in self.default_config.keys():
+ if hp in incumbent_config.keys() and hp in self.default_config.keys():
config_copy = copy.copy(self.default_config)
config_copy[hp] = incumbent_config[hp]
diff --git a/deepcave/evaluators/mo_ablation.py b/deepcave/evaluators/mo_ablation.py
new file mode 100644
index 00000000..c7c24a89
--- /dev/null
+++ b/deepcave/evaluators/mo_ablation.py
@@ -0,0 +1,350 @@
+# Copyright 2021-2024 The DeepCAVE Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# noqa: D400
+"""
+# Ablation Paths
+
+This module evaluates the ablation paths.
+
+Ablation Paths is a method to analyze the importance of hyperparameters in a configuration space.
+Starting from a default configuration, the default configuration is iteratively changed to the
+incumbent configuration by changing one hyperparameter at a time, choosing the
+hyperparameter that leads to the largest improvement in the objective function at each step.
+
+## Classes:
+    - MOAblation: Provide an evaluator of the ablation paths for the multi-objective case.
+"""
+
+from typing import Any, List, Optional, Tuple, Union
+
+import copy
+
+import numpy as np
+import pandas as pd
+
+from deepcave.evaluators.ablation import Ablation
+from deepcave.evaluators.epm.random_forest_surrogate import RandomForestSurrogate
+from deepcave.runs import AbstractRun
+from deepcave.runs.objective import Objective
+from deepcave.utils.multi_objective_importance import get_weightings
+
+
+class MOAblation(Ablation):
+ """
+ Provide an evaluator of the ablation paths.
+
+ Override: Multi-Objective case
+
+ Properties
+ ----------
+ run : AbstractRun
+ The run to analyze.
+ cs : ConfigurationSpace
+ The configuration space of the run.
+ hp_names : List[str]
+ A list of the hyperparameter names.
+ performances : Optional[Dict[Any, Any]]
+ A dictionary containing the performances for each HP.
+ improvements : Optional[Dict[Any, Any]]
+ A dictionary containing the improvements over the respective previous step for each HP.
+ objectives : Optional[Union[Objective, List[Objective]]]
+ The objective(s) of the run.
+    default_config : Configuration
+ The default configuration of this configuration space.
+ Gets changed step by step towards the incumbent configuration.
+ """
+
+    def __init__(self, run: AbstractRun):
+        """
+        Initialize the parent evaluator and the empty result containers.
+
+        Parameters
+        ----------
+        run : AbstractRun
+            The run to analyze.
+        """
+        super().__init__(run)
+        # Result table, extended by `calculate`.
+        self.df_importances = pd.DataFrame([])
+        # Surrogate models, one appended per objective by `calculate`.
+        self.models: List = []
+
+    def get_importances(self) -> str:
+        """
+        Return the importance scores.
+
+        Returns
+        -------
+        str
+            JSON string (created with `pandas.DataFrame.to_json`) of the table holding the
+            hyperparameter names, importances, variances, new performances and weights.
+
+        Raises
+        ------
+        RuntimeError
+            If the importance scores are not calculated.
+        """
+        # `__init__` sets the table to an empty DataFrame (never None), so an empty table is
+        # what indicates that no results have been calculated yet.
+        if self.df_importances is None or self.df_importances.empty:
+            raise RuntimeError("Importance scores must be calculated first.")
+
+        return self.df_importances.to_json()
+
+    def predict(self, cfg: list[Any], weighting: np.ndarray) -> Tuple[float, float]:
+        """
+        Predict the weighted performance of the input configuration.
+
+        Every model in `self.models` predicts the configuration; the predicted means and
+        variances are each multiplied by the matching entry of `weighting` and summed up.
+
+        Parameters
+        ----------
+        cfg : list[Any]
+            Configuration, in the encoded form accepted by the models.
+        weighting : np.ndarray
+            Weightings, paired with the models in order.
+
+        Returns
+        -------
+        mean : float
+            The weighted sum of the predicted means.
+        var : float
+            The weighted sum of the predicted variances.
+        """
+        weighted_mean = 0
+        weighted_var = 0
+        for surrogate, weight in zip(self.models, weighting):
+            # The models expect a batch, hence the single configuration is wrapped.
+            means, variances = surrogate.predict(np.array([cfg]))
+            weighted_mean = weighted_mean + weight * means[0]
+            weighted_var = weighted_var + weight * variances[0]
+        return weighted_mean, weighted_var
+
+    def calculate(
+        self,
+        objectives: Optional[Union[Objective, List[Objective]]],  # noqa
+        budget: Optional[Union[int, float]] = None,  # noqa
+        n_trees: int = 50,  # noqa
+        seed: int = 0,  # noqa
+    ) -> None:
+        """
+        Calculate the MO ablation path performances and improvements.
+
+        One surrogate model is trained per (normalized) objective. For every weighting
+        returned by `get_weightings`, an ablation path is calculated and stored in
+        `self.df_importances` together with the first entry of the weighting.
+        If one ablation path cannot be calculated, the result is replaced by a table of
+        zeros (one row for "Default" and one per hyperparameter) and the calculation stops.
+
+        Parameters
+        ----------
+        objectives : Optional[Union[Objective, List[Objective]]]
+            The objective(s) to be considered. Must be a list of `Objective`.
+        budget : Optional[Union[int, float]]
+            The budget to be considered; forwarded to `run.get_encoded_data`.
+            Default is None.
+        n_trees : int
+            The number of trees for the surrogate model.
+            Default is 50.
+        seed : int
+            The seed for the surrogate model.
+            Default is 0.
+        """
+        assert isinstance(objectives, list)
+        for objective in objectives:
+            assert isinstance(objective, Objective)
+
+        # Start from a clean state. Without the reset, a second call (e.g. for another
+        # budget) would keep predicting with the models of the first call, because
+        # `predict` pairs the weights with the first entries of `self.models`, and the
+        # result table would still contain the rows of the previous call.
+        self.models = []
+        self.df_importances = pd.DataFrame([])
+
+        df = self.run.get_encoded_data(objectives, budget, specific=True, include_config_ids=True)
+
+        # Obtain all configurations with their costs
+        df = df.dropna(subset=[obj.name for obj in objectives])
+        X = df[list(self.run.configspace.keys())].to_numpy()
+
+        # Normalize objectives to [0, 1] and flip maximized ones, so lower is always better
+        objectives_normed = list()
+        for obj in objectives:
+            normed = obj.name + "_normed"
+            df[normed] = (df[obj.name] - df[obj.name].min()) / (
+                df[obj.name].max() - df[obj.name].min()
+            )
+
+            if obj.optimize == "upper":
+                df[normed] = 1 - df[normed]
+            objectives_normed.append(normed)
+
+            # Train one model per objective
+            Y = df[normed].to_numpy()
+            model = RandomForestSurrogate(self.cs, seed=seed, n_trees=n_trees)
+            model._fit(X, Y)
+            self.models.append(model)
+
+        weightings = get_weightings(objectives_normed, df)
+
+        # Calculate importance for each weighting generated from the pareto efficient points
+        results: List[pd.DataFrame] = []
+        for w in weightings:
+            df_res = self.calculate_ablation_path(df, objectives_normed, w, budget)
+            if df_res is None:
+                # No valid path for this weighting: report zeros for all hyperparameters
+                columns = ["hp_name", "importance", "variance", "new_performance", "weight"]
+                self.df_importances = pd.DataFrame(
+                    0, index=np.arange(len(self.hp_names) + 1), columns=columns
+                )
+                self.df_importances["hp_name"] = ["Default"] + self.hp_names
+                return
+            df_res["weight"] = w[0]
+            results.append(df_res)
+
+        # Concatenate once instead of growing the table in every iteration
+        if results:
+            self.df_importances = pd.concat(results).reset_index(drop=True)
+
+    def calculate_ablation_path(
+        self,
+        df: pd.DataFrame,
+        objectives_normed: List[str],
+        weighting: np.ndarray,
+        budget: Optional[Union[int, float]],
+    ) -> Optional[pd.DataFrame]:
+        """
+        Calculate the ablation path performances for one weighting.
+
+        The incumbent is the row of `df` with the smallest weighted sum of the normed
+        objectives. Starting from the default configuration, `ablation` is called
+        repeatedly; each call switches one hyperparameter to its incumbent value.
+
+        Parameters
+        ----------
+        df : pd.DataFrame
+            Dataframe with encoded data. Needs the normed objective columns and "config_id".
+        objectives_normed : List[str]
+            The normed objective names to be considered.
+        weighting : np.ndarray
+            The weighting of the objective values.
+        budget : Optional[Union[int, float]]
+            The budget to be considered. Only used for log messages here.
+
+        Returns
+        -------
+        df : Optional[pd.DataFrame]
+            Dataframe with results of the ablation calculation (columns "hp_name",
+            "importance", "variance", "new_performance"; first row is "Default").
+            None if the predicted incumbent cost is worse than the predicted default cost.
+        """
+        # Get the incumbent configuration
+        incumbent_cfg_id = np.argmin(
+            sum(df[obj] * w for obj, w in zip(objectives_normed, weighting))
+        )
+        incumbent_config = self.run.get_config(df.iloc[incumbent_cfg_id]["config_id"])
+
+        # Get the default configuration
+        self.default_config = self.cs.get_default_configuration()
+        default_encode = self.run.encode_config(self.default_config, specific=True)
+
+        # Obtain the predicted cost of the default and incumbent configuration
+        def_cost, def_std = self.predict(default_encode, weighting)
+        inc_cost, _ = self.predict(
+            self.run.encode_config(incumbent_config, specific=True), weighting
+        )
+
+        if inc_cost > def_cost:
+            self.logger.warning(
+                "The predicted incumbent objective is worse than the predicted default "
+                f"objective for budget: {budget}. Aborting ablation path calculation."
+            )
+            return None
+
+        # Copy the hps names as to not remove objects from the original list
+        hp_it = self.hp_names.copy()
+
+        # One single-row frame per step; they are concatenated once at the end
+        steps = [
+            pd.DataFrame(
+                {
+                    "hp_name": "Default",
+                    "importance": 0,
+                    "variance": def_std,
+                    "new_performance": def_cost,
+                },
+                index=[0],
+            )
+        ]
+
+        for i in range(len(hp_it)):
+            # Get the results of the current ablation iteration
+            continue_ablation, max_hp, max_hp_cost, max_hp_std = self.ablation(
+                budget, incumbent_config, def_cost, hp_it, weighting
+            )
+
+            if not continue_ablation:
+                break
+
+            # The improvement is measured against the cost of the previous step
+            diff = def_cost - max_hp_cost
+            def_cost = max_hp_cost
+
+            steps.append(
+                pd.DataFrame(
+                    {
+                        "hp_name": max_hp,
+                        "importance": diff,
+                        "variance": max_hp_std,
+                        "new_performance": max_hp_cost,
+                    },
+                    index=[i + 1],
+                )
+            )
+
+            # Remove the current best hp for keeping the order right
+            hp_it.remove(max_hp)
+
+        return pd.concat(steps).reset_index(drop=True)
+
+    def ablation(
+        self,
+        budget: Optional[Union[int, float]],
+        incumbent_config: Any,
+        def_cost: Any,
+        hp_it: List[str],
+        weighting: np.ndarray[Any, Any],
+    ) -> Tuple[Any, Any, Any, Any]:
+        """
+        Perform one ablation step and find the hyperparameter with the largest improvement.
+
+        Each remaining hyperparameter is set to its incumbent value in a copy of
+        `self.default_config`, and the predicted cost is compared to `def_cost`. The
+        hyperparameter with the largest difference is then switched in
+        `self.default_config` itself.
+
+        Parameters
+        ----------
+        budget: Optional[Union[int, float]]
+            The budget of the run. Only used for the log message.
+        incumbent_config: Any
+            The incumbent configuration.
+        def_cost: Any
+            The predicted cost of the current default configuration.
+        hp_it: List[str]
+            A list of the HPs that still have to be looked at.
+        weighting : np.ndarray[Any, Any]
+            The weighting of the objective values.
+
+        Returns
+        -------
+        Tuple[Any, Any, Any, Any]
+            continue_ablation, max_hp, max_hp_performance, max_hp_std.
+            (False, None, None, None) if no remaining hyperparameter is part of both the
+            incumbent and the default configuration.
+        """
+        best_hp = ""
+        best_gain = -np.inf
+
+        for hp in hp_it:
+            # Only hyperparameters present in both configurations can be switched
+            if hp not in incumbent_config.keys() or hp not in self.default_config.keys():
+                continue
+
+            candidate = copy.copy(self.default_config)
+            candidate[hp] = incumbent_config[hp]
+
+            candidate_cost, _ = self.predict(
+                self.run.encode_config(candidate, specific=True), weighting
+            )
+            gain = def_cost - candidate_cost
+
+            # Keep track of the hyperparameter with the maximum difference in this round
+            if gain > best_gain:
+                best_hp, best_gain = hp, gain
+
+        if best_hp == "":
+            hp_count = len(list(self.cs.keys()))
+            self.logger.info(
+                f"End ablation at step {hp_count - len(hp_it) + 1}/{hp_count} "
+                f"for budget {budget} (remaining hyperparameters not activate in incumbent or "
+                "default configuration)."
+            )
+            return False, None, None, None
+
+        # For the maximum impact hyperparameter, switch the default with the incumbent value
+        self.default_config[best_hp] = incumbent_config[best_hp]
+        best_cost, best_std = self.predict(
+            self.run.encode_config(self.default_config, specific=True), weighting
+        )
+        return True, best_hp, best_cost, best_std
diff --git a/deepcave/evaluators/mo_fanova.py b/deepcave/evaluators/mo_fanova.py
new file mode 100644
index 00000000..fe0bbb0f
--- /dev/null
+++ b/deepcave/evaluators/mo_fanova.py
@@ -0,0 +1,157 @@
+# Copyright 2021-2024 The DeepCAVE Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# noqa: D400
+"""
+# fANOVA
+
+This module provides a tool for assessing the importance of an algorithm's hyperparameters.
+
+Utilities provide calculation of the data wrt the budget and train the forest on the encoded data.
+
+## Classes
+    - MOfANOVA: Multi-objective fANOVA; calculate and provide midpoints and sizes.
+"""
+
+from typing import List, Optional, Union
+
+import pandas as pd
+
+from deepcave.evaluators.epm.fanova_forest import FanovaForest
+from deepcave.evaluators.fanova import fANOVA
+from deepcave.runs import AbstractRun
+from deepcave.runs.objective import Objective
+from deepcave.utils.multi_objective_importance import get_weightings
+
+
+class MOfANOVA(fANOVA):
+ """
+ Multi-Objective fANOVA.
+
+ Calculate and provide midpoints and sizes from the forest's split values in order to get
+ the marginals.
+ Override: to train the random forest with an arbitrary weighting of the objectives
+ (multi-objective case).
+ """
+
+    def __init__(self, run: AbstractRun):
+        """
+        Initialize the evaluator for an initialized run.
+
+        Parameters
+        ----------
+        run : AbstractRun
+            The run to analyze.
+
+        Raises
+        ------
+        RuntimeError
+            If the run has no configuration space.
+        """
+        # Reject runs without configuration space before the parent is set up
+        if run.configspace is None:
+            raise RuntimeError("The run needs to be initialized.")
+
+        super().__init__(run)
+
+        # Result table; stays None until `calculate` has been called
+        self.importances_ = None
+
+    def calculate(
+        self,
+        objectives: Optional[Union[Objective, List[Objective]]] = None,
+        budget: Optional[Union[int, float]] = None,
+        n_trees: int = 100,
+        seed: int = 0,
+    ) -> None:
+        """
+        Get the data with respect to budget and train one forest per weighting.
+
+        The objectives are normalized and combined into a weighted sum for every weighting
+        returned by `get_weightings`. A forest is trained on each weighted sum and the
+        resulting importances are collected in `self.importances_`.
+
+        Note
+        ----
+        The forest is configured with `n_trees` and `seed` only.
+
+        Parameters
+        ----------
+        objectives : Optional[Union[Objective, List[Objective]]], optional
+            Considered objectives. By default None. If None, all objectives are considered.
+        budget : Optional[Union[int, float]], optional
+            Considered budget. By default None. If None, the highest budget is chosen.
+        n_trees : int, optional
+            How many trees should be used. By default 100.
+        seed : int
+            Random seed. By default 0.
+        """
+        if objectives is None:
+            objectives = self.run.get_objectives()
+        if budget is None:
+            budget = self.run.get_highest_budget()
+
+        self.n_trees = n_trees
+
+        # Get data
+        df = self.run.get_encoded_data(
+            objectives, budget, specific=True, include_combined_cost=True
+        )
+
+        # Normalize objectives to [0, 1]; maximized objectives are flipped
+        assert isinstance(objectives, list)
+        objectives_normed = []
+        for obj in objectives:
+            column = df[obj.name]
+            scaled = (column - column.min()) / (column.max() - column.min())
+            if obj.optimize == "upper":
+                scaled = 1 - scaled
+            normed = obj.name + "_normed"
+            df[normed] = scaled
+            objectives_normed.append(normed)
+
+        df = df.dropna(subset=objectives_normed)
+        X = df[self.hp_names].to_numpy()
+        weightings = get_weightings(objectives_normed, df)
+
+        # The leading empty frame keeps the result an empty frame if there is no weighting
+        frames = [pd.DataFrame([])]
+
+        # Calculate importance for each weighting generated from the pareto efficient points
+        for w in weightings:
+            weighted_sum = sum(df[name] * factor for name, factor in zip(objectives_normed, w))
+            Y = weighted_sum.to_numpy()
+
+            self._model = FanovaForest(self.cs, n_trees=n_trees, seed=seed)
+            self._model.train(X, Y)
+
+            # Importances of the parent class for the model that was just trained
+            raw_importances = pd.DataFrame(super().get_importances(hp_names=None))
+            df_res = raw_importances.loc[0:1].T.reset_index()
+            df_res["weight"] = w[0]
+            frames.append(df_res)
+
+        column_names = {0: "importance", 1: "variance", "index": "hp_name"}
+        self.importances_ = pd.concat(frames).rename(columns=column_names).reset_index(drop=True)
+
+    def get_importances_(self, hp_names: Optional[List[str]] = None) -> str:
+        """
+        Return the importance scores from the passed Hyperparameter names.
+
+        Parameters
+        ----------
+        hp_names : Optional[List[str]]
+            Selected Hyperparameter names to get the importance scores from. If None or
+            empty, the scores of all Hyperparameters are returned.
+
+        Returns
+        -------
+        str
+            JSON string (created with `pandas.DataFrame.to_json`) of the rows holding the
+            Hyperparameter names and the corresponding importance scores and variances.
+
+        Raises
+        ------
+        RuntimeError
+            If the important scores are not calculated.
+        """
+        if self.importances_ is None:
+            raise RuntimeError("Importance scores must be calculated first.")
+
+        selection = self.importances_
+        if hp_names:
+            # Keep only the rows of the requested Hyperparameters
+            selection = selection.loc[selection["hp_name"].isin(hp_names)]
+        return selection.to_json()
diff --git a/deepcave/evaluators/mo_lpi.py b/deepcave/evaluators/mo_lpi.py
new file mode 100644
index 00000000..06c72afa
--- /dev/null
+++ b/deepcave/evaluators/mo_lpi.py
@@ -0,0 +1,308 @@
+# Copyright 2021-2024 The DeepCAVE Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# noqa: D400
+"""
+# LPI
+
+This module provides utilities to calculate the local parameter importance (LPI).
+
+## Classes
+    - MOLPI: This class calculates the multi-objective local parameter importance (LPI).
+"""
+
+from typing import Dict, List, Optional, Tuple, Union
+
+import numpy as np
+import pandas as pd
+from ConfigSpace import Configuration
+from ConfigSpace.c_util import change_hp_value
+from ConfigSpace.util import impute_inactive_values
+
+from deepcave.evaluators.epm.fanova_forest import FanovaForest
+from deepcave.evaluators.lpi import LPI
+from deepcave.runs import AbstractRun
+from deepcave.runs.objective import Objective
+from deepcave.utils.multi_objective_importance import get_weightings
+
+
+# https://github.com/automl/ParameterImportance/blob/f4950593ee627093fc30c0847acc5d8bf63ef84b/pimp/evaluator/local_parameter_importance.py#L27
+class MOLPI(LPI):
+ """
+ Calculate the multi-objective local parameter importance (LPI).
+
+ Override: to train the random forest with an arbitrary weighting of the objectives
+ (multi-objective case).
+
+ Properties
+ ----------
+ run : AbstractRun
+ The AbstractRun to get the importance from.
+ cs : ConfigurationSpace
+ The configuration space of the run.
+ hp_names : List[str]
+ The names of the Hyperparameters.
+ variances : Dict[Any, list]
+ The overall variances per tree.
+ importances : dict
+ The importances of the Hyperparameters.
+ continuous_neighbors : int
+ The number of neighbors chosen for continuous Hyperparameters.
+ incumbent : Configuration
+ The incumbent of the run.
+ default : Configuration
+ A configuration containing Hyperparameters with default values.
+ incumbent_array : numpy.ndarray
+ The internal vector representation of the incumbent.
+ seed : int
+ The seed. If not provided it will be random.
+ rs : RandomState
+ A random state with a given seed value.
+ """
+
+    def __init__(self, run: AbstractRun):
+        """
+        Initialize the parent evaluator and the empty result table.
+
+        Parameters
+        ----------
+        run : AbstractRun
+            The AbstractRun to get the importance from.
+        """
+        super().__init__(run)
+
+        # Result table; stays None until `calculate` has been called
+        self.importances: Optional[pd.DataFrame] = None
+
+    def calculate(
+        self,
+        objectives: Optional[Union[Objective, List[Objective]]] = None,
+        budget: Optional[Union[int, float]] = None,
+        continous_neighbors: int = 500,
+        n_trees: int = 10,
+        seed: int = 0,
+    ) -> None:
+        """
+        Prepare the data and train one forest per weighting.
+
+        The objectives are normalized and combined into a weighted sum for every weighting
+        returned by `get_weightings`. For each weighting a forest is trained, the incumbent
+        is set to the configuration with the smallest weighted sum and the importances are
+        calculated with `calc_one_weighting`. Results are collected in `self.importances`.
+
+        Parameters
+        ----------
+        objectives : Optional[Union[Objective, List[Objective]]], optional
+            Considered objectives. By default, None. If None, all objectives are considered.
+        budget : Optional[Union[int, float]], optional
+            Considered budget. By default, None. If None, the highest budget is chosen.
+        continous_neighbors : int, optional
+            How many neighbors should be chosen for continuous hyperparameters (HPs).
+            By default, 500.
+        n_trees : int, optional
+            The number of trees for the fanova forest.
+            Default is 10.
+        seed : int, optional
+            The seed for the random state and the forest. By default, 0.
+        """
+        if objectives is None:
+            objectives = self.run.get_objectives()
+
+        if budget is None:
+            budget = self.run.get_highest_budget()
+
+        # Set variables
+        self.continous_neighbors = continous_neighbors
+        self.default = self.cs.get_default_configuration()
+
+        self.seed = seed
+        self.rs = np.random.RandomState(seed)
+
+        # Get data
+        df = self.run.get_encoded_data(
+            objectives=objectives,
+            budget=budget,
+            specific=True,
+            include_combined_cost=True,
+            include_config_ids=True,
+        )
+
+        # Normalize objectives to [0, 1]; maximized objectives are flipped
+        assert isinstance(objectives, list)
+        objectives_normed = list()
+        for obj in objectives:
+            normed = obj.name + "_normed"
+            df[normed] = (df[obj.name] - df[obj.name].min()) / (
+                df[obj.name].max() - df[obj.name].min()
+            )
+            if obj.optimize == "upper":
+                df[normed] = 1 - df[normed]
+            objectives_normed.append(normed)
+        df = df.dropna(subset=objectives_normed)
+        X = df[self.hp_names].to_numpy()
+        df_all = pd.DataFrame([])
+        weightings = get_weightings(objectives_normed, df)
+
+        # Calculate importance for each weighting generated from the pareto efficient points
+        for w in weightings:
+            Y = sum(df[obj] * weighting for obj, weighting in zip(objectives_normed, w)).to_numpy()
+            # Use same forest as for fanova
+            self._model = FanovaForest(self.cs, n_trees=n_trees, seed=seed)
+            self._model.train(X, Y)
+
+            # The incumbent minimizes the weighted sum, which is exactly Y. Reusing Y avoids
+            # computing the sum twice with a generator variable that shadows `w`.
+            incumbent_cfg_id = np.argmin(Y)
+            self.incumbent = self.run.get_config(df.iloc[incumbent_cfg_id]["config_id"])
+            self.incumbent_array = self.incumbent.get_array()
+            importances = self.calc_one_weighting()
+            df_res = pd.DataFrame(importances).loc[0:1].T.reset_index()
+            df_res["weight"] = w[0]
+            df_all = pd.concat([df_all, df_res])
+        self.importances = df_all.rename(
+            columns={0: "importance", 1: "variance", "index": "hp_name"}
+        ).reset_index(drop=True)
+        # Replace negative numbers by zero in all non-string cells
+        self.importances = self.importances.map(
+            lambda x: max(x, 0) if not isinstance(x, str) else x
+        )  # no negative values
+
+ def calc_one_weighting(self) -> Dict[str, Tuple[float, float]]:
+ """
+ Calculate importances and variances with the model trained for one weighting.
+
+ For every hyperparameter of the incumbent that has a neighborhood, the predictions of
+ each tree are collected for all neighbors. The variance of these predictions per tree is
+ divided by the sum of the variances over all hyperparameters. The mean and the variance
+ of the normalized values over the trees are returned per hyperparameter.
+
+ Returns
+ -------
+ imp_var_dict: Dict[str, Tuple[float, float]]
+ Dictionary of importances and variances.
+ """
+ # Get neighborhood sampled on a unit hypercube.
+ neighborhood = self._get_neighborhood()
+
+ # The delta performance is needed from the default configuration and the incumbent
+ def_perf, def_var = self._predict_mean_var(self.default)
+ inc_perf, inc_var = self._predict_mean_var(self.incumbent)
+ delta = def_perf - inc_perf
+
+ # These are used for plotting and hold the predictions for each neighbor of each parameter.
+ # That means performances holds the mean, variances the variance of the forest.
+ performances: Dict[str, List[np.ndarray]] = {}
+ variances: Dict[str, List[np.ndarray]] = {}
+ # These are used for importance and hold the corresponding importance/variance over
+ # neighbors. Only important if NOT quantifying importance via performance-variance across
+ # neighbors.
+
+ # Nested list of values per tree in random forest.
+ predictions: Dict[str, List[List[np.ndarray]]] = {}
+
+ # Iterate over parameters
+ for hp_idx, hp_name in enumerate(self.incumbent.keys()):
+ if hp_name not in neighborhood:
+ continue
+
+ performances[hp_name] = []
+ variances[hp_name] = []
+ predictions[hp_name] = []
+ incumbent_added = False
+ incumbent_idx = 0
+
+ # Iterate over neighbors
+ for unit_neighbor, neighbor in zip(neighborhood[hp_name][0], neighborhood[hp_name][1]):
+ if not incumbent_added:
+ # Detect incumbent
+ if unit_neighbor > self.incumbent_array[hp_idx]:
+ performances[hp_name].append(inc_perf)
+ variances[hp_name].append(inc_var)
+ incumbent_added = True
+ else:
+ incumbent_idx += 1
+
+ # Create the neighbor-Configuration object
+ new_array = self.incumbent_array.copy()
+ new_array = change_hp_value(
+ self.cs, new_array, hp_name, unit_neighbor, self.cs.index_of[hp_name]
+ )
+ new_config = impute_inactive_values(Configuration(self.cs, vector=new_array))
+
+ # Get the leaf values
+ x = np.array(new_config.get_array())
+ leaf_values = self._model.get_leaf_values(x)
+
+ # And the prediction/performance/variance
+ predictions[hp_name].append([np.mean(tree_pred) for tree_pred in leaf_values])
+ performances[hp_name].append(np.mean(predictions[hp_name][-1]))
+ variances[hp_name].append(np.var(predictions[hp_name][-1]))
+
+ # Insert the incumbent value into the neighborhood at the detected position
+ if len(neighborhood[hp_name][0]) > 0:
+ neighborhood[hp_name][0] = np.insert(
+ neighborhood[hp_name][0], incumbent_idx, self.incumbent_array[hp_idx]
+ )
+ neighborhood[hp_name][1] = np.insert(
+ neighborhood[hp_name][1], incumbent_idx, self.incumbent[hp_name]
+ )
+ else:
+ neighborhood[hp_name][0] = np.array(self.incumbent_array[hp_idx])
+ neighborhood[hp_name][1] = [self.incumbent[hp_name]]
+
+ if not incumbent_added:
+ performances[hp_name].append(inc_perf)
+ variances[hp_name].append(inc_var)
+
+ # Avoid division by zero
+ # NOTE(review): `delta` is not read again in this method after this point.
+ if delta == 0:
+ delta = 1
+
+ # Creating actual importance value (by normalizing over sum of vars)
+ num_trees = len(list(predictions.values())[0][0])
+ hp_names = list(performances.keys())
+
+ overall_var_per_tree = {}
+ for hp_name in hp_names:
+ hp_variances = []
+ for tree_idx in range(num_trees):
+ variance = np.var([neighbor[tree_idx] for neighbor in predictions[hp_name]])
+ hp_variances += [variance]
+
+ overall_var_per_tree[hp_name] = hp_variances
+
+ # Sum up variances per tree across parameters
+ sum_var_per_tree = [
+ sum([overall_var_per_tree[hp_name][tree_idx] for hp_name in hp_names])
+ for tree_idx in range(num_trees)
+ ]
+
+ # Normalize (NaN for trees whose summed variance is zero)
+ overall_var_per_tree = {
+ p: [
+ t / sum_var_per_tree[idx] if sum_var_per_tree[idx] != 0.0 else np.nan
+ for idx, t in enumerate(trees)
+ ]
+ for p, trees in overall_var_per_tree.items()
+ }
+ imp_var_dict = {
+ k: (np.mean(overall_var_per_tree[k]), np.var(overall_var_per_tree[k]))
+ for k in overall_var_per_tree
+ }
+ return imp_var_dict
+
+    def get_importances_(self, hp_names: List[str]) -> str:
+        """
+        Return the importance scores from the passed Hyperparameter names.
+
+        Parameters
+        ----------
+        hp_names : List[str]
+            Selected Hyperparameter names to get the importance scores from. If empty (or
+            None), the scores of all Hyperparameters are returned.
+
+        Returns
+        -------
+        str
+            JSON string (created with `pandas.DataFrame.to_json`) of the rows holding the
+            Hyperparameter names and the corresponding importance scores and variances.
+
+        Raises
+        ------
+        RuntimeError
+            If the important scores are not calculated.
+        """
+        if self.importances is None:
+            raise RuntimeError("Importance scores must be calculated first.")
+
+        selection = self.importances
+        if hp_names:
+            # Keep only the rows of the requested Hyperparameters
+            selection = selection.loc[selection["hp_name"].isin(hp_names)]
+        return selection.to_json()
diff --git a/deepcave/plugins/hyperparameter/ablation_paths.py b/deepcave/plugins/hyperparameter/ablation_paths.py
index eb68f60e..152d2609 100644
--- a/deepcave/plugins/hyperparameter/ablation_paths.py
+++ b/deepcave/plugins/hyperparameter/ablation_paths.py
@@ -25,16 +25,22 @@
- Ablation_Paths: This class provides a plugin for the visualization of the ablation paths.
"""
-from typing import Any, Callable, Dict, List
+from typing import Any, Callable, Dict, List, Union
+
+import math
+from io import StringIO
import dash_bootstrap_components as dbc
import numpy as np
+import pandas as pd
+import plotly.express as px
import plotly.graph_objs as go
from dash import dcc, html
from dash.exceptions import PreventUpdate
from deepcave import config
from deepcave.evaluators.ablation import Ablation
+from deepcave.evaluators.mo_ablation import MOAblation
from deepcave.plugins.static import StaticPlugin
from deepcave.runs import AbstractRun
from deepcave.utils.cast import optional_int
@@ -77,9 +83,19 @@ def get_input_layout(register: Callable) -> List[Any]:
[
dbc.Col(
[
- dbc.Label("Objective"),
+ dbc.Label("Objective 1"),
+ dbc.Select(
+ id=register("objective_id1", ["value", "options"], type=int),
+ placeholder="Select objective ...",
+ ),
+ ],
+ md=6,
+ ),
+ dbc.Col(
+ [
+ dbc.Label("Objective 2"),
dbc.Select(
- id=register("objective_id", ["value", "options"], type=int),
+ id=register("objective_id2", ["value", "options"], type=int),
placeholder="Select objective ...",
),
],
@@ -196,7 +212,7 @@ def load_inputs(self) -> Dict[str, Dict[str, Any]]:
"show_confidence": {"options": get_select_options(binary=True), "value": "false"},
}
- def load_dependency_inputs(self, run, _: Any, inputs: Dict[str, Any]) -> Dict[str, Any]: # type: ignore # noqa: E501
+ def load_dependency_inputs(self, run, _, inputs) -> Dict[str, Any]: # type: ignore # noqa: E501
"""
Works like 'load_inputs' but called after inputs have changed.
@@ -207,7 +223,7 @@ def load_dependency_inputs(self, run, _: Any, inputs: Dict[str, Any]) -> Dict[st
Parameters
----------
- run
+ run:
The selected run.
inputs : Dict[str, Any]
Current content of the inputs.
@@ -220,8 +236,20 @@ def load_dependency_inputs(self, run, _: Any, inputs: Dict[str, Any]) -> Dict[st
# Prepare objectives
objective_names = run.get_objective_names()
objective_ids = run.get_objective_ids()
+ objective_value1 = inputs["objective_id1"]["value"]
+ objective_value2 = inputs["objective_id2"]["value"] # in the multi-objective case
+
+ # Pre-set values
+ if objective_value1 is None:
+ objective_value1 = objective_ids[0]
+
objective_options = get_select_options(objective_names, objective_ids)
- objective_value = inputs["objective_id"]["value"]
+ objective_options2 = [
+ dict for dict in objective_options if dict["value"] != objective_value1
+ ] # make sure the same objective cannot be chosen twice
+ objective_options2 += [
+ {"label": "Select objective ...", "value": -1}
+ ] # add the option to deselect the second objective
# Prepare budgets
budgets = run.get_budgets(human=True)
@@ -229,13 +257,9 @@ def load_dependency_inputs(self, run, _: Any, inputs: Dict[str, Any]) -> Dict[st
budget_options = get_checklist_options(budgets, budget_ids)
budget_value = inputs["budget_id"]["value"]
- hp_names = run.configspace.get_hyperparameter_names()
+ hp_names = list(run.configspace.keys())
n_hps = inputs["n_hps"]["value"]
- # Pre-set values
- if objective_value is None:
- objective_value = objective_ids[0]
-
if n_hps == 0:
n_hps = len(hp_names)
@@ -245,9 +269,13 @@ def load_dependency_inputs(self, run, _: Any, inputs: Dict[str, Any]) -> Dict[st
budget_value = budget_ids[-1]
return {
- "objective_id": {
+ "objective_id1": {
"options": objective_options,
- "value": objective_value,
+ "value": objective_value1,
+ },
+ "objective_id2": {
+ "options": objective_options2,
+ "value": objective_value2,
},
"budget_id": {
"options": budget_options,
@@ -289,7 +317,10 @@ def process(run: AbstractRun, inputs: Dict[str, Any]) -> Dict[str, Any]:
RuntimeError
If the number of trees is not specified.
"""
- objective = run.get_objective(inputs["objective_id"])
+ objective: Any = None
+ objective = run.get_objective(inputs["objective_id1"])
+ if inputs["objective_id2"] not in (None, -1):
+ objective = [objective, run.get_objective(inputs["objective_id2"])]
n_trees = inputs["n_trees"]
if n_trees is None:
@@ -297,18 +328,24 @@ def process(run: AbstractRun, inputs: Dict[str, Any]) -> Dict[str, Any]:
budgets = run.get_budgets(include_combined=True)
- evaluator = Ablation(run)
+ evaluator: Union[Ablation, MOAblation, None] = None
+ if isinstance(objective, list):
+ evaluator = MOAblation(run)
+ else:
+ evaluator = Ablation(run)
# Collect data
- data = {}
+ data: Dict[Any, Any] = {}
for budget_id, budget in enumerate(budgets):
- assert isinstance(budget, (int, float))
assert isinstance(budget, (int, float))
evaluator.calculate(objective, budget, n_trees=n_trees, seed=0)
-
- performances = evaluator.get_ablation_performances()
- improvements = evaluator.get_ablation_improvements()
- data[budget_id] = [performances, improvements]
+ if isinstance(objective, list):
+ assert isinstance(evaluator, MOAblation)
+ data[budget_id] = evaluator.get_importances()
+ else:
+ performances = evaluator.get_ablation_performances()
+ improvements = evaluator.get_ablation_improvements()
+ data[budget_id] = [performances, improvements]
return data # type: ignore
@staticmethod
@@ -365,9 +402,13 @@ def load_outputs(run, inputs, outputs) -> List[go.Figure]: # type: ignore
return [figure1, figure2]
The figures of the ablation paths.
"""
+ if inputs["objective_id2"] not in (None, -1):
+ # MO case: other plot
+ return AblationPaths.load_outputs_mo(run, inputs, outputs)
+
# First selected, should always be shown first
selected_budget_id = inputs["budget_id"]
- objective = run.get_objective(inputs["objective_id"])
+ objective = run.get_objective(inputs["objective_id1"])
n_hps = inputs["n_hps"]
show_confidence = inputs["show_confidence"]
@@ -462,3 +503,125 @@ def load_outputs(run, inputs, outputs) -> List[go.Figure]: # type: ignore
save_image(figure2, "ablation_path_improvement.pdf")
return [figure1, figure2]
+
+ @staticmethod
+ def load_outputs_mo(run, inputs, outputs) -> List[go.Figure]: # type: ignore
+ """
+ Read in the raw data and prepare it for the layout (multi-objective case).
+
+ Note
+ ----
+ The passed inputs are cleaned and therefore differ
+ compared to 'load_inputs' or 'load_dependency_inputs'.
+ Please see '_clean_inputs' for more information.
+
+ Parameters
+ ----------
+ run
+ The selected run.
+ inputs
+ Input and filter values from the user.
+ outputs
+ Raw output from the run.
+
+ Returns
+ -------
+ List[go.Figure]
+ The stacked ablation-path figure and a blank placeholder figure.
+ """
+ # First selected, should always be shown first
+ objective1 = run.get_objective(inputs["objective_id1"]).name
+ selected_budget_id = inputs["budget_id"]
+ n_hps = inputs["n_hps"]
+
+ if n_hps == "" or n_hps is None:
+ raise PreventUpdate
+ else:
+ n_hps = int(n_hps)
+
+ # Collect data
+ data = {}
+ for budget_id, importances_json in outputs.items():
+ # Important to cast budget_id here because of json serialization
+ budget_id = int(budget_id)
+ if budget_id != selected_budget_id:
+ continue
+ df_importances = pd.read_json(StringIO(importances_json))
+ data[budget_id] = df_importances
+
+ # Rank hyperparameters by their maximum importance on the selected budget
+ idx = (
+ data[selected_budget_id]
+ .groupby("hp_name")["importance"]
+ .max()
+ .sort_values(ascending=False)
+ .index
+ )
+ idx = list(idx[:n_hps]) + ["Default"]
+
+ df = data[selected_budget_id][
+ data[selected_budget_id]["hp_name"].isin(idx)
+ ].copy() # only keep selected hps
+
+ df.loc[df["hp_name"] == "Default", "accuracy"] = 1 - df["new_performance"]
+ df.loc[df["hp_name"] != "Default", "accuracy"] = df["importance"]
+
+ grouped_df = df.groupby(["weight", "hp_name"])["accuracy"].sum().unstack(fill_value=0)
+ color_palette = px.colors.qualitative.Plotly # Choose a color palette
+ colors = {
+ hp: color_palette[i % len(color_palette)]
+ for i, hp in enumerate(list(run.configspace.keys()) + ["Default"])
+ }
+
+ # Create traces for each hp_name
+ traces = []
+ for column in grouped_df.columns:
+ traces.append(
+ go.Scatter(
+ x=grouped_df.index,
+ y=grouped_df[column],
+ mode="lines",
+ stackgroup="one", # This makes the traces stacked
+ name=column,
+ hoverinfo="skip",
+ showlegend=True,
+ opacity=0.2,
+ fillcolor=colors[column],
+ line=dict(color=colors[column]),
+ )
+ )
+
+ fig = go.Figure(data=traces)
+
+ # Update the layout
+ fig.update_layout(
+ xaxis_title="Weight for " + objective1,
+ yaxis_title="Sum of the weighted<br>normalized performance",
+ xaxis=dict(range=[0, 1], tickangle=-45),
+ yaxis=dict(
+ range=[
+ math.floor(
+ 10 * (1 - (df[df["hp_name"] == "Default"]["new_performance"].max() + 0.01))
+ )
+ / 10,
+ 1,
+ ]
+ ),
+ margin=config.FIGURE_MARGIN,
+ font=dict(size=config.FIGURE_FONT_SIZE),
+ )
+
+ save_image(fig, "ablation_path_performance.pdf")
+
+ # Create a blank figure to hide the second figure, which does not exist for MO
+ white_fig = go.Figure()
+ white_fig.update_layout(
+ paper_bgcolor="white",
+ plot_bgcolor="white",
+ xaxis=dict(showgrid=False, zeroline=False, visible=False),
+ yaxis=dict(showgrid=False, zeroline=False, visible=False),
+ font=dict(color="white"),
+ showlegend=False,
+ )
+
+ return [fig, white_fig]
diff --git a/deepcave/plugins/hyperparameter/importances.py b/deepcave/plugins/hyperparameter/importances.py
index 188bbc21..fe432bb7 100644
--- a/deepcave/plugins/hyperparameter/importances.py
+++ b/deepcave/plugins/hyperparameter/importances.py
@@ -27,8 +27,12 @@
from typing import Any, Callable, Dict, List, Optional, Union
+from io import StringIO
+
import dash_bootstrap_components as dbc
import numpy as np
+import pandas as pd
+import plotly.express as px
import plotly.graph_objs as go
from ConfigSpace import ConfigurationSpace, Constant
from dash import dcc, html
@@ -37,6 +41,8 @@
from deepcave import config
from deepcave.evaluators.fanova import fANOVA as GlobalEvaluator
from deepcave.evaluators.lpi import LPI as LocalEvaluator
+from deepcave.evaluators.mo_fanova import MOfANOVA
+from deepcave.evaluators.mo_lpi import MOLPI
from deepcave.plugins.static import StaticPlugin
from deepcave.runs import AbstractRun
from deepcave.utils.cast import optional_int
@@ -80,15 +86,29 @@ def get_input_layout(register: Callable) -> List[Any]:
Layout for the input block.
"""
return [
- html.Div(
+ dbc.Row(
[
- dbc.Label("Objective"),
- dbc.Select(
- id=register("objective_id", ["value", "options"], type=int),
- placeholder="Select objective ...",
+ dbc.Col(
+ [
+ dbc.Label("Objective 1"),
+ dbc.Select(
+ id=register("objective_id1", ["value", "options"], type=int),
+ placeholder="Select objective ...",
+ ),
+ ],
+ md=6,
+ ),
+ dbc.Col(
+ [
+ dbc.Label("Objective 2"),
+ dbc.Select(
+ id=register("objective_id2", ["value", "options"], type=int),
+ placeholder="Select objective ...",
+ ),
+ ],
+ md=6,
),
],
- className="mb-3",
),
dbc.Row(
[
@@ -204,9 +224,9 @@ def load_inputs(self) -> Dict[str, Dict[str, Any]]:
"budget_ids": {"options": get_checklist_options(), "value": []},
}
- def load_dependency_inputs(self, run, _: Any, inputs: Dict[str, Any]) -> Dict[str, Any]: # type: ignore # noqa: E501
+ def load_dependency_inputs(self, run, _, inputs) -> Dict[str, Any]: # type: ignore # noqa: E501
"""
- Work like 'load_inputs' but called after inputs have changed.
+ Works like 'load_inputs' but called after inputs have changed.
Note
----
@@ -228,8 +248,20 @@ def load_dependency_inputs(self, run, _: Any, inputs: Dict[str, Any]) -> Dict[st
# Prepare objectives
objective_names = run.get_objective_names()
objective_ids = run.get_objective_ids()
+ objective_value1 = inputs["objective_id1"]["value"]
+ objective_value2 = inputs["objective_id2"]["value"] # in the multi-objective case
+
+ # Pre-set values
+ if objective_value1 is None:
+ objective_value1 = objective_ids[0]
+
objective_options = get_select_options(objective_names, objective_ids)
- objective_value = inputs["objective_id"]["value"]
+ objective_options2 = [
+ dict for dict in objective_options if dict["value"] != objective_value1
+ ] # make sure the same objective cannot be chosen twice
+ objective_options2 += [
+ {"label": "Select objective ...", "value": -1}
+ ] # add the option to deselect the second objective
# Prepare budgets
budgets = run.get_budgets(human=True)
@@ -242,10 +274,6 @@ def load_dependency_inputs(self, run, _: Any, inputs: Dict[str, Any]) -> Dict[st
hp_value = inputs["hyperparameter_names"]["value"]
n_hps = inputs["n_hps"]["value"]
- # Pre-set values
- if objective_value is None:
- objective_value = objective_ids[0]
-
if n_hps == 0:
n_hps = len(hp_names)
@@ -257,9 +285,13 @@ def load_dependency_inputs(self, run, _: Any, inputs: Dict[str, Any]) -> Dict[st
budget_value = [budget_ids[-1]]
return {
- "objective_id": {
+ "objective_id1": {
"options": objective_options,
- "value": objective_value,
+ "value": objective_value1,
+ },
+ "objective_id2": {
+ "options": objective_options2,
+ "value": objective_value2,
},
"method": {
"value": inputs["method"]["value"],
@@ -301,7 +333,7 @@ def process(run: AbstractRun, inputs: Dict[str, Any]) -> Dict[str, Any]:
Returns
-------
Dict[str, Any]
- A serialzied dictionary.
+ A serialized dictionary.
Raises
------
@@ -309,7 +341,10 @@ def process(run: AbstractRun, inputs: Dict[str, Any]) -> Dict[str, Any]:
If the number of trees is not specified.
If the method is not found.
"""
- objective = run.get_objective(inputs["objective_id"])
+ objective: Any = None
+ objective = run.get_objective(inputs["objective_id1"])
+ if inputs["objective_id2"] not in (None, -1):
+ objective = [objective, run.get_objective(inputs["objective_id2"])]
method = inputs["method"]
n_trees = inputs["n_trees"]
@@ -342,10 +377,14 @@ def process(run: AbstractRun, inputs: Dict[str, Any]) -> Dict[str, Any]:
hp_names = list(run.configspace.keys())
budgets = run.get_budgets(include_combined=True)
+ # Initialize the evaluator
evaluator: Optional[Union[LocalEvaluator, GlobalEvaluator]] = None
- if method == "local":
- # Initialize the evaluator
+ if method == "local" and isinstance(objective, list):
+ evaluator = MOLPI(run)
+ elif method == "local":
evaluator = LocalEvaluator(run)
+ elif method == "global" and isinstance(objective, list):
+ evaluator = MOfANOVA(run)
elif method == "global":
evaluator = GlobalEvaluator(run)
else:
@@ -353,14 +392,26 @@ def process(run: AbstractRun, inputs: Dict[str, Any]) -> Dict[str, Any]:
# Collect data
data = {}
+ importances: Union[
+ str,
+ dict[str, tuple[float, float]],
+ dict[Union[str, tuple[str, ...]], tuple[float, float, float, float]],
+ ]
for budget_id, budget in enumerate(budgets):
assert isinstance(budget, (int, float))
evaluator.calculate(objective, budget, n_trees=n_trees, seed=0)
-
- importances = evaluator.get_importances(hp_names)
- if any(np.isnan(val) for value in importances.values() for val in value):
- logger.warning(f"Nan encountered in importance values for budget {budget}.")
+ if isinstance(objective, list):
+ assert isinstance(evaluator, (MOLPI, MOfANOVA))
+ importances = evaluator.get_importances_(hp_names)
+ if any(pd.read_json(StringIO(importances))["importance"].isna()):
+ logger.warning(f"Nan encountered in importance values for budget {budget}.")
+ else:
+ importances = evaluator.get_importances(hp_names)
+ assert isinstance(importances, Dict)
+ if any(np.isnan(val) for value in importances.values() for val in value):
+ logger.warning(f"Nan encountered in importance values for budget {budget}.")
data[budget_id] = importances
+
return data # type: ignore
@staticmethod
@@ -410,6 +461,10 @@ def load_outputs(run, inputs, outputs) -> go.Figure: # type: ignore
go.figure
The figure of the importances.
"""
+ if inputs["objective_id2"] not in (None, -1):
+ # MO case: other plot
+ return Importances.load_ouputs_mo_fanova(run, inputs, outputs)
+
# First selected, should always be shown first
selected_hp_names = inputs["hyperparameter_names"]
selected_budget_ids = inputs["budget_ids"]
@@ -476,3 +531,127 @@ def load_outputs(run, inputs, outputs) -> go.Figure: # type: ignore
save_image(figure, "importances.pdf")
return figure
+
+ @staticmethod
+ def load_ouputs_mo_fanova(run, inputs, outputs) -> go.Figure: # type: ignore
+ """
+ Read in the raw data and prepare it for the layout (multi-objective case).
+
+ Note
+ ----
+ The passed inputs are cleaned and therefore differ
+ compared to 'load_inputs' or 'load_dependency_inputs'.
+ Please see '_clean_inputs' for more information.
+
+ Parameters
+ ----------
+ run
+ The selected run.
+ inputs
+ Input and filter values from the user.
+ outputs
+ Raw output from the run.
+
+ Returns
+ -------
+ go.Figure
+ The figure of the importances.
+ """
+ # First selected, should always be shown first
+ objective1 = run.get_objective(inputs["objective_id1"]).name
+ selected_hp_names = inputs["hyperparameter_names"]
+ selected_budget_ids = inputs["budget_ids"]
+ n_hps = inputs["n_hps"]
+
+ if n_hps == "" or n_hps is None:
+ raise PreventUpdate
+ else:
+ n_hps = int(n_hps)
+
+ if len(selected_hp_names) == 0 or len(selected_budget_ids) == 0:
+ raise PreventUpdate()
+
+ # Collect data
+ data = {}
+ for budget_id, importances_json in outputs.items():
+ df_importances = pd.read_json(StringIO(importances_json))
+ # Important to cast budget_id here because of json serialization
+ budget_id = int(budget_id)
+ if budget_id not in selected_budget_ids:
+ continue
+
+ df_importances = df_importances[
+ df_importances["hp_name"].isin(selected_hp_names)
+ ] # only keep selected hps
+ data[budget_id] = df_importances
+
+ # Keep only n_hps most important hyperparameters according to max importance
+ selected_budget_id = max(selected_budget_ids)
+ idx = (
+ data[selected_budget_id]
+ .groupby("hp_name")["importance"]
+ .max()
+ .sort_values(ascending=False)
+ .index
+ )
+ idx = idx[:n_hps]
+
+ color_palette = px.colors.qualitative.Plotly # Choose a color palette
+ colors = {
+ hp: color_palette[i % len(color_palette)]
+ for i, hp in enumerate(list(run.configspace.keys()))
+ }
+
+ # Create the figure
+ figure = go.Figure()
+ df = data[selected_budget_id][
+ data[selected_budget_id]["hp_name"].isin(idx)
+ ] # only keep top hps
+
+ # Group by 'hp_name' and plot each group
+ for group_id, group_data in df.groupby("hp_name"):
+ # Sort data by the weight column
+ group_data = group_data.sort_values(by="weight")
+
+ figure.add_trace(
+ go.Scatter(
+ x=group_data["weight"],
+ y=group_data["importance"],
+ mode="lines",
+ name=group_id,
+ line=dict(color=colors[group_id]),
+ )
+ )
+
+ # Add the shaded area representing the variance
+ x = group_data["weight"]
+ y1 = (group_data["importance"] - group_data["variance"]).to_list()
+ y2 = group_data["importance"] + group_data["variance"]
+
+ figure.add_trace(
+ go.Scatter(
+ x=x.tolist() + x[::-1].tolist(),
+ y=y1 + y2[::-1].tolist(),
+ fill="toself",
+ hoverinfo="skip",
+ showlegend=False,
+ opacity=0.2,
+ fillcolor=colors[group_id],
+ line=dict(color=colors[group_id]),
+ mode="lines",
+ )
+ )
+
+ # Update the layout for labels, title, and axis limits
+ figure.update_layout(
+ xaxis_title="Weight for " + objective1,
+ yaxis_title="Importance",
+ xaxis=dict(range=[0, 1], tickangle=-45),
+ yaxis=dict(range=[0, df["importance"].max()]),
+ margin=config.FIGURE_MARGIN,
+ font=dict(size=config.FIGURE_FONT_SIZE),
+ )
+
+ save_image(figure, "importances.pdf")
+
+ return figure
diff --git a/deepcave/plugins/objective/pareto_front.py b/deepcave/plugins/objective/pareto_front.py
index f8793afc..25637f01 100644
--- a/deepcave/plugins/objective/pareto_front.py
+++ b/deepcave/plugins/objective/pareto_front.py
@@ -137,7 +137,7 @@ def get_input_layout(register: Callable) -> List[Any]:
[
dbc.Col(
[
- dbc.Label("Objective #1"),
+ dbc.Label("Objective 1"),
dbc.Select(
id=register("objective_id_1", ["value", "options"], type=int),
placeholder="Select objective ...",
@@ -147,7 +147,7 @@ def get_input_layout(register: Callable) -> List[Any]:
),
dbc.Col(
[
- dbc.Label("Objective #2"),
+ dbc.Label("Objective 2"),
dbc.Select(
id=register("objective_id_2", ["value", "options"], type=int),
placeholder="Select objective ...",
diff --git a/deepcave/runs/converters/dataframe.py b/deepcave/runs/converters/dataframe.py
index 7cbc0bce..11e34048 100644
--- a/deepcave/runs/converters/dataframe.py
+++ b/deepcave/runs/converters/dataframe.py
@@ -122,7 +122,7 @@ def from_path(cls, path: Union[Path, str]) -> "DataFrameRun":
return run
@staticmethod
- def load_objectives(path: Path) -> list[Objective]:
+ def load_objectives(path: Path) -> List[Objective]:
"""
Load the objectives of the run from the trials.csv file.
diff --git a/deepcave/utils/multi_objective_importance.py b/deepcave/utils/multi_objective_importance.py
new file mode 100644
index 00000000..36275da1
--- /dev/null
+++ b/deepcave/utils/multi_objective_importance.py
@@ -0,0 +1,69 @@
+# Copyright 2021-2024 The DeepCAVE Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# noqa: D400
+"""
+# Multi-Objective importances
+
+This module provides utilities for calculating multi-objective importances.
+"""
+
+from typing import List
+
+import numpy as np
+import pandas as pd
+
+
+def is_pareto_efficient(costs: np.ndarray) -> np.ndarray:
+ """
+ Find the pareto-efficient points.
+
+ Parameters
+ ----------
+ costs : numpy.ndarray
+ An (n_points, n_costs) array.
+
+ Returns
+ -------
+ is_efficient : numpy.ndarray
+ A (n_points, ) boolean array, indicating whether each point is pareto-efficient.
+ """
+ is_efficient = np.ones(costs.shape[0], dtype=bool)
+ for i, c in enumerate(costs):
+ is_efficient[i] = np.all(np.any(costs[:i] > c, axis=1)) and np.all(
+ np.any(costs[i + 1 :] > c, axis=1)
+ )
+ return is_efficient
+
+
+def get_weightings(objectives_normed: List[str], df: pd.DataFrame) -> np.ndarray:
+ """
+ Calculate the weighting for the weighted importance using the points on the pareto-front.
+
+ Parameters
+ ----------
+ objectives_normed : List[str]
+ The normalized objective names as a list of strings.
+ df : pandas.DataFrame
+ The dataframe containing the encoded data.
+
+ Returns
+ -------
+ weightings : numpy.ndarray
+ The weightings.
+ """
+ optimized = is_pareto_efficient(df[objectives_normed].to_numpy())
+ return (
+ df[optimized][objectives_normed].T.apply(lambda values: values / values.sum()).T.to_numpy()
+ )
diff --git a/docs/images/plugins/ablation_paths.png b/docs/images/plugins/ablation_paths.png
index e5f0620f..4b7573fe 100644
Binary files a/docs/images/plugins/ablation_paths.png and b/docs/images/plugins/ablation_paths.png differ
diff --git a/docs/images/plugins/ablation_paths_mo.png b/docs/images/plugins/ablation_paths_mo.png
new file mode 100644
index 00000000..d9a34e6f
Binary files /dev/null and b/docs/images/plugins/ablation_paths_mo.png differ
diff --git a/docs/images/plugins/importances.png b/docs/images/plugins/importances.png
index 1c2aff14..ecd32902 100644
Binary files a/docs/images/plugins/importances.png and b/docs/images/plugins/importances.png differ
diff --git a/docs/images/plugins/importances_mo.png b/docs/images/plugins/importances_mo.png
new file mode 100644
index 00000000..137d6a62
Binary files /dev/null and b/docs/images/plugins/importances_mo.png differ
diff --git a/docs/plugins/ablation_paths.rst b/docs/plugins/ablation_paths.rst
index 2607db33..d8b98a76 100644
--- a/docs/plugins/ablation_paths.rst
+++ b/docs/plugins/ablation_paths.rst
@@ -1,5 +1,5 @@
Ablation Paths
-===========
+==============
Ablation Paths is a method to analyze the importance of hyperparameters in a configuration space.
Starting from a default configuration, the default configuration is iteratively changed to the
@@ -19,9 +19,24 @@ To learn more about Ablation Paths, please see the paper
.. image:: ../images/plugins/ablation_paths.png
.. image:: ../images/plugins/ablation_paths2.png
+Multi-Objective Ablation Paths
+------------------------------
+
+The ablation path analysis can also be applied to two objectives, showing how the importance
+changes from one objective to the other. Based on a weighting scheme, the two objectives are
+scalarized to form a single objective for which the ablation path is calculated. The resulting
+plot shows the contribution of each hyperparameter to the performance for different weightings,
+including the default performance. Note that the y-axis displays the sum of the weighted
+normalized performance. The weightings are calculated based on the points on the Pareto front,
+which is the set of non-dominated solutions. For more details, please see the paper
+`Hyperparameter Importance Analysis for Multi-Objective AutoML
+<https://arxiv.org/abs/2405.07640>`_.
+
+.. image:: ../images/plugins/ablation_paths_mo.png
+
Options
-------
-* **Objective**: Select the objective function you wish to analyze.
+* **Objective 1 / 2:** Choose the objective for which the ablation path is calculated. Optionally, choose a second objective to see how the importance changes from one objective to the other.
* **Trees:** Specify the number of trees for the random forest surrogate model used in calculating importance scores.
diff --git a/docs/plugins/importances.rst b/docs/plugins/importances.rst
index 83386433..e8641339 100644
--- a/docs/plugins/importances.rst
+++ b/docs/plugins/importances.rst
@@ -35,11 +35,24 @@ global impact of hyperparameters. For more details on fANOVA, please refer to th
`An Efficient Approach for Assessing Hyperparameter Importance
<https://proceedings.mlr.press/v32/hutter14.html>`_.
+Multi-Objective Importance
+--------------------------
+
+Both LPI and fANOVA can also be applied to two objectives, showing how the importance changes from
+one objective to the other. Based on a weighting scheme, the two objectives are scalarized to form
+a single objective for which the importance is calculated. The resulting plot then shows the
+scalarized importance for different weightings. The weightings are calculated based on the points
+on the Pareto front, which is the set of non-dominated solutions. For more details, please
+see the paper `Hyperparameter Importance Analysis for Multi-Objective AutoML
+<https://arxiv.org/abs/2405.07640>`_.
+
+.. image:: ../images/plugins/importances_mo.png
+
Options
-------
-* **Objective:** Select the objective function you wish to analyze.
+* **Objective 1 / 2:** Choose the objective for which the importance is calculated. Optionally, choose a second objective to see how the importance changes from one objective to the other.
* **Method:** Whether to calculate the local parameter importance or the fANOVA importance.
diff --git a/docs/plugins/pareto_front.rst b/docs/plugins/pareto_front.rst
index 86512ca3..851b909a 100644
--- a/docs/plugins/pareto_front.rst
+++ b/docs/plugins/pareto_front.rst
@@ -21,7 +21,7 @@ This plugin can help answer the following questions:
Options
-------
-* **Objective #1 / #2:** Select the objective functions you wish to analyze.
+* **Objective 1 / 2:** Select the objective functions you wish to analyze.
* **Budget**: Select the multi-fidelity budget to be used. The plugin will only consider trials evaluated
on the selected budget. The *Combined* budget option displays all configurations but shows scores only
diff --git a/examples/api/ablation_paths.py b/examples/api/ablation_paths.py
index 83ffad96..85b994a8 100644
--- a/examples/api/ablation_paths.py
+++ b/examples/api/ablation_paths.py
@@ -13,15 +13,17 @@
if __name__ == "__main__":
# Instantiate the run
- run = DeepCAVERun.from_path(Path("logs/DeepCAVE/minimal/run_2"))
+ run = DeepCAVERun.from_path(Path("logs/DeepCAVE/minimal/run_1"))
- objective_id = run.get_objective_ids()[0]
+ objective_id1 = run.get_objective_ids()[0]
+ objective_id2 = None # replace with run.get_objective_ids()[1] for multi-objective importance
budget_id = run.get_budget_ids()[0]
# Instantiate the plugin
plugin = AblationPaths()
inputs = plugin.generate_inputs(
- objective_id=objective_id,
+ objective_id1=objective_id1,
+ objective_id2=objective_id2,
budget_id=budget_id,
n_hps=100,
n_trees=100,
@@ -33,4 +35,5 @@
# Finally, you can load the figure. Here, the filter variables play a role.
figure1, figure2 = plugin.load_outputs(run, inputs, outputs)
figure1.write_image("examples/api/ablation_paths_performance.png", scale=2.0)
- figure2.write_image("examples/api/ablation_paths_improvement.png", scale=2.0)
+ if not objective_id2:
+ figure2.write_image("examples/api/ablation_paths_improvement.png", scale=2.0)
diff --git a/examples/api/importances.py b/examples/api/importances.py
index de4dad38..78bc36ac 100644
--- a/examples/api/importances.py
+++ b/examples/api/importances.py
@@ -15,14 +15,16 @@
# Instantiate the run
run = DeepCAVERun.from_path(Path("logs/DeepCAVE/minimal/run_2"))
- objective_id = run.get_objective_ids()[0]
+ objective_id1 = run.get_objective_ids()[0]
+ objective_id2 = None # replace with run.get_objective_ids()[1] for multi-objective importance
budget_ids = run.get_budget_ids()
# Instantiate the plugin
plugin = Importances()
inputs = plugin.generate_inputs(
hyperparameter_names=list(run.configspace.keys()),
- objective_id=objective_id,
+ objective_id1=objective_id1,
+ objective_id2=objective_id2,
budget_ids=budget_ids,
method="global",
n_hps=3,