From 29a3c06276322c7fe4f762fc5d962144fdcacc88 Mon Sep 17 00:00:00 2001 From: Teytaud Date: Wed, 30 Mar 2022 09:15:59 +0200 Subject: [PATCH 1/2] Relevant weights if not all contexts run equally When different contexts have been run a different number of times, then it's better to first aggregate by settings. This is equivalent to the previous code: - if all settings have been replicated the same number of times - asymptotically, if missing runs are equally distributed among different settings and we replicated sufficiently many times --- nevergrad/benchmark/plotting.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nevergrad/benchmark/plotting.py b/nevergrad/benchmark/plotting.py index 5d2ab6415..6fc37b4f8 100644 --- a/nevergrad/benchmark/plotting.py +++ b/nevergrad/benchmark/plotting.py @@ -603,7 +603,10 @@ def make_data(df: pd.DataFrame, normalized_loss: bool = False) -> tp.Dict[str, t ["optimizer_name", "budget", "loss"] + (["pseudotime"] if "pseudotime" in df.columns else []), ] ) - groupeddf = df.groupby(["optimizer_name", "budget"]) + # We first aggregate equivalent rows. The only point of this is that we want all contexts to have the same + # weight, in e.g. xpresults_all.png, even if not all contexts have been run the same number of times. + compact_df = df.groupby(df.columns).mean() # We first aggregate equal contexts. 
+ groupeddf = compact_df.groupby(["optimizer_name", "budget"]) means = groupeddf.mean() stds = groupeddf.std() optim_vals: tp.Dict[str, tp.Dict[str, np.ndarray]] = {} From 53afc4d55a2017cefa1a4736f20895a4af6f752a Mon Sep 17 00:00:00 2001 From: Teytaud Date: Wed, 30 Mar 2022 09:33:26 +0200 Subject: [PATCH 2/2] Exclude measurement columns (loss, time, seed) from the context grouping keys --- nevergrad/benchmark/plotting.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/nevergrad/benchmark/plotting.py b/nevergrad/benchmark/plotting.py index 6fc37b4f8..2722ef8e1 100644 --- a/nevergrad/benchmark/plotting.py +++ b/nevergrad/benchmark/plotting.py @@ -605,7 +605,18 @@ def make_data(df: pd.DataFrame, normalized_loss: bool = False) -> tp.Dict[str, t ) # We first aggregate equivalent rows. The only point of this is that we want all contexts to have the same # weight, in e.g. xpresults_all.png, even if not all contexts have been run the same number of times. - compact_df = df.groupby(df.columns).mean() # We first aggregate equal contexts. + descriptors = sorted( + set(df.columns) + - { + "pseudotime", + "time", + "elapsed_time", + "elapsed_budget", + "loss", + "seed", + } + ) + compact_df = df.groupby(list(descriptors)).mean() # We first aggregate equal contexts. groupeddf = compact_df.groupby(["optimizer_name", "budget"]) means = groupeddf.mean() stds = groupeddf.std()