From c535c589af6f061dcf5bb206e2f964f915db79de Mon Sep 17 00:00:00 2001
From: Katie Worton <katie.worton@linaro.org>
Date: Wed, 8 May 2024 17:37:17 +0100
Subject: [PATCH 1/3] squad-track-duration: Add information about counts in
 means

Add a table of information which records how many boottimes were
included in the mean boottime for each device, and display these counts
next to each entry in the line graph legends.

Signed-off-by: Katie Worton <katie.worton@linaro.org>
---
 squad-track-duration | 105 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 84 insertions(+), 21 deletions(-)

diff --git a/squad-track-duration b/squad-track-duration
index e58e213..fc3b7dd 100755
--- a/squad-track-duration
+++ b/squad-track-duration
@@ -313,60 +313,123 @@ def run():
     df["build_name_device"] = df.build_name + "-" + df.device
     figure_colletion = []
 
+    # Filter the DataFrame by the desired build name(s)
+    filtered_df1 = df[df["build_name"].isin([args.build_name])]
+
     # Create a DataFrame which groups by type then takes the mean of the boot
     # time per type.
-    dft = df.groupby(["created_at", "git_describe", "device", "build_name"])[
-        "boottime"
-    ].mean()
+    df_grouping1 = filtered_df1.groupby(
+        ["created_at", "git_describe", "device", "build_name"]
+    )
+
+    mean_boottimes1 = df_grouping1["boottime"].mean()
 
     # Convert the Series object back to a DataFrame then sort by the created_at
-    dft = dft.reset_index().sort_values(by="created_at")
+    mean_boottimes1 = mean_boottimes1.reset_index().sort_values(by="created_at")
 
-    # Filter these results by the desired build name(s)
-    dft = dft[dft["build_name"].isin([args.build_name])]
+    # Calculate how many boottimes we averaged over per device
+    count_per_device1 = df_grouping1["boottime"].count().groupby("device").sum()
+    col_name_boottime_count = "Boottimes included in average"
+    count_per_device1 = count_per_device1.reset_index().rename(
+        columns={"boottime": col_name_boottime_count}
+    )
+
+    # Create a new column with the name and count, then stick together the
+    # counts and the averages
+    count_per_device1["device_count"] = (
+        count_per_device1.device
+        + " ("
+        + count_per_device1[col_name_boottime_count].astype(str)
+        + ")"
+    )
+    mean_boottimes1 = mean_boottimes1.merge(
+        count_per_device1, on="device", how="inner", suffixes=("_1", "_2")
+    )
 
     # Create the figure to display this data
     figure_colletion.append(
         MetaFigure(
-            px.line(dft, x="created_at", y="boottime", color="device", markers=True)
-            .update_xaxes(tickvals=dft["created_at"], ticktext=dft["git_describe"])
+            px.line(
+                mean_boottimes1,
+                x="created_at",
+                y="boottime",
+                color="device_count",
+                markers=True,
+                labels={"device_count": "Device (number of boots in mean)"},
+            )
+            .update_xaxes(
+                tickvals=mean_boottimes1["created_at"],
+                ticktext=mean_boottimes1["git_describe"],
+            )
             .update_layout(xaxis_title="Version", yaxis_title="Boot time"),
             f"Line graph, {args.build_name}",
-            f"This line graph, is generated from build_name {args.build_name}.",
+            f"This line graph is generated from build_name {args.build_name}."
+            + " The graph uses the average (mean) over a number of boots for each device. The number of boots included in the average is presented in the 'Device (number of boots in mean)' in the line graph legend.",
         )
     )
 
+    # Filter the DataFrame by the desired build name(s)
+    filtered_df2 = df[df["build_name"].str.endswith(args.build_name.split("-")[-1])]
+
     # Group and the mean of the boot time for the desired type - this time it is
     # grouped by build_name_device, too, since we want to look at both the build
     # and what device this was run on.
-    dfp = df.groupby(
+    df_grouping2 = filtered_df2.groupby(
         ["created_at", "git_describe", "device", "build_name_device", "build_name"]
-    )["boottime"].mean()
+    )
+
+    mean_boottimes2 = df_grouping2["boottime"].mean()
 
     # Convert the Series object back to a DataFrame then sort by the created_at
     # and build_name_device
-    dfp = dfp.reset_index().sort_values(by=["created_at", "build_name_device"])
+    mean_boottimes2 = mean_boottimes2.reset_index().sort_values(
+        by=["created_at", "build_name_device"]
+    )
+
+    logger.debug(mean_boottimes2.info())
+    logger.debug(mean_boottimes2)
 
-    # Filter by results from the specified build names
-    dfp = dfp[dfp["build_name"].str.endswith(args.build_name.split("-")[-1])]
-    logger.debug(dfp.info())
-    logger.debug(dfp)
+    # Calculate how many boottimes we averaged over per device
+    count_per_device2 = (
+        df_grouping2["boottime"].count().groupby("build_name_device").sum()
+    )
+    count_per_device2 = count_per_device2.reset_index().rename(
+        columns={"boottime": col_name_boottime_count}
+    )
+
+    # Create a new column with the name and count, then stick together the
+    # counts and the averages
+    count_per_device2["build_name_device_count"] = (
+        count_per_device2.build_name_device
+        + " ("
+        + count_per_device2[col_name_boottime_count].astype(str)
+        + ")"
+    )
+    mean_boottimes2 = mean_boottimes2.merge(
+        count_per_device2, on="build_name_device", how="inner", suffixes=("_1", "_2")
+    )
 
     # Create the figure for this visualisation
     figure_colletion.append(
         MetaFigure(
             px.line(
-                dfp,
+                mean_boottimes2,
                 x="created_at",
                 y="boottime",
-                color="build_name_device",
+                color="build_name_device_count",
                 markers=True,
-                labels={"build_name_device": "Build name - device"},
+                labels={
+                    "build_name_device_count": "Build name - device (number of boots in mean)"
+                },
+            )
+            .update_xaxes(
+                tickvals=mean_boottimes2["created_at"],
+                ticktext=mean_boottimes2["git_describe"],
             )
-            .update_xaxes(tickvals=dft["created_at"], ticktext=dft["git_describe"])
             .update_layout(xaxis_title="Version", yaxis_title="Boot time"),
             f"Line graph, {args.build_name.split('-')[-1]}",
-            f"This line graph, is generated from \"{args.build_name.split('-')[-1]}\".",
+            f"This line graph is generated from \"{args.build_name.split('-')[-1]}\"."
+            + " The graph uses the average (mean) over a number of boots for each build_name-device combination. The number of boots included in the average is presented in the 'Build name - device (number of boots in mean)' in the line graph legend.",
         )
     )
 

From 73f964b933f1a0b52b18d3ddf4c2c81fd5b908d7 Mon Sep 17 00:00:00 2001
From: Katie Worton <katie.worton@linaro.org>
Date: Thu, 9 May 2024 08:41:48 +0100
Subject: [PATCH 2/3] squad-track-duration: Remove unneeded DataFrame setup

Remove the code that creates an empty DataFrame, since the variable is
reassigned before the empty DataFrame is ever used.

Signed-off-by: Katie Worton <katie.worton@linaro.org>
---
 squad-track-duration | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/squad-track-duration b/squad-track-duration
index fc3b7dd..f94a7d8 100755
--- a/squad-track-duration
+++ b/squad-track-duration
@@ -284,17 +284,6 @@ def run():
     if args.start_datetime > args.end_datetime:
         raise Exception("Start time must be earlier than end time.")
 
-    df = pd.DataFrame(
-        {
-            "build_name": [],
-            "git_describe": [],
-            "device": [],
-            "boottime": [],
-            "finished": [],
-            "created_at": [],
-        }
-    )
-
     build_cache = get_cache_from_artifactorial()
     data = []
     data, build_cache = get_data(args, build_cache)

From cfb98daedba25d250e398341fd003bcb29f82bad Mon Sep 17 00:00:00 2001
From: Katie Worton <katie.worton@linaro.org>
Date: Thu, 9 May 2024 08:49:00 +0100
Subject: [PATCH 3/3] squad-track-duration: Update sorting to fix legend order

Update the sorting of the data so it is sorted first by the column that
identifies each graph line (device or build_name_device), then by
created_at. This ensures the points of each graph line are in
chronological order while also putting the legend in alphabetical order.

Signed-off-by: Katie Worton <katie.worton@linaro.org>
---
 squad-track-duration | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/squad-track-duration b/squad-track-duration
index f94a7d8..1ad1a30 100755
--- a/squad-track-duration
+++ b/squad-track-duration
@@ -313,8 +313,13 @@ def run():
 
     mean_boottimes1 = df_grouping1["boottime"].mean()
 
-    # Convert the Series object back to a DataFrame then sort by the created_at
-    mean_boottimes1 = mean_boottimes1.reset_index().sort_values(by="created_at")
+    # Convert the Series object back to a DataFrame then sort values first by
+    # device, then by created_at. This will make the graph legend alphabetised
+    # while also ensuring the dates for each line are ordered by created_at so
+    # the graph's lines will be drawn correctly.
+    mean_boottimes1 = mean_boottimes1.reset_index().sort_values(
+        by=["device", "created_at"]
+    )
 
     # Calculate how many boottimes we averaged over per device
     count_per_device1 = df_grouping1["boottime"].count().groupby("device").sum()
@@ -369,10 +374,12 @@ def run():
 
     mean_boottimes2 = df_grouping2["boottime"].mean()
 
-    # Convert the Series object back to a DataFrame then sort by the created_at
-    # and build_name_device
+    # Convert the Series object back to a DataFrame then sort values first by
+    # build_name_device, then by created_at. This will make the graph legend
+    # alphabetised while also ensuring the dates for each line are ordered by
+    # created_at so the graph's lines will be drawn correctly.
     mean_boottimes2 = mean_boottimes2.reset_index().sort_values(
-        by=["created_at", "build_name_device"]
+        by=["build_name_device", "created_at"]
     )
 
     logger.debug(mean_boottimes2.info())