From c535c589af6f061dcf5bb206e2f964f915db79de Mon Sep 17 00:00:00 2001 From: Katie Worton Date: Wed, 8 May 2024 17:37:17 +0100 Subject: [PATCH 1/3] squad-track-duration: Add information about counts in means Add a table of information which displays how many boottimes were included in the mean boottime for each device. Signed-off-by: Katie Worton --- squad-track-duration | 105 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 84 insertions(+), 21 deletions(-) diff --git a/squad-track-duration b/squad-track-duration index e58e213..fc3b7dd 100755 --- a/squad-track-duration +++ b/squad-track-duration @@ -313,60 +313,123 @@ def run(): df["build_name_device"] = df.build_name + "-" + df.device figure_colletion = [] + # Filter the DataFrame by the desired build name(s) + filtered_df1 = df[df["build_name"].isin([args.build_name])] + # Create a DataFrame which groups by type then takes the mean of the boot # time per type. - dft = df.groupby(["created_at", "git_describe", "device", "build_name"])[ - "boottime" - ].mean() + df_grouping1 = filtered_df1.groupby( + ["created_at", "git_describe", "device", "build_name"] + ) + + mean_boottimes1 = df_grouping1["boottime"].mean() # Convert the Series object back to a DataFrame then sort by the created_at - dft = dft.reset_index().sort_values(by="created_at") + mean_boottimes1 = mean_boottimes1.reset_index().sort_values(by="created_at") - # Filter these results by the desired build name(s) - dft = dft[dft["build_name"].isin([args.build_name])] + # Calculate how many boottimes we averaged over per device + count_per_device1 = df_grouping1["boottime"].count().groupby("device").sum() + col_name_boottime_count = "Boottimes included in average" + count_per_device1 = count_per_device1.reset_index().rename( + columns={"boottime": col_name_boottime_count} + ) + + # Create a new column with the name and count, then stick together the + # counts and the averages + count_per_device1["device_count"] = ( + count_per_device1.device + + " (" + + count_per_device1[col_name_boottime_count].astype(str) + + ")" + ) + mean_boottimes1 = mean_boottimes1.merge( + count_per_device1, on="device", how="inner", suffixes=("_1", "_2") + ) # Create the figure to display this data figure_colletion.append( MetaFigure( - px.line(dft, x="created_at", y="boottime", color="device", markers=True) - .update_xaxes(tickvals=dft["created_at"], ticktext=dft["git_describe"]) + px.line( + mean_boottimes1, + x="created_at", + y="boottime", + color="device_count", + markers=True, + labels={"device_count": "Device (number of boots in mean)"}, + ) + .update_xaxes( + tickvals=mean_boottimes1["created_at"], + ticktext=mean_boottimes1["git_describe"], + ) .update_layout(xaxis_title="Version", yaxis_title="Boot time"), f"Line graph, {args.build_name}", - f"This line graph, is generated from build_name {args.build_name}.", + f"This line graph is generated from build_name {args.build_name}." + + " The graph uses the average (mean) over a number of boots for each device. The number of boots included in the average is presented in the 'Device (number of boots in mean)' in the line graph legend.", ) ) + # Filter the DataFrame by the desired build name(s) + filtered_df2 = df[df["build_name"].str.endswith(args.build_name.split("-")[-1])] + # Group and the mean of the boot time for the desired type - this time it is # grouped by build_name_device, too, since we want to look at both the build # and what device this was run on. - dfp = df.groupby( + df_grouping2 = filtered_df2.groupby( ["created_at", "git_describe", "device", "build_name_device", "build_name"] - )["boottime"].mean() + ) + + mean_boottimes2 = df_grouping2["boottime"].mean() # Convert the Series object back to a DataFrame then sort by the created_at # and build_name_device - dfp = dfp.reset_index().sort_values(by=["created_at", "build_name_device"]) + mean_boottimes2 = mean_boottimes2.reset_index().sort_values( + by=["created_at", "build_name_device"] + ) + + logger.debug(mean_boottimes2.info()) + logger.debug(mean_boottimes2) - # Filter by results from the specified build names - dfp = dfp[dfp["build_name"].str.endswith(args.build_name.split("-")[-1])] - logger.debug(dfp.info()) - logger.debug(dfp) + # Calculate how many boottimes we averaged over per device + count_per_device2 = ( + df_grouping2["boottime"].count().groupby("build_name_device").sum() + ) + count_per_device2 = count_per_device2.reset_index().rename( + columns={"boottime": col_name_boottime_count} + ) + + # Create a new column with the name and count, then stick together the + # counts and the averages + count_per_device2["build_name_device_count"] = ( + count_per_device2.build_name_device + + " (" + + count_per_device2[col_name_boottime_count].astype(str) + + ")" + ) + mean_boottimes2 = mean_boottimes2.merge( + count_per_device2, on="build_name_device", how="inner", suffixes=("_1", "_2") + ) # Create the figure for this visualisation figure_colletion.append( MetaFigure( px.line( - dfp, + mean_boottimes2, x="created_at", y="boottime", - color="build_name_device", + color="build_name_device_count", markers=True, - labels={"build_name_device": "Build name - device"}, + labels={ + "build_name_device_count": "Build name - device (number of boots in mean)" + }, + ) + .update_xaxes( + tickvals=mean_boottimes2["created_at"], + ticktext=mean_boottimes2["git_describe"], ) - .update_xaxes(tickvals=dft["created_at"], ticktext=dft["git_describe"]) .update_layout(xaxis_title="Version", yaxis_title="Boot time"), f"Line graph, {args.build_name.split('-')[-1]}", - f"This line graph, is generated from \"{args.build_name.split('-')[-1]}\".", + f"This line graph is generated from \"{args.build_name.split('-')[-1]}\"." + + " The graph uses the average (mean) over a number of boots for each build_name-device combination. The number of boots included in the average is presented in the 'Build name - device (number of boots in mean)' in the line graph legend.", ) ) From 73f964b933f1a0b52b18d3ddf4c2c81fd5b908d7 Mon Sep 17 00:00:00 2001 From: Katie Worton Date: Thu, 9 May 2024 08:41:48 +0100 Subject: [PATCH 2/3] squad-track-duration: Remove unneeded DataFrame setup Remove code that creates a DataFrame then reassigns it before it is ever used. Signed-off-by: Katie Worton --- squad-track-duration | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/squad-track-duration b/squad-track-duration index fc3b7dd..f94a7d8 100755 --- a/squad-track-duration +++ b/squad-track-duration @@ -284,17 +284,6 @@ def run(): if args.start_datetime > args.end_datetime: raise Exception("Start time must be earlier than end time.") - df = pd.DataFrame( - { - "build_name": [], - "git_describe": [], - "device": [], - "boottime": [], - "finished": [], - "created_at": [], - } - ) - build_cache = get_cache_from_artifactorial() data = [] data, build_cache = get_data(args, build_cache) From cfb98daedba25d250e398341fd003bcb29f82bad Mon Sep 17 00:00:00 2001 From: Katie Worton Date: Thu, 9 May 2024 08:49:00 +0100 Subject: [PATCH 3/3] squad-track-duration: Update sorting to fix legend order Update the sorting of the data so it is sorted by the legend lines then by created_at. This will ensure the data for each graph line is in the correct order while also putting the legend in alphabetical order. Signed-off-by: Katie Worton --- squad-track-duration | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/squad-track-duration b/squad-track-duration index f94a7d8..1ad1a30 100755 --- a/squad-track-duration +++ b/squad-track-duration @@ -313,8 +313,13 @@ def run(): mean_boottimes1 = df_grouping1["boottime"].mean() - # Convert the Series object back to a DataFrame then sort by the created_at - mean_boottimes1 = mean_boottimes1.reset_index().sort_values(by="created_at") + # Convert the Series object back to a DataFrame then sort values first by + # device, then by created_at. This will make the graph legend alphabetised + # while also ensuring the dates for each line are ordered by created_at so + # the graph's lines will be drawn correctly. + mean_boottimes1 = mean_boottimes1.reset_index().sort_values( + by=["device", "created_at"] + ) # Calculate how many boottimes we averaged over per device count_per_device1 = df_grouping1["boottime"].count().groupby("device").sum() @@ -369,10 +374,12 @@ def run(): mean_boottimes2 = df_grouping2["boottime"].mean() - # Convert the Series object back to a DataFrame then sort by the created_at - # and build_name_device + # Convert the Series object back to a DataFrame then sort values first by + # build_name_device, then by created_at. This will make the graph legend + # alphabetised while also ensuring the dates for each line are ordered by + # created_at so the graph's lines will be drawn correctly. mean_boottimes2 = mean_boottimes2.reset_index().sort_values( - by=["created_at", "build_name_device"] + by=["build_name_device", "created_at"] ) logger.debug(mean_boottimes2.info())