#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# vim: set ts=4
#
# Copyright 2024-present Linaro Limited
#
# SPDX-License-Identifier: MIT
"""Track boot-time durations of SQUAD builds and render them as line charts.

Two line charts are written to ``index.html``: one with a line per device
(restricted to ``--build-name``) and one with a line per build-name + device
combination (restricted to build names sharing the last ``-``-separated
component of ``--build-name``).

Example::

    ./squad-track-duration --group lkft --project linux-next-master \
        --start-datetime 2024-04-01 --end-datetime 2024-05-02

A file called ``builds.json`` functions as a database: it stores the records
of finished builds fetched from SQUAD so they are not fetched again.

Originally added by Anders Roxell (patch "add squad-track-duration",
2024-05-02), incorporating Katie's suggestion regarding the datetime library.
"""

import argparse
import io
import json
import logging
import os
import sys
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd
import plotly.express as px
from squad_client.core.api import SquadApi
from squad_client.core.models import ALL, Squad

squad_host_url = "https://qa-reports.linaro.org/"
SquadApi.configure(cache=3600, url=os.getenv("SQUAD_HOST", squad_host_url))

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

ARTIFACTORIAL_FILENAME = "builds.json"

# Timestamp format used for the created_at filters sent to SQUAD.
_SQUAD_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S"


class MetaFigure:
    """A plotly figure bundled with the title and description shown with it.

    Attributes:
        plotly_fig: the plotly figure object.
        title: heading text for the figure.
        description: descriptive text for the figure.
    """

    def __init__(self, plotly_fig, title, description):
        self.plotly_fig = plotly_fig
        self.title = title
        self.description = description

    def fig(self):
        """Return the wrapped plotly figure."""
        # The previous implementation returned ``self.fig`` (this bound
        # method) instead of the figure. The former ``title()`` and
        # ``description()`` methods were shadowed by the instance attributes
        # of the same name and could never be called, so they are gone;
        # ``.title`` and ``.description`` remain plain attributes.
        return self.plotly_fig


def parse_datetime_from_string(datetime_string):
    """Parse a command-line datetime string into a ``datetime``.

    Args:
        datetime_string: text in ``YYYY-MM-DD`` or ``YYYY-MM-DDTHH:MM:SS``
            form.

    Returns:
        The parsed (naive) ``datetime``.

    Raises:
        argparse.ArgumentTypeError: if no accepted format matches.
    """
    accepted_datetime_formats = ["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]

    # Try each accepted format in turn; the first that parses wins.
    for datetime_format in accepted_datetime_formats:
        try:
            return datetime.strptime(datetime_string, datetime_format)
        except ValueError:
            continue

    raise argparse.ArgumentTypeError(
        f"Unsupported datetime format {datetime_string}. Accepted formats are {accepted_datetime_formats}"
    )


def parse_args(argv=None):
    """Parse the command-line arguments.

    Args:
        argv: optional list of argument strings; ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` with ``group``, ``project``,
        ``start_datetime``, ``end_datetime``, ``build_name`` and ``debug``.
    """
    parser = argparse.ArgumentParser(description="Track duration")

    parser.add_argument(
        "--group",
        required=True,
        help="squad group",
    )

    parser.add_argument(
        "--project",
        required=True,
        help="squad project",
    )

    parser.add_argument(
        "--start-datetime",
        type=parse_datetime_from_string,
        required=True,
        help="Starting date time. Example: 2022-01-01 or 2022-01-01T00:00:00",
    )

    parser.add_argument(
        "--end-datetime",
        type=parse_datetime_from_string,
        required=True,
        help="Ending date time. Example: 2022-12-31 or 2022-12-31T00:00:00",
    )

    parser.add_argument(
        "--build-name",
        required=False,
        default="gcc-13-lkftconfig",
        help="Build name",
    )

    parser.add_argument(
        "--debug",
        action="store_true",
        default=False,
        help="Display debug messages",
    )

    return parser.parse_args(argv)


def get_cache_from_artifactorial():
    """Load the build cache from ``ARTIFACTORIAL_FILENAME``.

    Returns:
        The decoded JSON content, or an empty dict when the file does not
        exist.
    """
    try:
        with open(ARTIFACTORIAL_FILENAME, "r", encoding="utf-8") as fp:
            return json.load(fp)
    except FileNotFoundError:
        return {}


def save_build_cache_to_artifactorial(data, days_ago=None):
    """Write the build cache to ``ARTIFACTORIAL_FILENAME`` as JSON.

    Args:
        data: JSON-serialisable cache content.
        days_ago: unused; kept so existing callers keep working.
    """
    with open(ARTIFACTORIAL_FILENAME, "w", encoding="utf-8") as fp:
        json.dump(data, fp)


def _iter_time_windows(start_datetime, end_datetime, delta):
    """Yield consecutive ``(window_start, window_end)`` pairs covering a range.

    The first window starts at ``start_datetime``; every later window starts
    at midnight. The last window ends at ``end_datetime``.

    Raises:
        ValueError: if ``delta`` is shorter than one day or not whole days.
    """
    if delta.days < 1:
        raise ValueError("Minimum delta is 1 day for this code to work.")
    if delta.seconds != 0 or delta.microseconds != 0:
        raise ValueError("Deltas must be whole days only.")

    window_start = start_datetime
    while True:
        # If this step would reach or pass the end of the range, clamp to
        # the requested end and stop.
        if window_start + delta >= end_datetime:
            yield window_start, end_datetime
            return

        midnight = window_start.replace(hour=0, minute=0, second=0, microsecond=0)
        yield window_start, midnight + delta
        # Subsequent windows start at the beginning of a day.
        window_start = midnight + delta


def _collect_build_records(build_id, build, device_dict):
    """Fetch a build's testruns and return one boot-time record per testrun.

    Testruns lacking duration data, a build name, or a boot duration are
    skipped.
    """
    records = []
    testruns = build.testruns(count=ALL, prefetch_metadata=True)
    for testrun in testruns.values():
        device = device_dict[testrun.environment]
        metadata = testrun.metadata

        durations = metadata.durations
        # Ignore testruns without duration data
        if durations is None:
            continue

        build_name = metadata.build_name
        # Ignore testruns without a build_name
        if build_name is None:
            continue

        # Read the boot time from the duration data; a testrun that did not
        # record a boot duration is skipped rather than aborting the run.
        try:
            boottime = float(durations["tests"]["boot"])
        except (KeyError, TypeError, ValueError):
            logger.debug("no boot duration for a testrun of build %s", build_id)
            continue

        records.append(
            {
                "build_id": build_id,
                "build_name": build_name,
                "git_describe": build.version.strip(),
                "device": device,
                "boottime": boottime,
                "finished": build.finished,
                "created_at": build.created_at,
            }
        )
    return records


def get_data(args, build_cache):
    """Collect boot-time records for all builds in the requested time range.

    SQUAD is queried window by window (see ``_iter_time_windows``). Builds
    whose id is already in ``build_cache`` are served from the cache; other
    builds are fetched, and cached if they are marked finished and produced
    at least one record.

    Args:
        args: parsed arguments providing ``group``, ``project``,
            ``start_datetime`` and ``end_datetime``.
        build_cache: dict mapping ``str(build_id)`` to a list of records;
            updated in place.

    Returns:
        A ``(records, build_cache)`` tuple, where ``records`` is a list of
        dicts with the keys ``build_id``, ``build_name``, ``git_describe``,
        ``device``, ``boottime``, ``finished`` and ``created_at``.
    """
    group = Squad().group(args.group)
    project = group.project(args.project)
    environments = project.environments(count=ALL).values()

    # Lookup table for environment URL -> device name (slug). It does not
    # depend on the time window, so it is built once.
    device_dict = {env.url: env.slug for env in environments}

    # How many days of data to read from SQUAD per request. Must be a whole
    # number of days, at least 1.
    delta = timedelta(days=1)

    records = []
    for window_start, window_end in _iter_time_windows(
        args.start_datetime, args.end_datetime, delta
    ):
        logger.info(
            "Fetching builds from SQUAD, start_datetime: %s, end_datetime: %s",
            window_start,
            window_end,
        )

        filters = {
            "created_at__lt": window_end.strftime(_SQUAD_DATETIME_FORMAT),
            "created_at__gt": window_start.strftime(_SQUAD_DATETIME_FORMAT),
            "count": ALL,
        }
        builds = project.builds(**filters)

        for build_id, build in builds.items():
            cache_key = str(build_id)
            cached_records = build_cache.get(cache_key)
            if cached_records is not None:
                logger.debug("cached: %s", build_id)
                records.extend(cached_records)
                continue

            logger.debug("no-cache: %s", build_id)
            build_records = _collect_build_records(build_id, build, device_dict)
            records.extend(build_records)

            # Only finished builds are cached, so that builds still in
            # progress are looked up again on the next run.
            if build.finished and build_records:
                build_cache[cache_key] = build_records
                logger.debug("finished: %s, %s", build_id, build.finished)

    return records, build_cache


def combine_plotly_figs_to_html(
    figs,
    html_fname,
    main_title,
    main_description,
    include_plotlyjs="cdn",
    separator=None,
    auto_open=False,
):
    """Write several figures into a single HTML page.

    The page holds the main title and description, a "Page content" list
    linking to each figure, then each figure preceded by its title and
    description. Titles, descriptions and ``separator`` are inserted
    verbatim (no HTML escaping).

    Args:
        figs: sequence of ``MetaFigure``-like objects exposing ``plotly_fig``,
            ``title`` and ``description``. May be empty.
        html_fname: path of the HTML file to write.
        main_title: page heading.
        main_description: text shown under the heading.
        include_plotlyjs: passed to ``to_html`` for the first figure only;
            later figures reuse the already-loaded plotly.js.
        separator: optional HTML fragment written between figures.
        auto_open: when true, open the written file in a web browser.
    """
    # NOTE(review): the markup of the original string literals was not
    # recoverable from the patch text; the tags below are a reconstruction
    # of the same page structure -- confirm against the intended layout.
    parts = [
        "<!DOCTYPE html>",
        "<html>",
        f'<head><meta charset="utf-8" /><title>{main_title}</title></head>',
        "<body>",
        f"<h1>{main_title}</h1>",
        f"<p>{main_description}</p>",
        "<h2>Page content</h2>",
        "<ul>",
    ]
    parts.extend(
        f'<li><a href="#fig{index}">{fig.title}</a></li>'
        for index, fig in enumerate(figs)
    )
    parts.append("</ul>")

    for index, fig in enumerate(figs):
        if index > 0 and separator:
            parts.append(separator)
        parts.append(f'<h2 id="fig{index}">{fig.title}</h2>')
        parts.append(f"<p>{fig.description}</p>")
        # Every figure is embedded as a fragment (full_html=False) so the
        # page stays one well-formed document; plotly.js is only included
        # with the first figure.
        parts.append(
            fig.plotly_fig.to_html(
                full_html=False,
                include_plotlyjs=include_plotlyjs if index == 0 else False,
            )
        )

    parts.append("</body>")
    parts.append("</html>")

    with open(html_fname, "w", encoding="utf-8") as f:
        f.write("\n".join(parts))

    if auto_open:
        import webbrowser

        uri = Path(html_fname).absolute().as_uri()
        webbrowser.open(uri)


def _debug_log_dataframe(df):
    """Log a dataframe and its ``info()`` summary at debug level."""
    if not logger.isEnabledFor(logging.DEBUG):
        return
    # DataFrame.info() prints to stdout and returns None unless given a
    # buffer, so capture it to route the summary through the logger.
    info_buffer = io.StringIO()
    df.info(buf=info_buffer)
    logger.debug(df)
    logger.debug(info_buffer.getvalue())


def run():
    """Fetch the boot-time data, build both charts and write ``index.html``.

    Returns:
        ``0`` on success, ``1`` when no boot-time data was found for the
        requested range.

    Raises:
        ValueError: if the start datetime is later than the end datetime.
    """
    args = parse_args()
    if args.debug:
        logger.setLevel(level=logging.DEBUG)

    if args.start_datetime > args.end_datetime:
        raise ValueError("Start time must be earlier than end time.")

    build_cache = get_cache_from_artifactorial()
    data, build_cache = get_data(args, build_cache)
    save_build_cache_to_artifactorial(build_cache)

    if not data:
        # An empty frame has no columns, so the column accesses below would
        # fail; report the situation instead.
        logger.error("No boot time data found for the requested range.")
        return 1

    # Turn the data (list of dicts) into a pandas dataframe
    df = pd.DataFrame(data)

    logger.debug("***********************")
    _debug_log_dataframe(df)
    logger.debug("***********************")

    # Generate a build_name_device column and add this as a column in the dataframe
    df["build_name_device"] = df.build_name + "-" + df.device
    figure_collection = []

    # Mean boot time per (build creation time, version, device, build name).
    dft = df.groupby(["created_at", "git_describe", "device", "build_name"])[
        "boottime"
    ].mean()

    # Convert the Series object back to a DataFrame then sort by the created_at
    dft = dft.reset_index().sort_values(by="created_at")

    # Filter these results by the desired build name
    dft = dft[dft["build_name"] == args.build_name]

    # Create the figure to display this data
    figure_collection.append(
        MetaFigure(
            px.line(dft, x="created_at", y="boottime", color="device", markers=True)
            .update_xaxes(tickvals=dft["created_at"], ticktext=dft["git_describe"])
            .update_layout(xaxis_title="Version", yaxis_title="Boot time"),
            f"Line graph, {args.build_name}",
            f"This line graph, is generated from build_name {args.build_name}.",
        )
    )

    # Same mean, this time also grouped by build_name_device, since we want
    # to look at both the build and what device it was run on.
    dfp = df.groupby(
        ["created_at", "git_describe", "device", "build_name_device", "build_name"]
    )["boottime"].mean()

    # Convert the Series object back to a DataFrame then sort by the created_at
    # and build_name_device
    dfp = dfp.reset_index().sort_values(by=["created_at", "build_name_device"])

    # Keep every build name sharing the last "-"-separated component of the
    # requested build name.
    build_suffix = args.build_name.split("-")[-1]
    dfp = dfp[dfp["build_name"].str.endswith(build_suffix)]
    _debug_log_dataframe(dfp)

    # Create the figure for this visualisation. The axis ticks come from dfp,
    # the frame being plotted (they were previously taken from dft).
    figure_collection.append(
        MetaFigure(
            px.line(
                dfp,
                x="created_at",
                y="boottime",
                color="build_name_device",
                markers=True,
                labels={"build_name_device": "Build name - device"},
            )
            .update_xaxes(tickvals=dfp["created_at"], ticktext=dfp["git_describe"])
            .update_layout(xaxis_title="Version", yaxis_title="Boot time"),
            f"Line graph, {build_suffix}",
            f'This line graph, is generated from "{build_suffix}".',
        )
    )

    combine_plotly_figs_to_html(
        figure_collection,
        "index.html",
        "This page show some interesting data around LKFT's builds",
        f"These graphs is based on LKFT's {args.project} branch",
    )

    return 0


if __name__ == "__main__":
    sys.exit(run())