From bc68e0b3d0fc83b839238d7ea1480cd479e43f0b Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Thu, 2 May 2024 23:44:33 +0200
Subject: [PATCH] add squad-track-duration

Today its hardcoded to view build_names gcc-13-lkftconfig or
clang-17-lkftconfig, two line charts is presented, one for devices and
the other with build-name+devices.

Example:
./squad-track-duration --group lkft --project linux-next-master \
--start-datetime 2024-04-01 --end-datetime 2024-05-02

A file called builds.json functions as a database, it stores finished
builds from SQUAD.

Note: Incorporated Katie's suggestion regarding the datetime library.

Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
---
 squad-track-duration | 384 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 384 insertions(+)
 create mode 100755 squad-track-duration

diff --git a/squad-track-duration b/squad-track-duration
new file mode 100755
index 0000000..3e4013d
--- /dev/null
+++ b/squad-track-duration
@@ -0,0 +1,384 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# vim: set ts=4
+#
+# Copyright 2024-present Linaro Limited
+#
+# SPDX-License-Identifier: MIT
+
+
+import argparse
+import json
+import logging
+import os
+import sys
+from datetime import datetime, timedelta
+from pathlib import Path
+
+import pandas as pd
+import plotly.express as px
+from squad_client.core.api import SquadApi
+from squad_client.core.models import ALL, Squad
+
+squad_host_url = "https://qa-reports.linaro.org/"
+SquadApi.configure(cache=3600, url=os.getenv("SQUAD_HOST", squad_host_url))
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+ARTIFACTORIAL_FILENAME = "builds.json"
+
+
+class MetaFigure:
+    def __init__(self, plotly_fig, title, description):
+        self.plotly_fig = plotly_fig
+        self.title = title
+        self.description = description
+
+    def fig(self):
+        return self.fig
+
+    def title(self):
+        return self.title
+
+    def description(self):
+        return self.description
+
+
+def parse_datetime_from_string(datetime_string):
+    accepted_datetime_formats = ["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]
+
+    # Loop through each accepted datetime format and try parse it
+    for datetime_format in accepted_datetime_formats:
+        try:
+            # If the format parses successfully, return the datetime object
+            return datetime.strptime(datetime_string, datetime_format)
+        except ValueError:
+            pass
+
+    # If no format can be parsed, raise an argument error
+    raise argparse.ArgumentTypeError(
+        f"Unsupported datetime format {datetime_string}. Accepted formats are {accepted_datetime_formats}"
+    )
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description="Track duration")
+
+    parser.add_argument(
+        "--group",
+        required=True,
+        help="squad group",
+    )
+
+    parser.add_argument(
+        "--project",
+        required=True,
+        help="squad project",
+    )
+
+    parser.add_argument(
+        "--start-datetime",
+        type=parse_datetime_from_string,
+        required=True,
+        help="Starting date time. Example: 2022-01-01 or 2022-01-01T00:00:00",
+    )
+
+    parser.add_argument(
+        "--end-datetime",
+        type=parse_datetime_from_string,
+        required=True,
+        help="Ending date time. Example: 2022-12-31 or 2022-12-31T00:00:00",
+    )
+
+    parser.add_argument(
+        "--build-name",
+        required=False,
+        default="gcc-13-lkftconfig",
+        help="Build name",
+    )
+
+    parser.add_argument(
+        "--debug",
+        action="store_true",
+        default=False,
+        help="Display debug messages",
+    )
+
+    return parser.parse_args()
+
+
+def get_cache_from_artifactorial():
+    exists = os.path.exists(ARTIFACTORIAL_FILENAME)
+    if not exists:
+        return {}
+
+    with open(ARTIFACTORIAL_FILENAME, "r") as fp:
+        builds = json.load(fp)
+        return builds
+
+    return {}
+
+
+def save_build_cache_to_artifactorial(data, days_ago=None):
+    with open(ARTIFACTORIAL_FILENAME, "w") as fp:
+        json.dump(data, fp)
+
+
+def get_data(args, build_cache):
+    start_datetime = args.start_datetime
+    end_datetime = args.end_datetime
+
+    group = Squad().group(args.group)
+    project = group.project(args.project)
+    environments = project.environments(count=ALL).values()
+
+    first_start_day = True
+    final_end_date = False
+    tmp_data = []
+
+    # Set up a delta which determines how many days of data to read from SQUAD
+    # per loop. Minimum delta is 1 day and delta must be in whole days to keep
+    # this code easy to read, understand and debug.
+    delta = timedelta(days=1)
+
+    if delta.days < 1:
+        raise Exception("Minimum delta is 1 day for this code to work.")
+    if delta.seconds != 0 or delta.microseconds != 0:
+        raise Exception("Deltas must be whole days only.")
+
+    # Loops through each delta until the end date and filters the SQUAD data
+    # for that delta
+    while not final_end_date:
+
+        # If it is the first date in the range, use the provided start datetime
+        if first_start_day:
+            first_start_day = False
+            # Use the provided start time for the first day
+            tmp_start_datetime = start_datetime
+        else:
+            # For all other days, update the date by the delta then use the
+            # start of the day by zeroing hours, minutes and seconds
+            tmp_start_datetime += delta
+            tmp_start_datetime = tmp_start_datetime.replace(hour=0, minute=0, second=0)
+
+        # If the delta for this iteration sends us over the end of the range,
+        # use the provided end datetime
+        if tmp_start_datetime + delta >= end_datetime:
+            # We have reached the last day, so use this as the end date
+            tmp_end_datetime = end_datetime
+            final_end_date = True
+        else:
+            # Otherwise take the start time (with minutes zeroed) + delta
+            tmp_end_datetime = (
+                tmp_start_datetime.replace(hour=0, minute=0, second=0) + delta
+            )
+
+        logger.info(
+            f"Fetching builds from SQUAD, start_datetime: {tmp_start_datetime}, end_datetime: {tmp_end_datetime}"
+        )
+
+        filters = {
+            "created_at__lt": tmp_end_datetime.strftime("%Y-%m-%dT%H:%M:%S"),
+            "created_at__gt": tmp_start_datetime.strftime("%Y-%m-%dT%H:%M:%S"),
+            "count": ALL,
+        }
+
+        builds = project.builds(**filters)
+        device_dict = {}
+
+        # Loop through the environments and create a lookup table for URL -> device name (slug)
+        for env in environments:
+            device_dict[env.url] = env.slug
+
+        # Loop through the builds in the specified window and cache their data
+        # to a file if they are marked as finished. This will mean that we don't
+        # have to look them up again is SQUAD if we have already looked them up.
+        for build_id, build in builds.items():
+            if str(build_id) in build_cache.keys():
+                logger.debug(f"cached: {build_id}")
+                tmp_data = tmp_data + build_cache[str(build_id)]
+            else:
+                logger.debug(f"no-cache: {build_id}")
+                tmp_build_cache = []
+                testruns = build.testruns(count=ALL, prefetch_metadata=True)
+                for testrun_key, testrun in testruns.items():
+                    device = device_dict[testrun.environment]
+                    metadata = testrun.metadata
+
+                    durations = metadata.durations
+                    # Ignore testruns without duration data
+                    if durations is None:
+                        continue
+
+                    build_name = metadata.build_name
+                    # Ignore testruns without a build_name
+                    if build_name is None:
+                        continue
+
+                    # Read the boot time from the duration data
+                    boottime = durations["tests"]["boot"]
+                    tmp = {
+                        "build_id": build_id,
+                        "build_name": build_name,
+                        "git_describe": build.version.strip(),
+                        "device": device,
+                        "boottime": float(boottime),
+                        "finished": build.finished,
+                        "created_at": build.created_at,
+                    }
+                    tmp_data.append(tmp)
+                    tmp_build_cache.append(tmp)
+
+                # Cache data for builds that are marked finished
+                if build.finished and len(tmp_build_cache) > 0:
+                    build_cache[str(build_id)] = tmp_build_cache
+                    logger.debug(f"finished: {build_id}, {build.finished}")
+
+    return tmp_data, build_cache
+
+
+def combine_plotly_figs_to_html(
+    figs,
+    html_fname,
+    main_title,
+    main_description,
+    include_plotlyjs="cdn",
+    separator=None,
+    auto_open=False,
+):
+    with open(html_fname, "w") as f:
+        f.write(f"<h1>{main_title}</h1>")
+        f.write(f"<div>{main_description}</div>")
+        index = 0
+        f.write("<h2>Page content</h2>")
+        f.write("<ul>")
+        for fig in figs[1:]:
+            index = index + 1
+            f.write(f'<li><a href="#fig{index}">{fig.title}</a></li>')
+        f.write("</ul>")
+        f.write(f'<h2><a id="fig0">{figs[0].title}</a></h2>')
+        f.write(f"<div>{figs[0].description}</div>")
+        f.write(figs[0].plotly_fig.to_html(include_plotlyjs=include_plotlyjs))
+        index = 0
+        for fig in figs[1:]:
+            index = index + 1
+            if separator:
+                f.write(separator)
+            f.write(f'<h2><a id="fig{index}">{fig.title}</a></h2>')
+            f.write(f"<div>{fig.description}</div>")
+            f.write(fig.plotly_fig.to_html(full_html=False, include_plotlyjs=False))
+
+    if auto_open:
+        import webbrowser
+
+        uri = Path(html_fname).absolute().as_uri()
+        webbrowser.open(uri)
+
+
+def run():
+    args = parse_args()
+    if args.debug:
+        logger.setLevel(level=logging.DEBUG)
+
+    if args.start_datetime > args.end_datetime:
+        raise Exception("Start time must be earlier than end time.")
+
+    df = pd.DataFrame(
+        {
+            "build_name": [],
+            "git_describe": [],
+            "device": [],
+            "boottime": [],
+            "finished": [],
+            "created_at": [],
+        }
+    )
+
+    build_cache = get_cache_from_artifactorial()
+    data = []
+    data, build_cache = get_data(args, build_cache)
+
+    save_build_cache_to_artifactorial(build_cache)
+
+    # Turn the data (list of dicts) into a pandas dataframe
+    df = pd.DataFrame(data)
+
+    logger.debug("***********************")
+    logger.debug(df)
+    logger.debug(df.info())
+    logger.debug("***********************")
+
+    # Generate a build_name_device column and add this as a column in the dataframe
+    df["build_name_device"] = df.build_name + "-" + df.device
+    figure_colletion = []
+
+    # Create a dataframe which groups by type then takes the mean of the boot
+    # time per type.
+    dft = df.groupby(["created_at", "git_describe", "device", "build_name"])[
+        "boottime"
+    ].mean()
+
+    # Convert the Series object back to a DataFrame then sort by the created_at
+    dft = dft.reset_index().sort_values(by="created_at")
+
+    # Filter these results by the desired build name(s)
+    dft = dft[dft["build_name"].isin([args.build_name])]
+
+    # Create the figure to display this data
+    figure_colletion.append(
+        MetaFigure(
+            px.line(dft, x="created_at", y="boottime", color="device", markers=True)
+            .update_xaxes(tickvals=dft["created_at"], ticktext=dft["git_describe"])
+            .update_layout(xaxis_title="Version", yaxis_title="Boot time"),
+            f"Line graph, {args.build_name}",
+            f"This line graph, is generated from build_name {args.build_name}.",
+        )
+    )
+
+    # Group and the mean of the boot time for the desired type - this time it is
+    # grouped by build_name_device, too, since we want to look at both the build
+    # and what device this was run on.
+    dfp = df.groupby(
+        ["created_at", "git_describe", "device", "build_name_device", "build_name"]
+    )["boottime"].mean()
+
+    # Convert the Series object back to a DataFrame then sort by the created_at
+    # and build_name_device
+    dfp = dfp.reset_index().sort_values(by=["created_at", "build_name_device"])
+
+    # Filter by results from the specified build names
+    dfp = dfp[dfp['build_name'].str.endswith(args.build_name.split('-')[-1])]
+    logger.debug(dfp.info())
+    logger.debug(dfp)
+
+    # Create the figure for this visualisation
+    figure_colletion.append(
+        MetaFigure(
+            px.line(
+                dfp,
+                x="created_at",
+                y="boottime",
+                color="build_name_device",
+                markers=True,
+                labels={"build_name_device": "Build name - device"},
+            )
+            .update_xaxes(tickvals=dft["created_at"], ticktext=dft["git_describe"])
+            .update_layout(xaxis_title="Version", yaxis_title="Boot time"),
+            f"Line graph, {args.build_name.split('-')[-1]}",
+            f"This line graph, is generated from \"{args.build_name.split('-')[-1]}\".",
+        )
+    )
+
+    combine_plotly_figs_to_html(
+        figure_colletion,
+        "index.html",
+        "This page show some interesting data around LKFT's builds",
+        f"These graphs is based on LKFT's {args.project} branch",
+    )
+
+    exit(0)
+
+
+if __name__ == "__main__":
+    sys.exit(run())