This repository has been archived by the owner on Jul 30, 2024. It is now read-only.

add squad-track-duration #43

Merged
2 commits merged on May 8, 2024
384 changes: 384 additions & 0 deletions squad-track-duration
@@ -0,0 +1,384 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# vim: set ts=4
#
# Copyright 2024-present Linaro Limited
#
# SPDX-License-Identifier: MIT


import argparse
import json
import logging
import os
import sys
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd
import plotly.express as px
from squad_client.core.api import SquadApi
from squad_client.core.models import ALL, Squad

squad_host_url = "https://qa-reports.linaro.org/"
SquadApi.configure(cache=3600, url=os.getenv("SQUAD_HOST", squad_host_url))

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

ARTIFACTORIAL_FILENAME = "builds.json"


class MetaFigure:
    # Simple container bundling a plotly figure with the title and description
    # used when writing the combined HTML report. The attributes are accessed
    # directly (fig.plotly_fig, fig.title, fig.description).
    def __init__(self, plotly_fig, title, description):
        self.plotly_fig = plotly_fig
        self.title = title
        self.description = description


def parse_datetime_from_string(datetime_string):
accepted_datetime_formats = ["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]

    # Loop through each accepted datetime format and try to parse it
for datetime_format in accepted_datetime_formats:
try:
# If the format parses successfully, return the datetime object
return datetime.strptime(datetime_string, datetime_format)
except ValueError:
pass

# If no format can be parsed, raise an argument error
raise argparse.ArgumentTypeError(
f"Unsupported datetime format {datetime_string}. Accepted formats are {accepted_datetime_formats}"
)


def parse_args():
parser = argparse.ArgumentParser(description="Track duration")

parser.add_argument(
"--group",
required=True,
help="squad group",
)

parser.add_argument(
"--project",
required=True,
help="squad project",
)

parser.add_argument(
"--start-datetime",
type=parse_datetime_from_string,
required=True,
help="Starting date time. Example: 2022-01-01 or 2022-01-01T00:00:00",
)

parser.add_argument(
"--end-datetime",
type=parse_datetime_from_string,
required=True,
help="Ending date time. Example: 2022-12-31 or 2022-12-31T00:00:00",
)

parser.add_argument(
"--build-name",
required=False,
default="gcc-13-lkftconfig",
help="Build name",
)

parser.add_argument(
"--debug",
action="store_true",
default=False,
help="Display debug messages",
)

return parser.parse_args()


def get_cache_from_artifactorial():
    if not os.path.exists(ARTIFACTORIAL_FILENAME):
        return {}

    with open(ARTIFACTORIAL_FILENAME, "r") as fp:
        return json.load(fp)


def save_build_cache_to_artifactorial(data):
with open(ARTIFACTORIAL_FILENAME, "w") as fp:
json.dump(data, fp)


def get_data(args, build_cache):
start_datetime = args.start_datetime
end_datetime = args.end_datetime

group = Squad().group(args.group)
project = group.project(args.project)
environments = project.environments(count=ALL).values()

first_start_day = True
final_end_date = False
tmp_data = []

# Set up a delta which determines how many days of data to read from SQUAD
# per loop. Minimum delta is 1 day and delta must be in whole days to keep
# this code easy to read, understand and debug.
delta = timedelta(days=1)
Member commented:
> Is there a reason we go through the data a day at a time? I feel like this should be configurable - when I ran locally I increased this to 30 days at a time :)

Collaborator (author) replied:
> I did set it to 1 day so we can get some output. =)
> Could be nice to have in the pipeline.
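A minimal sketch of how that could look, assuming a hypothetical --window-days option (the names below are illustrative and not part of this diff):

import argparse
from datetime import timedelta

# Hypothetical option; the script as merged hard-codes a one-day window.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--window-days",
    type=int,
    default=1,
    help="Whole days of SQUAD data to fetch per request",
)
args = parser.parse_args(["--window-days", "30"])

# get_data() would then derive its per-iteration delta from the option
# instead of the fixed timedelta(days=1).
delta = timedelta(days=args.window_days)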


if delta.days < 1:
raise Exception("Minimum delta is 1 day for this code to work.")
if delta.seconds != 0 or delta.microseconds != 0:
raise Exception("Deltas must be whole days only.")

    # Loop through the range one delta at a time until the end date, filtering
    # the SQUAD data for each window
while not final_end_date:

# If it is the first date in the range, use the provided start datetime
if first_start_day:
first_start_day = False
# Use the provided start time for the first day
tmp_start_datetime = start_datetime
else:
# For all other days, update the date by the delta then use the
# start of the day by zeroing hours, minutes and seconds
tmp_start_datetime += delta
tmp_start_datetime = tmp_start_datetime.replace(hour=0, minute=0, second=0)

# If the delta for this iteration sends us over the end of the range,
# use the provided end datetime
if tmp_start_datetime + delta >= end_datetime:
# We have reached the last day, so use this as the end date
tmp_end_datetime = end_datetime
final_end_date = True
else:
# Otherwise take the start time (with minutes zeroed) + delta
tmp_end_datetime = (
tmp_start_datetime.replace(hour=0, minute=0, second=0) + delta
)

logger.info(
f"Fetching builds from SQUAD, start_datetime: {tmp_start_datetime}, end_datetime: {tmp_end_datetime}"
)

filters = {
"created_at__lt": tmp_end_datetime.strftime("%Y-%m-%dT%H:%M:%S"),
"created_at__gt": tmp_start_datetime.strftime("%Y-%m-%dT%H:%M:%S"),
"count": ALL,
}

builds = project.builds(**filters)
device_dict = {}

# Loop through the environments and create a lookup table for URL -> device name (slug)
for env in environments:
device_dict[env.url] = env.slug

# Loop through the builds in the specified window and cache their data
# to a file if they are marked as finished. This will mean that we don't
        # have to look them up again in SQUAD if we have already looked them up.
for build_id, build in builds.items():
if str(build_id) in build_cache.keys():
logger.debug(f"cached: {build_id}")
tmp_data = tmp_data + build_cache[str(build_id)]
else:
logger.debug(f"no-cache: {build_id}")
tmp_build_cache = []
testruns = build.testruns(count=ALL, prefetch_metadata=True)
for testrun_key, testrun in testruns.items():
device = device_dict[testrun.environment]
metadata = testrun.metadata

durations = metadata.durations
# Ignore testruns without duration data
if durations is None:
continue

build_name = metadata.build_name
# Ignore testruns without a build_name
if build_name is None:
continue

# Read the boot time from the duration data
boottime = durations["tests"]["boot"]
tmp = {
"build_id": build_id,
"build_name": build_name,
"git_describe": build.version.strip(),
"device": device,
"boottime": float(boottime),
"finished": build.finished,
"created_at": build.created_at,
}
tmp_data.append(tmp)
tmp_build_cache.append(tmp)

# Cache data for builds that are marked finished
if build.finished and len(tmp_build_cache) > 0:
build_cache[str(build_id)] = tmp_build_cache
logger.debug(f"finished: {build_id}, {build.finished}")

return tmp_data, build_cache


def combine_plotly_figs_to_html(
figs,
html_fname,
main_title,
main_description,
include_plotlyjs="cdn",
separator=None,
auto_open=False,
):
with open(html_fname, "w") as f:
f.write(f"<h1>{main_title}</h1>")
f.write(f"<div>{main_description}</div>")
index = 0
f.write("<h2>Page content</h2>")
f.write("<ul>")
for fig in figs[1:]:
index = index + 1
f.write(f'<li><a href="#fig{index}">{fig.title}</a></li>')
f.write("</ul>")
f.write(f'<h2><a id="fig0">{figs[0].title}</a></h2>')
f.write(f"<div>{figs[0].description}</div>")
f.write(figs[0].plotly_fig.to_html(include_plotlyjs=include_plotlyjs))
index = 0
for fig in figs[1:]:
index = index + 1
if separator:
f.write(separator)
f.write(f'<h2><a id="fig{index}">{fig.title}</a></h2>')
f.write(f"<div>{fig.description}</div>")
f.write(fig.plotly_fig.to_html(full_html=False, include_plotlyjs=False))

if auto_open:
import webbrowser

uri = Path(html_fname).absolute().as_uri()
webbrowser.open(uri)


def run():
args = parse_args()
if args.debug:
logger.setLevel(level=logging.DEBUG)

if args.start_datetime > args.end_datetime:
raise Exception("Start time must be earlier than end time.")

df = pd.DataFrame(
{
"build_name": [],
"git_describe": [],
"device": [],
"boottime": [],
"finished": [],
"created_at": [],
}
)

build_cache = get_cache_from_artifactorial()
data = []
data, build_cache = get_data(args, build_cache)

save_build_cache_to_artifactorial(build_cache)
Member commented:
> Big fan of this data caching - I was actually interested in doing something similar in another project. In that case, there is too much data for json files to be viable, so I was wondering if a proper database could be used to better handle the scale of the data (DuckDB seemed easiest from my investigation). Seems overkill in this case unless you start seeing issues with the jsons, of course :)
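A minimal sketch of what a DuckDB-backed cache could look like if the JSON file ever becomes a bottleneck; the table and function names here are hypothetical and not part of this change:

import json

import duckdb  # hypothetical alternative cache backend


def load_build_cache(path="builds.duckdb"):
    # One row per build id, with the cached test run entries stored as JSON text.
    con = duckdb.connect(path)
    con.execute(
        "CREATE TABLE IF NOT EXISTS build_cache (build_id VARCHAR PRIMARY KEY, data VARCHAR)"
    )
    rows = con.execute("SELECT build_id, data FROM build_cache").fetchall()
    con.close()
    return {build_id: json.loads(data) for build_id, data in rows}


def save_build_cache(cache, path="builds.duckdb"):
    con = duckdb.connect(path)
    con.execute(
        "CREATE TABLE IF NOT EXISTS build_cache (build_id VARCHAR PRIMARY KEY, data VARCHAR)"
    )
    for build_id, entries in cache.items():
        # INSERT OR REPLACE keeps only the latest cached entries for each build.
        con.execute(
            "INSERT OR REPLACE INTO build_cache VALUES (?, ?)",
            [build_id, json.dumps(entries)],
        )
    con.close()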


# Turn the data (list of dicts) into a pandas DataFrame
df = pd.DataFrame(data)

logger.debug("***********************")
logger.debug(df)
logger.debug(df.info())
logger.debug("***********************")

# Generate a build_name_device column and add this as a column in the DataFrame
df["build_name_device"] = df.build_name + "-" + df.device
    figure_collection = []

# Create a DataFrame which groups by type then takes the mean of the boot
# time per type.
dft = df.groupby(["created_at", "git_describe", "device", "build_name"])[
"boottime"
].mean()

# Convert the Series object back to a DataFrame then sort by the created_at
dft = dft.reset_index().sort_values(by="created_at")

# Filter these results by the desired build name(s)
dft = dft[dft["build_name"].isin([args.build_name])]

# Create the figure to display this data
    figure_collection.append(
MetaFigure(
px.line(dft, x="created_at", y="boottime", color="device", markers=True)
.update_xaxes(tickvals=dft["created_at"], ticktext=dft["git_describe"])
.update_layout(xaxis_title="Version", yaxis_title="Boot time"),
f"Line graph, {args.build_name}",
f"This line graph, is generated from build_name {args.build_name}.",
)
)

    # Group and take the mean of the boot time for the desired type - this time
    # it is grouped by build_name_device, too, since we want to look at both the
    # build and the device it was run on.
dfp = df.groupby(
["created_at", "git_describe", "device", "build_name_device", "build_name"]
)["boottime"].mean()

# Convert the Series object back to a DataFrame then sort by the created_at
# and build_name_device
dfp = dfp.reset_index().sort_values(by=["created_at", "build_name_device"])

    # Filter to build names that share the suffix of the specified build name
dfp = dfp[dfp['build_name'].str.endswith(args.build_name.split('-')[-1])]
logger.debug(dfp.info())
logger.debug(dfp)

# Create the figure for this visualisation
    figure_collection.append(
MetaFigure(
px.line(
dfp,
x="created_at",
y="boottime",
color="build_name_device",
markers=True,
labels={"build_name_device": "Build name - device"},
)
.update_xaxes(tickvals=dft["created_at"], ticktext=dft["git_describe"])
.update_layout(xaxis_title="Version", yaxis_title="Boot time"),
f"Line graph, {args.build_name.split('-')[-1]}",
f"This line graph, is generated from \"{args.build_name.split('-')[-1]}\".",
)
)

    combine_plotly_figs_to_html(
        figure_collection,
        "index.html",
        "This page shows some interesting data around LKFT's builds",
        f"These graphs are based on LKFT's {args.project} branch",
    )

    return 0


if __name__ == "__main__":
sys.exit(run())