add squad-track-duration #43
Merged
@@ -0,0 +1,384 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# vim: set ts=4
#
# Copyright 2024-present Linaro Limited
#
# SPDX-License-Identifier: MIT

import argparse
import json
import logging
import os
import sys
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd
import plotly.express as px
from squad_client.core.api import SquadApi
from squad_client.core.models import ALL, Squad

squad_host_url = "https://qa-reports.linaro.org/"
SquadApi.configure(cache=3600, url=os.getenv("SQUAD_HOST", squad_host_url))

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

ARTIFACTORIAL_FILENAME = "builds.json"

class MetaFigure:
    """Bundle a Plotly figure with the title and description shown in the report."""

    # The plotly_fig, title and description attributes are read directly by
    # combine_plotly_figs_to_html(), so no accessor methods are needed.
    def __init__(self, plotly_fig, title, description):
        self.plotly_fig = plotly_fig
        self.title = title
        self.description = description

def parse_datetime_from_string(datetime_string):
    accepted_datetime_formats = ["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]

    # Loop through each accepted datetime format and try to parse it
    for datetime_format in accepted_datetime_formats:
        try:
            # If the format parses successfully, return the datetime object
            return datetime.strptime(datetime_string, datetime_format)
        except ValueError:
            pass

    # If no format can be parsed, raise an argument error
    raise argparse.ArgumentTypeError(
        f"Unsupported datetime format {datetime_string}. Accepted formats are {accepted_datetime_formats}"
    )

def parse_args():
    parser = argparse.ArgumentParser(description="Track duration")

    parser.add_argument(
        "--group",
        required=True,
        help="squad group",
    )

    parser.add_argument(
        "--project",
        required=True,
        help="squad project",
    )

    parser.add_argument(
        "--start-datetime",
        type=parse_datetime_from_string,
        required=True,
        help="Starting date time. Example: 2022-01-01 or 2022-01-01T00:00:00",
    )

    parser.add_argument(
        "--end-datetime",
        type=parse_datetime_from_string,
        required=True,
        help="Ending date time. Example: 2022-12-31 or 2022-12-31T00:00:00",
    )

    parser.add_argument(
        "--build-name",
        required=False,
        default="gcc-13-lkftconfig",
        help="Build name",
    )

    parser.add_argument(
        "--debug",
        action="store_true",
        default=False,
        help="Display debug messages",
    )

    return parser.parse_args()

def get_cache_from_artifactorial():
    # Start with an empty cache when no cache file exists yet
    if not os.path.exists(ARTIFACTORIAL_FILENAME):
        return {}

    with open(ARTIFACTORIAL_FILENAME, "r") as fp:
        return json.load(fp)


def save_build_cache_to_artifactorial(data):
    with open(ARTIFACTORIAL_FILENAME, "w") as fp:
        json.dump(data, fp)

def get_data(args, build_cache):
    start_datetime = args.start_datetime
    end_datetime = args.end_datetime

    group = Squad().group(args.group)
    project = group.project(args.project)
    environments = project.environments(count=ALL).values()

    first_start_day = True
    final_end_date = False
    tmp_data = []

    # Set up a delta which determines how many days of data to read from SQUAD
    # per loop. Minimum delta is 1 day and delta must be in whole days to keep
    # this code easy to read, understand and debug.
    delta = timedelta(days=1)

    if delta.days < 1:
        raise Exception("Minimum delta is 1 day for this code to work.")
    if delta.seconds != 0 or delta.microseconds != 0:
        raise Exception("Deltas must be whole days only.")

    # Loop through each delta until the end date and filter the SQUAD data
    # for that delta
    while not final_end_date:
        # If it is the first date in the range, use the provided start datetime
        if first_start_day:
            first_start_day = False
            # Use the provided start time for the first day
            tmp_start_datetime = start_datetime
        else:
            # For all other days, update the date by the delta then use the
            # start of the day by zeroing hours, minutes and seconds
            tmp_start_datetime += delta
            tmp_start_datetime = tmp_start_datetime.replace(hour=0, minute=0, second=0)

        # If the delta for this iteration sends us over the end of the range,
        # use the provided end datetime
        if tmp_start_datetime + delta >= end_datetime:
            # We have reached the last day, so use this as the end date
            tmp_end_datetime = end_datetime
            final_end_date = True
        else:
            # Otherwise take the start time (with minutes zeroed) + delta
            tmp_end_datetime = (
                tmp_start_datetime.replace(hour=0, minute=0, second=0) + delta
            )

        logger.info(
            f"Fetching builds from SQUAD, start_datetime: {tmp_start_datetime}, end_datetime: {tmp_end_datetime}"
        )

        filters = {
            "created_at__lt": tmp_end_datetime.strftime("%Y-%m-%dT%H:%M:%S"),
            "created_at__gt": tmp_start_datetime.strftime("%Y-%m-%dT%H:%M:%S"),
            "count": ALL,
        }

        builds = project.builds(**filters)
        device_dict = {}

        # Loop through the environments and create a lookup table for URL -> device name (slug)
        for env in environments:
            device_dict[env.url] = env.slug

        # Loop through the builds in the specified window and cache their data
        # to a file if they are marked as finished. This means that we don't
        # have to look them up again in SQUAD if we have already looked them up.
        for build_id, build in builds.items():
            if str(build_id) in build_cache.keys():
                logger.debug(f"cached: {build_id}")
                tmp_data = tmp_data + build_cache[str(build_id)]
            else:
                logger.debug(f"no-cache: {build_id}")
                tmp_build_cache = []
                testruns = build.testruns(count=ALL, prefetch_metadata=True)
                for testrun_key, testrun in testruns.items():
                    device = device_dict[testrun.environment]
                    metadata = testrun.metadata

                    durations = metadata.durations
                    # Ignore testruns without duration data
                    if durations is None:
                        continue

                    build_name = metadata.build_name
                    # Ignore testruns without a build_name
                    if build_name is None:
                        continue

                    # Read the boot time from the duration data
                    boottime = durations["tests"]["boot"]
                    tmp = {
                        "build_id": build_id,
                        "build_name": build_name,
                        "git_describe": build.version.strip(),
                        "device": device,
                        "boottime": float(boottime),
                        "finished": build.finished,
                        "created_at": build.created_at,
                    }
                    tmp_data.append(tmp)
                    tmp_build_cache.append(tmp)

                # Cache data for builds that are marked finished
                if build.finished and len(tmp_build_cache) > 0:
                    build_cache[str(build_id)] = tmp_build_cache
                    logger.debug(f"finished: {build_id}, {build.finished}")

    return tmp_data, build_cache

def combine_plotly_figs_to_html(
    figs,
    html_fname,
    main_title,
    main_description,
    include_plotlyjs="cdn",
    separator=None,
    auto_open=False,
):
    with open(html_fname, "w") as f:
        f.write(f"<h1>{main_title}</h1>")
        f.write(f"<div>{main_description}</div>")
        index = 0
        f.write("<h2>Page content</h2>")
        f.write("<ul>")
        for fig in figs[1:]:
            index = index + 1
            f.write(f'<li><a href="#fig{index}">{fig.title}</a></li>')
        f.write("</ul>")
        f.write(f'<h2><a id="fig0">{figs[0].title}</a></h2>')
        f.write(f"<div>{figs[0].description}</div>")
        # Write the first figure as an HTML fragment (not a full document) so
        # the output stays valid HTML; only this figure pulls in plotly.js.
        f.write(
            figs[0].plotly_fig.to_html(
                full_html=False, include_plotlyjs=include_plotlyjs
            )
        )
        index = 0
        for fig in figs[1:]:
            index = index + 1
            if separator:
                f.write(separator)
            f.write(f'<h2><a id="fig{index}">{fig.title}</a></h2>')
            f.write(f"<div>{fig.description}</div>")
            f.write(fig.plotly_fig.to_html(full_html=False, include_plotlyjs=False))

    if auto_open:
        import webbrowser

        uri = Path(html_fname).absolute().as_uri()
        webbrowser.open(uri)

def run():
    args = parse_args()
    if args.debug:
        logger.setLevel(level=logging.DEBUG)

    if args.start_datetime > args.end_datetime:
        raise Exception("Start time must be earlier than end time.")

    build_cache = get_cache_from_artifactorial()
    data, build_cache = get_data(args, build_cache)

    save_build_cache_to_artifactorial(build_cache)
Review comment (on the caching above): Big fan of this data caching - I was actually interested in doing something similar in another project. In that case, there is too much data for json files to be viable, so I was wondering if a proper database could be used to better handle the scale of the data (DuckDB seemed easiest from my investigation). Seems overkill in this case unless you start seeing issues with the jsons, of course :)
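
A minimal sketch of that DuckDB idea, assuming one cache row per build keyed by build id; the builds.duckdb file, the build_cache table and the helper names are illustrative, not part of this PR:

import duckdb
import json

con = duckdb.connect("builds.duckdb")
con.execute(
    "CREATE TABLE IF NOT EXISTS build_cache (build_id VARCHAR PRIMARY KEY, rows VARCHAR)"
)

def load_cached_build(build_id):
    # Return the cached list of dicts for a build, or None on a cache miss
    hit = con.execute(
        "SELECT rows FROM build_cache WHERE build_id = ?", [str(build_id)]
    ).fetchone()
    return json.loads(hit[0]) if hit else None

def store_finished_build(build_id, rows):
    # Mirror the JSON cache behaviour: only store builds marked as finished
    con.execute(
        "INSERT OR REPLACE INTO build_cache VALUES (?, ?)",
        [str(build_id), json.dumps(rows)],
    )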
    # Turn the data (list of dicts) into a pandas DataFrame
    df = pd.DataFrame(data)

    logger.debug("***********************")
    logger.debug(df)
    logger.debug(df.info())
    logger.debug("***********************")

    # Generate a build_name_device column and add this as a column in the DataFrame
    df["build_name_device"] = df.build_name + "-" + df.device
    figure_collection = []

    # Create a DataFrame which groups by type then takes the mean of the boot
    # time per type.
    dft = df.groupby(["created_at", "git_describe", "device", "build_name"])[
        "boottime"
    ].mean()

    # Convert the Series object back to a DataFrame then sort by created_at
    dft = dft.reset_index().sort_values(by="created_at")

    # Filter these results by the desired build name(s)
    dft = dft[dft["build_name"].isin([args.build_name])]

    # Create the figure to display this data
    figure_collection.append(
        MetaFigure(
            px.line(dft, x="created_at", y="boottime", color="device", markers=True)
            .update_xaxes(tickvals=dft["created_at"], ticktext=dft["git_describe"])
            .update_layout(xaxis_title="Version", yaxis_title="Boot time"),
            f"Line graph, {args.build_name}",
            f"This line graph is generated from build_name {args.build_name}.",
        )
    )

    # Group and take the mean of the boot time for the desired type - this time
    # it is grouped by build_name_device, too, since we want to look at both the
    # build and what device this was run on.
    dfp = df.groupby(
        ["created_at", "git_describe", "device", "build_name_device", "build_name"]
    )["boottime"].mean()

    # Convert the Series object back to a DataFrame then sort by created_at
    # and build_name_device
    dfp = dfp.reset_index().sort_values(by=["created_at", "build_name_device"])

    # Filter by results from the specified build names
    dfp = dfp[dfp["build_name"].str.endswith(args.build_name.split("-")[-1])]
    logger.debug(dfp.info())
    logger.debug(dfp)

    # Create the figure for this visualisation
    figure_collection.append(
        MetaFigure(
            px.line(
                dfp,
                x="created_at",
                y="boottime",
                color="build_name_device",
                markers=True,
                labels={"build_name_device": "Build name - device"},
            )
            # Use dfp (not dft) here so the tick labels match the plotted data
            .update_xaxes(tickvals=dfp["created_at"], ticktext=dfp["git_describe"])
            .update_layout(xaxis_title="Version", yaxis_title="Boot time"),
            f"Line graph, {args.build_name.split('-')[-1]}",
            f"This line graph is generated from \"{args.build_name.split('-')[-1]}\".",
        )
    )

    combine_plotly_figs_to_html(
        figure_collection,
        "index.html",
        "This page shows some interesting data around LKFT's builds",
        f"These graphs are based on LKFT's {args.project} branch",
    )

    return 0


if __name__ == "__main__":
    sys.exit(run())
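
For reference, an invocation of the script might look like the following, assuming the file is saved as squad-track-duration; the group and project slugs are illustrative. The report is written to index.html in the current directory, and builds.json caches finished builds between runs.

python3 squad-track-duration --group lkft --project linux-next-master \
    --start-datetime 2024-01-01 --end-datetime 2024-01-07 \
    --build-name gcc-13-lkftconfig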
Review comment (on the one-day delta in get_data): Is there a reason we go through the data a day at a time? I feel like this should be configurable - when I ran locally I increased this to 30 days at a time :)
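
A sketch of that suggestion, assuming a hypothetical --window-days flag (not part of this PR):

# In parse_args(), a hypothetical flag for the fetch window:
parser.add_argument(
    "--window-days",
    type=int,
    default=1,
    help="Days of builds to fetch from SQUAD per request",
)

# ...and in get_data(), derive the delta from the flag:
delta = timedelta(days=args.window_days)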
Reply: I did set it to 1 day so we can get some output. =) Making it configurable could be nice to have in the pipeline.