Skip to content

Commit 9571290

Browse files
authored
Add notification for PyTorch Operator Microbenchmark (#7447)
## Overview Add notification for the PyTorch operator microbenchmark. Add `median` as an option for choosing the median value as the baseline. Data visualization example: https://hud.pytorch.org/benchmark/regression/report/acfae3e8-7680-403b-a234-79e5c194f4c0 ## Bug Fix 1. [Notification] return skip when the timeseries from the API response is an empty list, not the class object 2. [API] fix emptyTimeSeriesResponse so its format matches that of the data response ## Next step Add a search filter for the regression report
1 parent 775426e commit 9571290

File tree

4 files changed

+80
-15
lines changed

4 files changed

+80
-15
lines changed

aws/lambda/benchmark_regression_summary_report/common/config.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,57 @@
1111
)
1212

1313

14+
PYTORCH_OPERATOR_MICROBENCH_CONFIG = BenchmarkConfig(
15+
name="Pytorch Operator Microbench Regression",
16+
id="pytorch_operator_microbenchmark",
17+
source=BenchmarkApiSource(
18+
api_query_url="https://hud.pytorch.org/api/benchmark/get_time_series",
19+
type="benchmark_time_series_api",
20+
api_endpoint_params_template="""
21+
{
22+
"name": "pytorch_operator_microbenchmark",
23+
"query_params": {
24+
"mode": "",
25+
"branches": ["main"],
26+
"repo": "pytorch/pytorch",
27+
"device": "",
28+
"benchmarkName": "PyTorch operator microbenchmark",
29+
"startTime": "{{ startTime }}",
30+
"stopTime": "{{ stopTime }}"
31+
},
32+
"response_formats":["time_series"]
33+
}
34+
""",
35+
),
36+
hud_info={
37+
"url": "https://hud.pytorch.org/benchmark/v3/dashboard/pytorch_operator_microbenchmark",
38+
},
39+
# set baseline from past 4-8 days, and compare with the latest 4 days
40+
policy=Policy(
41+
frequency=Frequency(value=1, unit="days"),
42+
range=RangeConfig(
43+
baseline=DayRangeWindow(value=4),
44+
comparison=DayRangeWindow(value=4),
45+
),
46+
metrics={
47+
"latency": RegressionPolicy(
48+
name="latency",
49+
condition="greater_equal",
50+
threshold=0.85,
51+
baseline_aggregation="median",
52+
),
53+
},
54+
notification_config={
55+
"type": "github",
56+
"repo": "pytorch/test-infra",
57+
"issue": "7445",
58+
},
59+
),
60+
report_config=ReportConfig(
61+
report_level="insufficient_data",
62+
),
63+
)
64+
1465
# Compiler benchmark regression config
1566
# todo(elainewy): eventually each team should configure
1667
# their own benchmark regression config, currently place
@@ -94,6 +145,7 @@
94145
BENCHMARK_REGRESSION_CONFIG = BenchmarkRegressionConfigBook(
95146
configs={
96147
"compiler_regression": COMPILER_BENCHMARK_CONFIG,
148+
"pytorch_operator_microbenchmark": PYTORCH_OPERATOR_MICROBENCH_CONFIG,
97149
}
98150
)
99151

aws/lambda/benchmark_regression_summary_report/common/regression_utils.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import datetime as dt
22
import logging
3+
import math
4+
import statistics
35
from typing import Any, Counter, Dict, List, Literal, Optional, TypedDict
46

57
from common.benchmark_time_series_api_model import (
@@ -78,9 +80,11 @@ def get_regression_status(regression_summary: BenchmarkRegressionSummary) -> str
7880
status = (
7981
"regression"
8082
if regression_summary.get("regression_count", 0) > 0
81-
else "suspicious"
82-
if regression_summary.get("suspicious_count", 0) > 0
83-
else "no_regression"
83+
else (
84+
"suspicious"
85+
if regression_summary.get("suspicious_count", 0) > 0
86+
else "no_regression"
87+
)
8488
)
8589
return status
8690

@@ -274,10 +278,13 @@ def _get_baseline(
274278
calculate the baseline value based on the mode
275279
mode: mean, p90, max, min, target, p50, p95
276280
"""
277-
items = [d for d in data["values"] if field in d]
281+
items = [
282+
d
283+
for d in data["values"]
284+
if field in d and d[field] is not None and not math.isnan(float(d[field]))
285+
]
278286
if not items:
279287
return None
280-
281288
if mode == "max":
282289
baseline_obj = max(items, key=lambda d: float(d[field]))
283290
elif mode == "min":
@@ -286,10 +293,12 @@ def _get_baseline(
286293
baseline_obj = items[-1]
287294
elif mode == "earliest":
288295
baseline_obj = items[0]
296+
elif mode == "median":
297+
median_val = statistics.median([float(d[field]) for d in items])
298+
baseline_obj = min(items, key=lambda d: abs(float(d[field]) - median_val))
289299
else:
290300
logger.warning("Unknown mode: %s", mode)
291301
return None
292-
293302
result: BaselineResult = {
294303
"group_info": data["group_info"],
295304
"value": float(baseline_obj[field]),

aws/lambda/benchmark_regression_summary_report/lambda_function.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -137,13 +137,14 @@ def process(
137137
)
138138

139139
target, ls, le = self.get_target(config, self.end_time)
140-
if not target:
140+
if not target.time_series:
141141
self.log_info(
142142
f"no target data found for time range [{ls},{le}] with frequency {report_freq.get_text()}..."
143143
)
144144
return
145145
baseline, bs, be = self.get_baseline(config, self.end_time)
146-
if not baseline:
146+
147+
if not baseline.time_series:
147148
self.log_info(
148149
f"no baseline data found for time range [{bs},{be}] with frequency {report_freq.get_text()}..."
149150
)
@@ -255,7 +256,7 @@ def _fetch_from_benchmark_ts_api(
255256
}
256257
)
257258
url = source.api_query_url
258-
259+
self.log_info(f"query peek: {query}")
259260
self.log_info(f"trying to call {url}")
260261
t0 = time.perf_counter()
261262
try:

torchci/lib/benchmark/api_helper/backend/common/utils.ts

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -217,12 +217,14 @@ export function toTimeSeriesResponse(
217217
export function emptyTimeSeriesResponse() {
218218
return {
219219
total_rows: 0,
220-
time_series: [],
221-
table: [],
222220
time_range: {
223221
start: new Date().toISOString(),
224222
end: new Date().toISOString(),
225223
},
224+
data: {
225+
time_series: [],
226+
table: [],
227+
},
226228
};
227229
}
228230

@@ -265,6 +267,7 @@ export function to_time_series_data(
265267
diffs.push({
266268
key: `${key}___${sub_key}`,
267269
data: item.data,
270+
length: item.data.length,
268271
});
269272
}
270273
return item.data[0];
@@ -286,11 +289,11 @@ export function to_time_series_data(
286289
console.log(
287290
`we detected multiple datapoints for the same group keys ${
288291
diffs.length
289-
}, peak first on \n ${JSON.stringify(
290-
diffs[0].key
291-
)}, \n Data1: ${JSON.stringify(
292+
}, peak first on \n ${JSON.stringify(diffs[0].key)},\n duplicates ${
293+
diffs[0].length
294+
} \n Data1: ${JSON.stringify(
292295
diffs[0].data[0]
293-
)}, Data:2 ${JSON.stringify(diffs[0].data[1])}`
296+
)},\n Data:2 ${JSON.stringify(diffs[0].data[1])}`
294297
);
295298
}
296299
return result;

0 commit comments

Comments
 (0)