Skip to content

Commit 39523dd

Browse files
authored
Add Helion Regression Config (#7473)
# Overview
Add a regression report for Helion.

# Frequency
The summary report is generated daily.

# What is considered a regression for Helion
1. We find the baseline point used to judge new data: the median of the speedup data from the 4th to the 8th day back.
2. If more than 2 consecutive new data points of Helion speedup are 15% lower than the baseline value, it is considered a regression.

## Demo Regression Report
https://torchci-git-addhelionregressionreport-fbopensource.vercel.app/benchmark/regression/report/ada0e5ba-874b-47ff-b76c-b281ac08d179
<img width="741" height="851" alt="image" src="https://github.com/user-attachments/assets/abcde75b-f34d-49d1-9c23-0544ac38ba37" />

## Notification
Currently we do not trigger Workplace chat notifications; this is experimental. But we do:
1. Have a GitHub issue that will be used to accept regression reports as links; it can be used for notification when it's ready.
2. Have the Helion dashboard provide access to the regression report list, and signal a regression if any is found.

#7472
1 parent d9e1320 commit 39523dd

File tree

4 files changed

+80
-12
lines changed

4 files changed

+80
-12
lines changed

aws/lambda/benchmark_regression_summary_report/common/config.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,58 @@
1111
)
1212

1313

14+
PYTORCH_HELION_CONFIG = BenchmarkConfig(
15+
name="Helion Benchmark Regression",
16+
id="pytorch_helion",
17+
source=BenchmarkApiSource(
18+
api_query_url="https://hud.pytorch.org/api/benchmark/get_time_series",
19+
type="benchmark_time_series_api",
20+
api_endpoint_params_template="""
21+
{
22+
"name": "pytorch_helion",
23+
"query_params": {
24+
"mode": "",
25+
"branches": ["main"],
26+
"repo": "pytorch/helion",
27+
"device": "",
28+
"arch":"",
29+
"benchmarkName": "Helion Benchmark",
30+
"startTime": "{{ startTime }}",
31+
"stopTime": "{{ stopTime }}"
32+
},
33+
"response_formats":["time_series"]
34+
}
35+
""",
36+
),
37+
hud_info={
38+
"url": "https://hud.pytorch.org/benchmark/v3/dashboard/pytorch_helion",
39+
},
40+
# set baseline from past 4-8 days, and compare with the lastest 4 day
41+
policy=Policy(
42+
frequency=Frequency(value=1, unit="days"),
43+
range=RangeConfig(
44+
baseline=DayRangeWindow(value=4),
45+
comparison=DayRangeWindow(value=4),
46+
),
47+
metrics={
48+
"helion_speedup": RegressionPolicy(
49+
name="helion_speedup",
50+
condition="greater_equal",
51+
threshold=0.85,
52+
baseline_aggregation="median",
53+
),
54+
},
55+
notification_config={
56+
"type": "github",
57+
"repo": "pytorch/test-infra",
58+
"issue": "7472",
59+
},
60+
),
61+
report_config=ReportConfig(
62+
report_level="insufficient_data",
63+
),
64+
)
65+
1466
PYTORCH_OPERATOR_MICROBENCH_CONFIG = BenchmarkConfig(
1567
name="Pytorch Operator Microbench Regression",
1668
id="pytorch_operator_microbenchmark",
@@ -146,6 +198,7 @@
146198
configs={
147199
"compiler_regression": COMPILER_BENCHMARK_CONFIG,
148200
"pytorch_operator_microbenchmark": PYTORCH_OPERATOR_MICROBENCH_CONFIG,
201+
"pytorch_helion": PYTORCH_HELION_CONFIG,
149202
}
150203
)
151204

aws/lambda/benchmark_regression_summary_report/common/regression_utils.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -77,16 +77,18 @@ class BenchmarkRegressionReport(TypedDict):
7777

7878

7979
def get_regression_status(regression_summary: BenchmarkRegressionSummary) -> str:
80-
status = (
81-
"regression"
82-
if regression_summary.get("regression_count", 0) > 0
83-
else (
84-
"suspicious"
85-
if regression_summary.get("suspicious_count", 0) > 0
86-
else "no_regression"
87-
)
88-
)
89-
return status
80+
if regression_summary.get("regression_count", 0) > 0:
81+
return "regression"
82+
if regression_summary.get("suspicious_count", 0) > 0:
83+
return "suspicious"
84+
if regression_summary.get("insufficient_data_count", 0) > 0:
85+
insufficient_data = regression_summary.get("insufficient_data_count", 0)
86+
# default to 1 to avoid dividen issue
87+
total = regression_summary.get("total_count", 1)
88+
percentage = insufficient_data / total
89+
if percentage >= 0.9:
90+
return "insufficient_data"
91+
return "no_regression"
9092

9193

9294
class BenchmarkRegressionReportGenerator:
@@ -251,7 +253,19 @@ def _to_data_map(
251253
for d in sorted(
252254
ts_group.data, key=lambda d: isoparse(d["granularity_bucket"])
253255
):
256+
# skip if field is not in data, or field is None
254257
if field not in d:
258+
logger.warning(
259+
"[_to_data_map] field %s not found or value is undefined", field
260+
)
261+
continue
262+
if d[field] is None or math.isnan(float(d[field])):
263+
logger.warning(
264+
"[_to_data_map] Skip %s with value %s with group key [%s]",
265+
field,
266+
d[field],
267+
group_keys,
268+
)
255269
continue
256270

257271
p: BenchmarkRegressionPoint = {

torchci/components/benchmark_v3/configs/configurations.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ export const BENCHMARK_ID_MAPPING: Record<string, BenchmarkIdMappingItem> = {
107107
* @returns
108108
*/
109109
export function getBenchmarkIdFromReportId(reportId: string): string {
110-
return REPORT_ID_TO_BENCHMARK_ID_MAPPING[reportId] ?? "";
110+
return REPORT_ID_TO_BENCHMARK_ID_MAPPING[reportId] ?? reportId;
111111
}
112112

113113
export function getBenchmarkIdMappingItem(

torchci/components/benchmark_v3/pages/BenchmarkListPage.tsx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ export function getBenchmarkMainRouteById(id: string): string | undefined {
1212
}
1313
}
1414
}
15-
return undefined;
15+
// by default, form the v3 route to dashboard page
16+
return `/benchmark/v3/dashboard/${id}`;
1617
}
1718

1819
export function benchmarkCategoryCardToNavGroup(

0 commit comments

Comments
 (0)