Skip to content

Commit 3535c27

Browse files
authored
Sync the regression policy with the regression notification (#7301)
1 parent ec9c167 commit 3535c27

File tree

3 files changed

+10
-50
lines changed

3 files changed

+10
-50
lines changed

aws/lambda/benchmark_regression_summary_report/common/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@
76 76   "compilation_latency": RegressionPolicy(
77 77   name="compilation_latency",
78 78   condition="less_equal",
79    - threshold=1.05,
   79 + threshold=1.15,
80 80   baseline_aggregation="min",
81 81   ),
82 82   },

aws/lambda/benchmark_regression_summary_report/common/report_manager.py

Lines changed: 7 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -14,55 +14,15 @@
14 14   REPORT_MD_TEMPLATE = """# Benchmark Report {{ id }}
15 15   config_id: `{{ report_id }}`
16 16
17    - We have detected **{{ status }}** in benchmark results for `{{ report_id }}` (id: `{{ id }}`).
18    - (HUD benchmark regression page coming soon...)
   17 + See Page https://hud.pytorch.org/benchmark/regression/report/{{ id }} for more details.
19 18
20    - > **Status:** {{ status }} · **Frequency:** {{ frequency }}
   19 + Report Status: **{{ status }}**
21 20
22    - ## Summary
23    - | Metric | Value |
24    - | :-- | --: |
25    - | Total | {{ summary.total_count | default(0) }} |
26    - | Regressions | {{ summary.regression_count | default(0) }} |
27    - | Suspicious | {{ summary.suspicious_count | default(0) }} |
28    - | No Regression | {{ summary.no_regression_count | default(0) }} |
29    - | Insufficient Data | {{ summary.insufficient_data_count | default(0) }} |
30    -
31    - ## Data Windows
32    - Baseline is a single reference value (e.g., mean, max, min, latest) aggregated from the previous few days,
33    - used to detect regressions by comparing against metric values in the target window.
34    -
35    - ### Baseline window (used to calculate baseline value)
36    - - **Start:** `{{ baseline.start.timestamp | default('') }}` (commit: `{{ baseline.start.commit | default('') }}`)
37    - - **End:** `{{ baseline.end.timestamp | default('') }}` (commit: `{{ baseline.end.commit | default('') }}`)
38    -
39    - ### Target window (used to compare against baseline value)
40    - - **Start:** `{{ target.start.timestamp | default('') }}` (commit: `{{ target.start.commit | default('') }}`)
41    - - **End:** `{{ target.end.timestamp | default('') }}` (commit: `{{ target.end.commit | default('') }}`)
42    -
43    - {% if regression_items and regression_items|length > 0 %}
44    - ## Regression Glance
45    - {% if url %}
46    - Use items below in [HUD]({{ url }}) to see regression.
47    - {% endif %}
48    -
49    - {% set items = regression_items if regression_items|length <= 10 else regression_items[:10] %}
50    - {% if regression_items|length > 10 %}
51    - … (showing first 10 only, total {{ regression_items|length }} regressions)
52    - {% endif %}
53    - {% for item in items %}
54    - {% set kv = item.group_info|dictsort %}
55    - {{ "" }}|{% for k, _ in kv %}{{ k }} |{% endfor %}{{ "\n" -}}
56    - |{% for _k, _ in kv %}---|{% endfor %}{{ "\n" -}}
57    - |{% for _k, v in kv %}{{ v }} |{% endfor %}{{ "\n\n" -}}
58    - {% if item.baseline_point -%}
59    - - **baseline**: {{ item.baseline_point.value}},
60    - - **startTime**: {{ item.baseline_point.timestamp }}, **endTime**: {{ target.end.timestamp }}
61    - - **lcommit**: `{{ item.baseline_point.commit }}`, **rcommit**: `{{ target.end.commit }}`
62    - {{ "\n" }}
63    - {%- endif %}
64    - {% endfor %}
65    - {% endif %}
   21 + - Total: {{ summary.total_count | default(0) }}
   22 + - Regressions: {{ summary.regression_count | default(0) }}
   23 + - Suspicious: {{ summary.suspicious_count | default(0) }}
   24 + - No Regression: {{ summary.no_regression_count | default(0) }}
   25 + - Insufficient Data: {{ summary.insufficient_data_count | default(0) }}
66 26   """
67 27
68 28

torchci/components/benchmark/v3/configs/teams/compilers/config.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ const COMPILATION_LATENCY_COMPARISON_POLICY: BenchmarkComparisonPolicyConfig = {
38 38   target: "compilation_latency",
39 39   type: "ratio",
40 40   ratioPolicy: {
41    - badRatio: 1.05,
42    - goodRatio: 0.9,
   41 + badRatio: 1.15,
   42 + goodRatio: 0.85,
43 43   direction: "down",
44 44   },
45 45   };

0 commit comments

Comments
 (0)