@@ -63,6 +63,70 @@
     ),
 )
 
+
+TORCHAO_MICRO_API_CONFIG = BenchmarkConfig(
+    name="Torchao Micro Api Regression",
+    id="torchao_micro_api_benchmark",
+    source=BenchmarkApiSource(
+        api_query_url="https://hud.pytorch.org/api/benchmark/get_time_series",
+        type="benchmark_time_series_api",
+        api_endpoint_params_template="""
+        {
+            "name": "torchao_micro_api_benchmark",
+            "query_params": {
+                "mode": "",
+                "branches": ["main"],
+                "repo": "pytorch/ao",
+                "device": "",
+                "benchmarkName": "micro-benchmark api",
+                "startTime": "{{ startTime }}",
+                "stopTime": "{{ stopTime }}"
+            },
+            "response_formats":["time_series"]
+        }
+        """,
+    ),
+    hud_info={
+        "url": "https://hud.pytorch.org/benchmark/v3/dashboard/torchao_micro_api_benchmark",
+    },
+    # Set the baseline from the past 4-8 days and compare it against the latest 4 days.
+    policy=Policy(
+        frequency=Frequency(value=1, unit="days"),
+        range=RangeConfig(
+            baseline=DayRangeWindow(value=4),
+            comparison=DayRangeWindow(value=4),
+        ),
+        metrics={
+            "bfloat16 fwd time (ms)": RegressionPolicy(
+                name="bfloat16 fwd time (ms)",
+                condition="less_equal",
+                threshold=1.20,
+                baseline_aggregation="min",
+            ),
+            "quantized fwd time (ms)": RegressionPolicy(
+                name="quantized fwd time (ms)",
+                condition="less_equal",
+                threshold=1.20,
+                baseline_aggregation="min",
+            ),
+            "fwd speedup (x)": RegressionPolicy(
+                name="fwd speedup (x)",
+                condition="greater_equal",
+                threshold=0.9,
+                baseline_aggregation="median",
+            ),
+        },
+        notification_config={
+            "type": "github",
+            "repo": "pytorch/test-infra",
+            "issue": "7477",
+        },
+    ),
+    report_config=ReportConfig(
+        report_level="clear",
+    ),
+)
+
 PYTORCH_OPERATOR_MICROBENCH_CONFIG = BenchmarkConfig(
     name="Pytorch Operator Microbench Regression",
     id="pytorch_operator_microbenchmark",
@@ -98,8 +162,8 @@
         metrics={
             "latency": RegressionPolicy(
                 name="latency",
-                condition="greater_equal",
-                threshold=0.85,
+                condition="less_equal",
+                threshold=1.20,
                 baseline_aggregation="median",
             ),
         },
@@ -110,7 +174,7 @@
         },
     ),
     report_config=ReportConfig(
-        report_level="insufficient_data",
+        report_level="regression",
     ),
 )
 
@@ -199,6 +263,7 @@
         "compiler_regression": COMPILER_BENCHMARK_CONFIG,
         "pytorch_operator_microbenchmark": PYTORCH_OPERATOR_MICROBENCH_CONFIG,
         "pytorch_helion": PYTORCH_HELION_CONFIG,
+        "torchao_micro_api_benchmark": TORCHAO_MICRO_API_CONFIG,
     }
 )
 
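
A note on how the metric policies above read: the thresholds appear to be ratios against the aggregated baseline, so condition="less_equal" with threshold=1.20 on a time metric flags values above 1.20x the baseline minimum, while condition="greater_equal" with threshold=0.9 on a speedup metric flags values below 0.9x the baseline median. The sketch below illustrates that interpretation only; it is not the evaluator in this repo, and the function name is made up for illustration.

import statistics

# Sketch only: mirrors the condition / threshold / baseline_aggregation fields of
# RegressionPolicy above, assuming threshold is a ratio versus the aggregated baseline.
AGGREGATIONS = {
    "min": min,                    # used by the two fwd-time metrics above
    "median": statistics.median,   # used by fwd speedup and the operator latency metric
}

def violates_policy(value, baseline_values, condition, threshold, baseline_aggregation):
    # Aggregate the baseline window (e.g. samples from the past 4-8 days).
    baseline = AGGREGATIONS[baseline_aggregation](baseline_values)
    ratio = value / baseline
    if condition == "less_equal":      # lower-is-better metrics such as fwd time (ms)
        return not ratio <= threshold
    if condition == "greater_equal":   # higher-is-better metrics such as fwd speedup (x)
        return not ratio >= threshold
    raise ValueError(f"unknown condition: {condition}")

# Example: bfloat16 fwd times (ms), min-aggregated baseline, 1.20 ratio cap.
print(violates_policy(1.5, [1.0, 1.1, 1.05, 1.2], "less_equal", 1.20, "min"))  # True
print(violates_policy(1.1, [1.0, 1.1, 1.05, 1.2], "less_equal", 1.20, "min"))  # False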
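
For completeness, api_endpoint_params_template is a JSON body with {{ startTime }} / {{ stopTime }} placeholders. The snippet below renders it Jinja-style just to show the payload shape the new torchao source would send to https://hud.pytorch.org/api/benchmark/get_time_series; whether the runner actually uses Jinja, and the timestamps themselves, are assumptions for illustration.

import json
from jinja2 import Template  # assumption: any {{ }}-style renderer would do

# Same template string as api_endpoint_params_template in TORCHAO_MICRO_API_CONFIG.
PARAMS_TEMPLATE = """
{
    "name": "torchao_micro_api_benchmark",
    "query_params": {
        "mode": "",
        "branches": ["main"],
        "repo": "pytorch/ao",
        "device": "",
        "benchmarkName": "micro-benchmark api",
        "startTime": "{{ startTime }}",
        "stopTime": "{{ stopTime }}"
    },
    "response_formats": ["time_series"]
}
"""

# Example timestamps only; the real runner derives them from the policy's
# 4-day baseline and 4-day comparison windows (8 days total).
rendered = Template(PARAMS_TEMPLATE).render(
    startTime="2024-06-01T00:00:00Z",
    stopTime="2024-06-09T00:00:00Z",
)
payload = json.loads(rendered)  # the rendered template must be valid JSON
print(json.dumps(payload, indent=2))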