From c4689dcd5ecfd7fb8ee72e82a1d7bd70a4d0c23b Mon Sep 17 00:00:00 2001
From: zhouyi <1529198419@qq.com>
Date: Wed, 3 Jun 2026 21:46:44 +0800
Subject: [PATCH 1/3] feat: add CSV/JSON export and cycle_time P95/P99 to
 benchmark_plot.py

- --csv flag: exports one row per run with all metrics
- --json flag: exports per-run details + weighted cross-run summary
- cycle_time P95/P99/median/std now computed and displayed
- EXPORT_KEYS defines consistent CSV column order

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 scripts/benchmark_plot.py | 134 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 132 insertions(+), 2 deletions(-)

diff --git a/scripts/benchmark_plot.py b/scripts/benchmark_plot.py
index e62ea52..e2c82bf 100755
--- a/scripts/benchmark_plot.py
+++ b/scripts/benchmark_plot.py
@@ -35,8 +35,11 @@
   # Generate report + publication-quality plots
   python3 scripts/benchmark_plot.py --bags mpc_run --plot
 
+  # Export metrics to CSV / JSON
+  python3 scripts/benchmark_plot.py --bags mpc_run --csv --json
+
   # Demo mode (no rosbag needed)
-  python3 scripts/benchmark_plot.py --demo --plot
+  python3 scripts/benchmark_plot.py --demo --plot --csv
 """
 
 import argparse
@@ -467,7 +470,11 @@ def compute_metrics(data: dict, name: str) -> dict:
     ct = data["cycle_time_us"]
     if ct.max() > 0:
         metrics["cycle_time_mean_us"] = float(np.mean(ct))
+        metrics["cycle_time_median_us"] = float(np.median(ct))
+        metrics["cycle_time_p95_us"] = float(np.percentile(ct, 95))
+        metrics["cycle_time_p99_us"] = float(np.percentile(ct, 99))
         metrics["cycle_time_max_us"] = float(np.max(ct))
+        metrics["cycle_time_std_us"] = float(np.std(ct))
 
     su = data["setup_time_us"]
     if su.max() > 0:
@@ -862,7 +869,10 @@ def print_report(all_metrics: list):
         if key.endswith("_error") or key in (
             "solve_time_mean_us", "solve_time_p95_us", "solve_time_max_us",
             "solve_time_median_us", "solve_time_std_us",
-            "cycle_time_mean_us", "setup_time_mean_us",
+            "cycle_time_mean_us", "cycle_time_median_us",
+            "cycle_time_p95_us", "cycle_time_p99_us",
+            "cycle_time_max_us", "cycle_time_std_us",
+            "setup_time_mean_us",
             "diagnostics_rate_hz", "duration_s", "total_steps",
             "solved_steps", "solve_failures", "position_rms_error",
             "optimal_steps", "approximate_steps", "failed_steps",
@@ -885,6 +895,117 @@ def print_report(all_metrics: list):
     print()
 
 
+# ---------------------------------------------------------------------------
+# CSV / JSON export
+# ---------------------------------------------------------------------------
+
+# Preferred CSV column order; export_csv appends unlisted metrics after these.
+EXPORT_KEYS = [
+    "name", "duration_s", "total_steps", "state_dim",
+    "solve_time_mean_us", "solve_time_median_us",
+    "solve_time_p95_us", "solve_time_p99_us",
+    "solve_time_max_us", "solve_time_std_us",
+    "cycle_time_mean_us", "cycle_time_median_us",
+    "cycle_time_p95_us", "cycle_time_p99_us",
+    "cycle_time_max_us", "cycle_time_std_us",
+    "setup_time_mean_us",
+    "optimal_steps", "approximate_steps", "failed_steps",
+    "optimal_rate_pct", "approximate_rate_pct",
+    "solve_failures", "solved_steps",
+    "hold_count", "hold_rate_pct",
+    "deadline_misses", "deadline_miss_pct",
+    "position_rms_error",
+    "diagnostics_rate_hz",
+    "slack_max_vel_mean", "slack_max_vel_max",
+    "slack_l1_mean", "slack_l1_max", "slack_active_pct",
+]
+
+
+def export_csv(all_metrics: list, output_dir: str):
+    """Write one CSV row per run to <output_dir>/benchmark_results.csv.
+
+    Columns follow EXPORT_KEYS order (keys present in at least one run),
+    then any other keys in first-seen order; a metric missing from a run
+    is an empty cell. Creates output_dir if needed; output is UTF-8.
+    """
+    import csv
+
+    all_keys = [k for k in EXPORT_KEYS if any(k in m for m in all_metrics)]
+    seen = set(all_keys)
+    for m in all_metrics:
+        for k in m:
+            if k not in seen:
+                all_keys.append(k)
+                seen.add(k)
+
+    out_dir = Path(output_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+    fname = out_dir / "benchmark_results.csv"
+    with open(fname, "w", newline="", encoding="utf-8") as f:
+        writer = csv.DictWriter(f, fieldnames=all_keys, restval="")
+        writer.writeheader()
+        writer.writerows(all_metrics)
+
+    print(f"  CSV saved to {fname}")
+
+
+def export_json(all_metrics: list, all_data: list, output_dir: str):
+    """Export per-run metrics and a cross-run summary as JSON.
+
+    "runs": each metrics dict with NumPy values converted to plain Python;
+    non-finite floats become null so the file is strict JSON. "summary":
+    averages weighted by total_steps over the runs that report each metric
+    (null if none do); left as {} when no run has steps. all_data is unused
+    and kept for call compatibility. Creates output_dir if needed.
+    """
+    import json
+    import math
+
+    def to_plain(v):
+        # tolist() maps NumPy arrays and scalars (incl. bools) to built-ins.
+        if isinstance(v, (np.ndarray, np.generic)):
+            v = v.tolist()
+        if isinstance(v, dict):
+            return {k: to_plain(x) for k, x in v.items()}
+        if isinstance(v, (list, tuple)):
+            return [to_plain(x) for x in v]
+        if isinstance(v, float) and not math.isfinite(v):
+            return None
+        return v
+
+    def wmean(key):
+        # Skip runs lacking the metric: defaulting them to 0 would keep
+        # their steps in the denominator and bias the average low.
+        pairs = [(m.get("total_steps", 0), m[key])
+                 for m in all_metrics if key in m]
+        weight = sum(w for w, _ in pairs)
+        if weight <= 0:
+            return None
+        return to_plain(sum(w * v for w, v in pairs) / weight)
+
+    result = {"runs": [to_plain(m) for m in all_metrics], "summary": {}}
+
+    total_steps = sum(m.get("total_steps", 0) for m in all_metrics)
+    if total_steps > 0:
+        summary = {"total_steps": to_plain(total_steps),
+                   "num_runs": len(all_metrics)}
+        for key in ("solve_time_mean_us", "solve_time_p95_us",
+                    "solve_time_p99_us", "cycle_time_mean_us",
+                    "cycle_time_p95_us", "cycle_time_p99_us",
+                    "optimal_rate_pct", "deadline_miss_pct",
+                    "position_rms_error"):
+            summary[f"weighted_{key}"] = wmean(key)
+        result["summary"] = summary
+
+    out_dir = Path(output_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+    fname = out_dir / "benchmark_results.json"
+    with open(fname, "w", encoding="utf-8") as f:
+        json.dump(result, f, indent=2, allow_nan=False)
+
+    print(f"  JSON saved to {fname}")
+
+
 # ---------------------------------------------------------------------------
 # Main
 # ---------------------------------------------------------------------------
@@ -902,6 +1023,10 @@ def main():
                         help="Output directory for plots (default: results/)")
     parser.add_argument("--plot", action="store_true",
                         help="Generate publication-quality plots")
+    parser.add_argument("--csv", action="store_true",
+                        help="Export metrics to CSV (one row per run)")
+    parser.add_argument("--json", action="store_true",
+                        help="Export metrics to JSON with cross-run summary")
     parser.add_argument("--demo", action="store_true",
                         help="Run in demo mode with simulated data (no rosbag)")
     args = parser.parse_args()
@@ -955,6 +1080,11 @@ def main():
 
     print_report(all_metrics)
 
+    if args.csv:
+        export_csv(all_metrics, output_dir)
+    if args.json:
+        export_json(all_metrics, all_data, output_dir)
+
     if args.plot or args.demo or not args.bags:
         args.plot = True
 

From 5db3e1962f4bceff8d2dafa32d9af5a1ccb78b47 Mon Sep 17 00:00:00 2001
From: zhouyi <1529198419@qq.com>
Date: Wed, 3 Jun 2026 21:56:18 +0800
Subject: [PATCH 2/3] feat: add --summary flag and benchmark reproduction guide

- --summary prints cross-run weighted averages with std/min/max
- README section: how to reproduce benchmarks with --csv --json --summary

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 README.md                 | 31 +++++++++++++
 scripts/benchmark_plot.py | 92 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 123 insertions(+)

diff --git a/README.md b/README.md
index 11ab52b..82028cb 100644
--- a/README.md
+++ b/README.md
@@ -253,6 +253,37 @@ confirmed under controlled conditions.
 - **Sporadic `PRIMAL_INFEASIBLE` bursts with sentinel slack values (P2 — RESOLVED via warm-start hardening)**: On rare cycles (0–14% per run) OSQP returned status −3 (`PRIMAL_INFEASIBLE`) with slack variables at a sentinel value of 2,143,289,344 (0x7fc00000, a quiet NaN in IEEE 754 single-precision). Once triggered, the bad ADMM state could cascade via warm start, causing contiguous failure blocks. **Root cause identified:** The receding-horizon warm-start shift treated the partitioned decision vector z = [U, ε] as a monolithic block, interleaving U and slack components during the shift. This corrupted the slack initial guess, driving OSQP into an invalid ADMM state that propagated across cycles. **Fix:** Partitioned the warm-start shift into independent U-block and ε-block shifts, with `allFinite()` guards and error-checked reset on solver failure. Post-fix benchmarks confirm **zero sentinel occurrences across 22,179 cycles** (6 runs). See [osqp_solver.cpp:143-171](src/osqp_solver.cpp#L143-L171) for the fix.
 - **Deadline misses**: In clean runs the cached condensed Hessian reduced mean cycle time from 7.41 ms (v0.2.0) to 4.00 ms in the original benchmark, and from 3.06 ms to 2.69 ms (−12%) in the paired A/B validation. The Hessian cache eliminates ~512K FLOPs/cycle of redundant matrix-matrix products, reducing per-cycle Eigen heap allocations from 17+ to ~3. Remaining contributors include WSL2 virtualization overhead, Gazebo scheduling, and solver polishing cost.
 
+### Reproducing the Benchmark
+
+Prerequisites: ROS 2 Jazzy, Gazebo (gz_ros2_control), `rosbag2_py`, `numpy`, `matplotlib`.
+
+```bash
+# 1. Build
+source /opt/ros/jazzy/setup.bash
+cd ros2_ws && colcon build --packages-select mpc_controller
+source install/setup.bash
+
+# 2. Record a single run (~60 s; stop `ros2 bag record` with Ctrl-C)
+ros2 launch mpc_controller rrbot_mpc.launch.py &
+sleep 60  # wait for simulation to stabilize
+ros2 bag record /mpc_controller/diagnostics -o bench_run_01
+kill %1
+
+# 3. Analyze
+python3 src/mpc_controller/scripts/benchmark_plot.py \
+  --bags bench_run_01 --output results --plot --csv --json
+
+# 4. Multi-run summary (2+ runs)
+python3 src/mpc_controller/scripts/benchmark_plot.py \
+  --bags bench_run_01 bench_run_02 bench_run_03 \
+  --output results --summary --csv --json
+```
+
+The `--csv` flag exports a spreadsheet-friendly table (one row per run). The
+`--json` flag exports per-run details plus a weighted cross-run summary.
+The `--summary` flag prints aggregated statistics with cross-run standard
+deviation when 2+ bags are provided.
+
 ## Dynamic Parameter Tuning
 
 Weights can be updated at runtime without restarting the controller.
diff --git a/scripts/benchmark_plot.py b/scripts/benchmark_plot.py
index e2c82bf..f3a25d8 100755
--- a/scripts/benchmark_plot.py
+++ b/scripts/benchmark_plot.py
@@ -895,6 +895,93 @@ def print_report(all_metrics: list):
     print()
 
 
+# ---------------------------------------------------------------------------
+# Repeated-run summary
+# ---------------------------------------------------------------------------
+
+def print_summary(all_metrics: list):
+    """Print aggregated statistics across multiple benchmark runs.
+
+    Per metric: average weighted by each run's total_steps, plus cross-run
+    population std, min and max, all over the runs reporting that metric.
+    """
+    if len(all_metrics) < 2:
+        print("  (Need 2+ runs for cross-run summary)")
+        return
+
+    total_steps = sum(m.get("total_steps", 0) for m in all_metrics)
+    if total_steps == 0:
+        return
+
+    def wmean(key):
+        # Skip runs lacking the metric: defaulting them to 0 would keep their
+        # steps in the denominator and pull the average below the run minimum.
+        pairs = [(m.get("total_steps", 0), m[key])
+                 for m in all_metrics if key in m]
+        weight = sum(w for w, _ in pairs)
+        return sum(w * v for w, v in pairs) / weight if weight > 0 else None
+
+    def across_runs(key):
+        vals = [m[key] for m in all_metrics if key in m]
+        if not vals:
+            return None
+        arr = np.array(vals)
+        return {
+            "std": float(np.std(arr)),
+            "min": float(np.min(arr)),
+            "max": float(np.max(arr)),
+        }
+
+    def fmt(key, v):
+        if v is None:
+            return "n/a"
+        if "pct" in key:
+            return f"{v:.2f}%"
+        if "rad" in key or "error" in key:
+            return f"{v:.3f} rad"
+        if "us" in key:
+            return f"{v:.0f} µs" if v < 1000 else f"{v/1000:.2f} ms"
+        return f"{v:.4f}"
+
+    print("\n" + "=" * 85)
+    print(f"  REPEATED-RUN SUMMARY ({len(all_metrics)} runs, {total_steps:,} total cycles)")
+    print("=" * 85)
+
+    weighted_metrics = [
+        ("solve_time_mean_us", "Solve time mean"),
+        ("solve_time_p95_us", "Solve time P95"),
+        ("solve_time_p99_us", "Solve time P99"),
+        ("cycle_time_mean_us", "Cycle time mean"),
+        ("cycle_time_p95_us", "Cycle time P95"),
+        ("cycle_time_p99_us", "Cycle time P99"),
+        ("optimal_rate_pct", "Optimal solve rate"),
+        ("deadline_miss_pct", "Deadline miss rate"),
+        ("position_rms_error", "Position RMS error"),
+        ("hold_rate_pct", "Hold rate"),
+    ]
+
+    print(f"\n  {'Metric':<30} {'Weighted':>12} {'Run σ':>10} {'Min':>12} {'Max':>12}")
+    print("  " + "-" * 76)
+    for key, label in weighted_metrics:
+        ar = across_runs(key)
+        if ar is None:
+            continue
+        print(f"  {label:<30} {fmt(key, wmean(key)):>12} {fmt(key, ar['std']):>10} "
+              f"{fmt(key, ar['min']):>12} {fmt(key, ar['max']):>12}")
+
+    print("\n  Per-run optimal solve rates:")
+    for m in all_metrics:
+        name = m.get("name", "?")
+        opt = m.get("optimal_rate_pct", 0)
+        steps = m.get("total_steps", 0)
+        dl = m.get("deadline_miss_pct", 0)
+        print(f"    {name:<25} {opt:>6.1f}%  ({steps:>5} cycles, "
+              f"DL miss {dl:.1f}%)")
+
+    print("\n" + "=" * 85)
+    print()
+
+
 # ---------------------------------------------------------------------------
 # CSV / JSON export
 # ---------------------------------------------------------------------------
@@ -1027,6 +1114,8 @@ def main():
                         help="Export metrics to CSV (one row per run)")
     parser.add_argument("--json", action="store_true",
                         help="Export metrics to JSON with cross-run summary")
+    parser.add_argument("--summary", action="store_true",
+                        help="Print repeated-run aggregated statistics (2+ bags)")
     parser.add_argument("--demo", action="store_true",
                         help="Run in demo mode with simulated data (no rosbag)")
     args = parser.parse_args()
@@ -1080,6 +1169,9 @@ def main():
 
     print_report(all_metrics)
 
+    if args.summary and len(all_metrics) >= 2:
+        print_summary(all_metrics)
+
     if args.csv:
         export_csv(all_metrics, output_dir)
     if args.json:

From 09355d04ea00c536e70382eb67170000629b4d8f Mon Sep 17 00:00:00 2001
From: zhouyi <1529198419@qq.com>
Date: Wed, 3 Jun 2026 21:59:54 +0800
Subject: [PATCH 3/3] docs: mark completed v0.2.2 benchmark tooling tasks

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 ROADMAP.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/ROADMAP.md b/ROADMAP.md
index 190a858..a2c282d 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -26,9 +26,10 @@
 
 ### v0.2.2 — Runtime Characterization
 - [ ] Collect native Ubuntu 24.04 benchmark results ([#1](https://github.com/yeezhouyi/mpc_controller/issues/1))
-- [ ] Add P95 / P99 latency statistics
-- [ ] Add repeated-run benchmark summary
-- [ ] Export benchmark results to CSV / JSON
+- [x] Add P95 / P99 latency statistics
+- [x] Add repeated-run benchmark summary
+- [x] Export benchmark results to CSV / JSON
+- [x] Add benchmark reproduction guide
 - [ ] Update README benchmark table with native results
 - [ ] Document WSL2 vs native Linux timing limitations
 - [ ] ControllerUpdateStats integration