From c4689dcd5ecfd7fb8ee72e82a1d7bd70a4d0c23b Mon Sep 17 00:00:00 2001 From: zhouyi <1529198419@qq.com> Date: Wed, 3 Jun 2026 21:46:44 +0800 Subject: [PATCH 1/3] feat: add CSV/JSON export and cycle_time P95/P99 to benchmark_plot.py - --csv flag: exports one row per run with all metrics - --json flag: exports per-run details + weighted cross-run summary - cycle_time P95/P99/median/std now computed and displayed - EXPORT_KEYS defines consistent CSV column order Co-Authored-By: Claude Opus 4.7 --- scripts/benchmark_plot.py | 134 +++++++++++++++++++++++++++++++++++++- 1 file changed, 132 insertions(+), 2 deletions(-) diff --git a/scripts/benchmark_plot.py b/scripts/benchmark_plot.py index e62ea52..e2c82bf 100755 --- a/scripts/benchmark_plot.py +++ b/scripts/benchmark_plot.py @@ -35,8 +35,11 @@ # Generate report + publication-quality plots python3 scripts/benchmark_plot.py --bags mpc_run --plot + # Export metrics to CSV / JSON + python3 scripts/benchmark_plot.py --bags mpc_run --csv --json + # Demo mode (no rosbag needed) - python3 scripts/benchmark_plot.py --demo --plot + python3 scripts/benchmark_plot.py --demo --plot --csv """ import argparse @@ -467,7 +470,11 @@ def compute_metrics(data: dict, name: str) -> dict: ct = data["cycle_time_us"] if ct.max() > 0: metrics["cycle_time_mean_us"] = float(np.mean(ct)) + metrics["cycle_time_median_us"] = float(np.median(ct)) + metrics["cycle_time_p95_us"] = float(np.percentile(ct, 95)) + metrics["cycle_time_p99_us"] = float(np.percentile(ct, 99)) metrics["cycle_time_max_us"] = float(np.max(ct)) + metrics["cycle_time_std_us"] = float(np.std(ct)) su = data["setup_time_us"] if su.max() > 0: @@ -862,7 +869,10 @@ def print_report(all_metrics: list): if key.endswith("_error") or key in ( "solve_time_mean_us", "solve_time_p95_us", "solve_time_max_us", "solve_time_median_us", "solve_time_std_us", - "cycle_time_mean_us", "setup_time_mean_us", + "cycle_time_mean_us", "cycle_time_median_us", + "cycle_time_p95_us", "cycle_time_p99_us", + 
# ---------------------------------------------------------------------------
# CSV / JSON export
# ---------------------------------------------------------------------------

# Metrics to export (order matters for CSV columns)
EXPORT_KEYS = [
    "name", "duration_s", "total_steps", "state_dim",
    "solve_time_mean_us", "solve_time_median_us",
    "solve_time_p95_us", "solve_time_p99_us",
    "solve_time_max_us", "solve_time_std_us",
    "cycle_time_mean_us", "cycle_time_median_us",
    "cycle_time_p95_us", "cycle_time_p99_us",
    "cycle_time_max_us", "cycle_time_std_us",
    "setup_time_mean_us",
    "optimal_steps", "approximate_steps", "failed_steps",
    "optimal_rate_pct", "approximate_rate_pct",
    "solve_failures", "solved_steps",
    "hold_count", "hold_rate_pct",
    "deadline_misses", "deadline_miss_pct",
    "position_rms_error",
    "diagnostics_rate_hz",
    "slack_max_vel_mean", "slack_max_vel_max",
    "slack_l1_mean", "slack_l1_max", "slack_active_pct",
]

# Metrics aggregated into the JSON "summary" section, emitted under the key
# "weighted_<metric>".
_SUMMARY_KEYS = [
    "solve_time_mean_us", "solve_time_p95_us", "solve_time_p99_us",
    "cycle_time_mean_us", "cycle_time_p95_us", "cycle_time_p99_us",
    "optimal_rate_pct", "deadline_miss_pct", "position_rms_error",
]


def _to_jsonable(value):
    """Recursively convert *value* into plain, strictly JSON-safe Python types.

    NumPy arrays become lists and NumPy scalars (including ``np.bool_``)
    become their Python equivalents. Non-finite floats (NaN, +/-inf) become
    ``None`` because strict JSON has no representation for them. Any other
    object is returned unchanged.
    """
    import math

    if isinstance(value, np.ndarray):
        return _to_jsonable(value.tolist())
    if isinstance(value, np.generic):
        value = value.item()
    if isinstance(value, float) and not math.isfinite(value):
        return None
    if isinstance(value, dict):
        return {k: _to_jsonable(v) for k, v in value.items()}
    if isinstance(value, (list, tuple)):
        return [_to_jsonable(v) for v in value]
    return value


def export_csv(all_metrics: list, output_dir: str):
    """Export benchmark metrics to ``benchmark_results.csv`` (one row per run).

    Columns are the EXPORT_KEYS entries that occur in at least one run, in
    EXPORT_KEYS order, followed by any other keys in first-seen order. A
    metric missing from a run is written as an empty cell.

    Args:
        all_metrics: One metrics dict per run.
        output_dir: Destination directory; created if it does not exist.
    """
    import csv

    columns = [key for key in EXPORT_KEYS if any(key in m for m in all_metrics)]
    # Append keys that are not part of the canonical ordering.
    seen = set(columns)
    for m in all_metrics:
        for k in m:
            if k not in seen:
                columns.append(k)
                seen.add(k)

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    fname = out_dir / "benchmark_results.csv"
    # newline="" is required by the csv module; explicit UTF-8 keeps run
    # names portable across platforms.
    with open(fname, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=columns)
        writer.writeheader()
        for m in all_metrics:
            writer.writerow({k: m.get(k, "") for k in columns})

    print(f" CSV saved to {fname}")


def export_json(all_metrics: list, all_data: list, output_dir: str):
    """Export benchmark metrics to ``benchmark_results.json``.

    The document has two sections:

    * ``"runs"``: every metrics dict, converted to plain JSON types.
    * ``"summary"``: ``total_steps``, ``num_runs`` and, for each metric in
      ``_SUMMARY_KEYS``, a ``weighted_<metric>`` value averaged over runs
      with ``total_steps`` as the weight. Only runs that actually report the
      metric take part in its average; if no run reports it the value is
      ``null``. The section is left empty when no run has any steps.

    Note that a weighted average of per-run percentiles (P95/P99) is an
    approximation, not the percentile of the pooled samples.

    Args:
        all_metrics: One metrics dict per run.
        all_data: Accepted for call compatibility; not used by the export.
        output_dir: Destination directory; created if it does not exist.
    """
    import json

    result = {
        "runs": [_to_jsonable(m) for m in all_metrics],
        "summary": {},
    }

    def weighted_mean(key):
        # Skip runs that lack the metric so they do not drag the average
        # toward zero.
        weighted_sum = 0.0
        weight = 0
        for m in all_metrics:
            steps = m.get("total_steps", 0)
            if key in m and steps:
                weighted_sum += steps * m[key]
                weight += steps
        return weighted_sum / weight if weight else None

    total_steps = sum(m.get("total_steps", 0) for m in all_metrics)
    if total_steps > 0:
        summary = {
            "total_steps": total_steps,
            "num_runs": len(all_metrics),
        }
        for key in _SUMMARY_KEYS:
            summary[f"weighted_{key}"] = weighted_mean(key)
        result["summary"] = _to_jsonable(summary)

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    fname = out_dir / "benchmark_results.json"
    with open(fname, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2)

    print(f" JSON saved to {fname}")
plots (default: results/)") parser.add_argument("--plot", action="store_true", help="Generate publication-quality plots") + parser.add_argument("--csv", action="store_true", + help="Export metrics to CSV (one row per run)") + parser.add_argument("--json", action="store_true", + help="Export metrics to JSON with cross-run summary") parser.add_argument("--demo", action="store_true", help="Run in demo mode with simulated data (no rosbag)") args = parser.parse_args() @@ -955,6 +1080,11 @@ def main(): print_report(all_metrics) + if args.csv: + export_csv(all_metrics, output_dir) + if args.json: + export_json(all_metrics, all_data, output_dir) + if args.plot or args.demo or not args.bags: args.plot = True From 5db3e1962f4bceff8d2dafa32d9af5a1ccb78b47 Mon Sep 17 00:00:00 2001 From: zhouyi <1529198419@qq.com> Date: Wed, 3 Jun 2026 21:56:18 +0800 Subject: [PATCH 2/3] feat: add --summary flag and benchmark reproduction guide - --summary prints cross-run weighted averages with std/min/max - README section: how to reproduce benchmarks with --csv --json --summary Co-Authored-By: Claude Opus 4.7 --- README.md | 31 +++++++++++++ scripts/benchmark_plot.py | 92 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+) diff --git a/README.md b/README.md index 11ab52b..82028cb 100644 --- a/README.md +++ b/README.md @@ -253,6 +253,37 @@ confirmed under controlled conditions. - **Sporadic `PRIMAL_INFEASIBLE` bursts with sentinel slack values (P2 — RESOLVED via warm-start hardening)**: On rare cycles (0–14% per run) OSQP returned status −3 (`PRIMAL_INFEASIBLE`) with slack variables at a sentinel value of 2,143,289,344 (0x7fc00000, a quiet NaN in IEEE 754 single-precision). Once triggered, the bad ADMM state could cascade via warm start, causing contiguous failure blocks. **Root cause identified:** The receding-horizon warm-start shift treated the partitioned decision vector z = [U, ε] as a monolithic block, interleaving U and slack components during the shift. 
This corrupted the slack initial guess, driving OSQP into an invalid ADMM state that propagated across cycles. **Fix:** Partitioned the warm-start shift into independent U-block and ε-block shifts, with `allFinite()` guards and error-checked reset on solver failure. Post-fix benchmarks confirm **zero sentinel occurrences across 22,179 cycles** (6 runs). See [osqp_solver.cpp:143-171](src/osqp_solver.cpp#L143-L171) for the fix. - **Deadline misses**: In clean runs the cached condensed Hessian reduced mean cycle time from 7.41 ms (v0.2.0) to 4.00 ms in the original benchmark, and from 3.06 ms to 2.69 ms (−12%) in the paired A/B validation. The Hessian cache eliminates ~512K FLOPs/cycle of redundant matrix-matrix products, reducing per-cycle Eigen heap allocations from 17+ to ~3. Remaining contributors include WSL2 virtualization overhead, Gazebo scheduling, and solver polishing cost. +### Reproducing the Benchmark + +Prerequisites: ROS 2 Jazzy, Gazebo (gz_ros2_control), `rosbag2_py`, `numpy`, `matplotlib`. + +```bash +# 1. Build +source /opt/ros/jazzy/setup.bash +cd ros2_ws && colcon build --packages-select mpc_controller +source install/setup.bash + +# 2. Record a single run (after a 60 s warm-up; stop recording with Ctrl-C) +ros2 launch mpc_controller rrbot_mpc.launch.py & +sleep 60 # wait for simulation to stabilize +ros2 bag record /mpc_controller/diagnostics -o bench_run_01 +kill %1 + +# 3. Analyze +python3 src/mpc_controller/scripts/benchmark_plot.py \ + --bags bench_run_01 --output results --plot --csv --json + +# 4. Multi-run summary (2+ runs) +python3 src/mpc_controller/scripts/benchmark_plot.py \ + --bags bench_run_01 bench_run_02 bench_run_03 \ + --output results --summary --csv --json +``` + +The `--csv` flag exports a spreadsheet-friendly table (one row per run). The +`--json` flag exports per-run details plus a weighted cross-run summary. +The `--summary` flag prints aggregated statistics with cross-run standard +deviation when 2+ bags are provided. 
# ---------------------------------------------------------------------------
# Repeated-run summary
# ---------------------------------------------------------------------------

def print_summary(all_metrics: list):
    """Print aggregated statistics across multiple benchmark runs.

    For each key metric the table shows the average weighted by each run's
    ``total_steps``, followed by the unweighted cross-run standard deviation,
    minimum and maximum. Only runs that report a metric contribute to its
    row; a metric reported by no run is omitted. A per-run listing of
    optimal-solve and deadline-miss rates follows the table.

    Prints a short notice and returns when fewer than two runs are given,
    and returns silently when no run has any steps.

    Args:
        all_metrics: One metrics dict per run.
    """
    if len(all_metrics) < 2:
        print(" (Need 2+ runs for cross-run summary)")
        return

    total_steps = sum(m.get("total_steps", 0) for m in all_metrics)
    if total_steps == 0:
        return

    def wmean(key):
        # Weight by total_steps, skipping runs that lack the metric so they
        # do not drag the average toward zero.
        weighted_sum = 0.0
        weight = 0
        for m in all_metrics:
            steps = m.get("total_steps", 0)
            if key in m and steps:
                weighted_sum += steps * m[key]
                weight += steps
        return weighted_sum / weight if weight else None

    def across_runs(key):
        vals = [m[key] for m in all_metrics if key in m]
        if not vals:
            return None
        arr = np.array(vals)
        return {
            "mean": float(np.mean(arr)),
            "std": float(np.std(arr)),
            "min": float(np.min(arr)),
            "max": float(np.max(arr)),
        }

    def fmt(key, value):
        # The display unit is chosen from the metric name.
        if "pct" in key:
            return f"{value:.2f}%"
        if "rad" in key or "error" in key:
            return f"{value:.3f} rad"
        if "us" in key:
            if value < 1000:
                return f"{value:.0f} µs"
            return f"{value / 1000:.2f} ms"
        return f"{value:.4f}"

    print("\n" + "=" * 85)
    print(f" REPEATED-RUN SUMMARY ({len(all_metrics)} runs, {total_steps:,} total cycles)")
    print("=" * 85)

    # Metrics shown in the table, with their display labels.
    weighted_metrics = [
        ("solve_time_mean_us", "Solve time mean"),
        ("solve_time_p95_us", "Solve time P95"),
        ("solve_time_p99_us", "Solve time P99"),
        ("cycle_time_mean_us", "Cycle time mean"),
        ("cycle_time_p95_us", "Cycle time P95"),
        ("cycle_time_p99_us", "Cycle time P99"),
        ("optimal_rate_pct", "Optimal solve rate"),
        ("deadline_miss_pct", "Deadline miss rate"),
        ("position_rms_error", "Position RMS error"),
        ("hold_rate_pct", "Hold rate"),
    ]

    print(f"\n {'Metric':<30} {'Weighted':>12} {'Run σ':>10} {'Min':>12} {'Max':>12}")
    print(" " + "-" * 76)

    for key, label in weighted_metrics:
        ar = across_runs(key)
        if ar is None:
            continue
        wm = wmean(key)
        # None means every run reporting this metric had zero steps.
        wm_text = fmt(key, wm) if wm is not None else "n/a"
        print(f" {label:<30} {wm_text:>12} {fmt(key, ar['std']):>10} "
              f"{fmt(key, ar['min']):>12} {fmt(key, ar['max']):>12}")

    # Per-run opt rate
    print("\n Per-run optimal solve rates:")
    for m in all_metrics:
        name = m.get("name", "?")
        opt = m.get("optimal_rate_pct", 0)
        steps = m.get("total_steps", 0)
        dl = m.get("deadline_miss_pct", 0)
        print(f" {name:<25} {opt:>6.1f}% ({steps:>5} cycles, "
              f"DL miss {dl:.1f}%)")

    print("\n" + "=" * 85)
    print()
changed, 4 insertions(+), 3 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index 190a858..a2c282d 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -26,9 +26,10 @@ ### v0.2.2 — Runtime Characterization - [ ] Collect native Ubuntu 24.04 benchmark results ([#1](https://github.com/yeezhouyi/mpc_controller/issues/1)) -- [ ] Add P95 / P99 latency statistics -- [ ] Add repeated-run benchmark summary -- [ ] Export benchmark results to CSV / JSON +- [x] Add P95 / P99 latency statistics +- [x] Add repeated-run benchmark summary +- [x] Export benchmark results to CSV / JSON +- [x] Add benchmark reproduction guide - [ ] Update README benchmark table with native results - [ ] Document WSL2 vs native Linux timing limitations - [ ] ControllerUpdateStats integration