From b187d5c4080d72de696c93902c2a5006c5e1e03b Mon Sep 17 00:00:00 2001 From: Hans Halverson Date: Thu, 18 Jun 2026 15:58:46 -0700 Subject: [PATCH] [tests] Third-party benchmark test suite that runs Octane, JetStream, and Web Tooling Benchmark --- .github/workflows/ci.yml | 7 +- Cargo.lock | 10 ++ Cargo.toml | 1 + src/benches/README.md | 2 +- tests/perf/.gitignore | 1 + tests/perf/Cargo.toml | 18 ++ tests/perf/README.md | 58 +++++++ tests/perf/install.sh | 67 +++++++ tests/perf/shims/octane.js | 12 ++ tests/perf/src/main.rs | 249 +++++++++++++++++++++++++++ tests/perf/src/report.rs | 144 ++++++++++++++++ tests/perf/src/runner.rs | 148 ++++++++++++++++ tests/perf/src/suite.rs | 126 ++++++++++++++ tests/perf/src/suites/jetstream.rs | 217 +++++++++++++++++++++++ tests/perf/src/suites/mod.rs | 3 + tests/perf/src/suites/octane.rs | 145 ++++++++++++++++ tests/perf/src/suites/web_tooling.rs | 145 ++++++++++++++++ 17 files changed, 1350 insertions(+), 3 deletions(-) create mode 100644 tests/perf/.gitignore create mode 100644 tests/perf/Cargo.toml create mode 100644 tests/perf/README.md create mode 100755 tests/perf/install.sh create mode 100644 tests/perf/shims/octane.js create mode 100644 tests/perf/src/main.rs create mode 100644 tests/perf/src/report.rs create mode 100644 tests/perf/src/runner.rs create mode 100644 tests/perf/src/suite.rs create mode 100644 tests/perf/src/suites/jetstream.rs create mode 100644 tests/perf/src/suites/mod.rs create mode 100644 tests/perf/src/suites/octane.rs create mode 100644 tests/perf/src/suites/web_tooling.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eea90326..77cf29bb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -118,7 +118,7 @@ jobs: - name: Run integration tests on nightly without alloc_error feature enabled run: cargo brimstone-test --release --no-default-features --features nightly -- --reindex --ignore-unimplemented - build-benchmarks: + build-perf: runs-on: ubuntu-latest 
steps: - name: Checkout code @@ -126,7 +126,10 @@ jobs: - uses: Swatinem/rust-cache@82a92a6e8fbeee089604da2575dc567ae9ddeaab # v2.7.5 - - name: Build benchmarks + - name: Build performance test suite + run: cargo build -p brimstone_perf + + - name: Build benchmarks tests run: cargo bench --no-run build-fuzzer: diff --git a/Cargo.lock b/Cargo.lock index ab14f883..08b32688 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -132,6 +132,16 @@ dependencies = [ "syn 2.0.100", ] +[[package]] +name = "brimstone_perf" +version = "0.1.0" +dependencies = [ + "clap", + "regex", + "serde", + "serde_json", +] + [[package]] name = "brimstone_serialized_heap" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index e1695d3b..964466ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ members = [ "tests", "tests/fuzz", "tests/harness", + "tests/perf", ] resolver = "2" diff --git a/src/benches/README.md b/src/benches/README.md index 5e4e78c4..e85929e3 100644 --- a/src/benches/README.md +++ b/src/benches/README.md @@ -1,6 +1,6 @@ # Benchmarks -Brimstone's performance testing is found in this directory. +Brimstone's first-party performance microbenchmark testing is found in this directory. 
## Installation diff --git a/tests/perf/.gitignore b/tests/perf/.gitignore new file mode 100644 index 00000000..22d0d82f --- /dev/null +++ b/tests/perf/.gitignore @@ -0,0 +1 @@ +vendor diff --git a/tests/perf/Cargo.toml b/tests/perf/Cargo.toml new file mode 100644 index 00000000..9bc35e71 --- /dev/null +++ b/tests/perf/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "brimstone_perf" +version.workspace = true +authors.workspace = true +edition.workspace = true + +[[bin]] +name = "bs-perf" +path = "src/main.rs" + +[dependencies] +clap = { workspace = true, features = ["derive"] } +regex.workspace = true +serde = { workspace = true, features = ["derive"] } +serde_json.workspace = true + +[lints] +workspace = true diff --git a/tests/perf/README.md b/tests/perf/README.md new file mode 100644 index 00000000..a6311c2e --- /dev/null +++ b/tests/perf/README.md @@ -0,0 +1,58 @@ +# Performance suite + +Runs standard third-party JavaScript performance suites against brimstone. + +Supports [Octane](https://github.com/chromium/octane), [JetStream](https://github.com/WebKit/JetStream), and [Web Tooling Benchmark](https://github.com/v8/web-tooling-benchmark/). + +## Installation + +Suites are vendored into `vendor/` with `./install.sh`. Requires `git`, and `npm` for the Web Tooling Benchmark build. + +## Run + +``` +# Build bs in release mode and run Octane: +cargo run -p brimstone_perf -- --suite octane + +# Run only specific benchmarks (case-insensitive substring match, repeatable): +cargo run -p brimstone_perf -- --suite octane --bench richards --bench splay + +# Structured JSON, written to a file: +cargo run -p brimstone_perf -- --suite octane --format json --out octane.json +``` + +Useful flags: `--bs-path PATH` (use an existing binary instead of building), +`--vendor-dir DIR`, `--format pretty|json`, `--out FILE`, `--flamegraph [FILE]`. 
+ +## Profiling a run (flamegraph) + +Pass `--flamegraph` to profile the `bs` process for a run and write a flamegraph SVG, +using the [`flamegraph`](https://github.com/flamegraph-rs/flamegraph) CLI. + +``` +# Initial setup: make sure flamegraph is installed +cargo install flamegraph + +# Run a single benchmark with profiling and write flamegraph output +cargo run --release -p brimstone_perf -- --suite octane --bench raytrace --flamegraph +``` + +Notes: + +- **Flamegraph arguments.** `--flamegraph-arg=ARG` forwards any raw `flamegraph` argument. +- **Output file.** `--flamegraph=FILE` writes the output to a particular file. Defaults to + `flamegraph.svg` if no file name is provided. +- **Build with symbols.** When `--flamegraph` is set and the harness builds `bs` itself, it + builds release *with debug info* (`CARGO_PROFILE_RELEASE_DEBUG=true`) so frames are named. + If you supply your own `--bs-path`, build it with debug symbols yourself. + +## How it works + +The harness runs `bs` as a subprocess and prints results to stdout. `bs` is run with: + +``` +bs --expose-test-shell-compat FILES... [-- SCRIPT_ARGS...] +``` + +`--expose-test-shell-compat` installs the shell host functions that benchmark suites +expect. Everything after `--` is exposed as `globalThis.arguments`. diff --git a/tests/perf/install.sh b/tests/perf/install.sh new file mode 100755 index 00000000..74bb3fe8 --- /dev/null +++ b/tests/perf/install.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +# Installs the 3p performance suites into ./vendor. 
+ +set -e + +CURRENT_DIR=$(cd "$(dirname "$0")" && pwd) +VENDOR_DIR="$CURRENT_DIR/vendor" + +# Pinned commits for each 3p suite +OCTANE_COMMIT=570ad1ccfe86e3eecba0636c8f932ac08edec517 +JETSTREAM_COMMIT=b7babdf323e64e69bd2f6c376189c15825f5c73a +WEB_TOOLING_COMMIT=4a12828c6a1eed02a70c011bd080445dd319a05f + +OCTANE_DIR="$VENDOR_DIR/octane" +JETSTREAM_DIR="$VENDOR_DIR/jetstream" +WEB_TOOLING_BENCHMARK_DIR="$VENDOR_DIR/web-tooling-benchmark" + +mkdir -p "$VENDOR_DIR" + +# Shallow-fetch a single pinned commit: clone_pinned . +clone_pinned() { + rm -rf "$1" && + git init -q "$1" && + git -C "$1" remote add origin "$2" && + git -C "$1" fetch -q --depth 1 origin "$3" && + git -C "$1" checkout -q FETCH_HEAD +} + +if [ ! -d "$OCTANE_DIR/.git" ]; then + echo "==> Installing Octane" + clone_pinned "$OCTANE_DIR" https://github.com/chromium/octane "$OCTANE_COMMIT" +else + echo "==> Octane already installed" +fi + +if [ ! -d "$JETSTREAM_DIR/.git" ]; then + echo "==> Installing JetStream" + clone_pinned "$JETSTREAM_DIR" https://github.com/WebKit/JetStream "$JETSTREAM_COMMIT" +else + echo "==> JetStream already installed" +fi + +WEB_TOOLING_BENCHMARKS="acorn babel babel-minify babylon buble chai coffeescript espree \ + esprima jshint lebab postcss prepack prettier source-map terser typescript uglify-js" + +if [ ! -f "$WEB_TOOLING_BENCHMARK_DIR/dist/cli-acorn.js" ]; then + echo "==> Installing Web Tooling Benchmark" + if [ ! -d "$WEB_TOOLING_BENCHMARK_DIR/.git" ]; then + clone_pinned "$WEB_TOOLING_BENCHMARK_DIR" https://github.com/v8/web-tooling-benchmark "$WEB_TOOLING_COMMIT" + fi + ( + cd "$WEB_TOOLING_BENCHMARK_DIR" + [ -d node_modules ] || npm ci + + # Build separate bundles for each individual benchmark so they can be run independently. 
+ for bench in $WEB_TOOLING_BENCHMARKS; do + echo " building standalone bundle for $bench" + npm run build -- --env.only "$bench" + mv dist/cli.js "dist/cli-$bench.js" + done + ) +else + echo "==> Web Tooling Benchmark already installed" +fi + +echo "Done. Vendored suites are in $VENDOR_DIR" diff --git a/tests/perf/shims/octane.js b/tests/perf/shims/octane.js new file mode 100644 index 00000000..1b118b90 --- /dev/null +++ b/tests/perf/shims/octane.js @@ -0,0 +1,12 @@ +// Print easily parseable results from a particular run of Octane +BenchmarkSuite.RunSuites({ + NotifyResult: function (name, result) { + print("RESULT " + name + " " + result); + }, + NotifyError: function (name, error) { + print("ERROR " + name + " " + error); + }, + NotifyScore: function (score) { + print("SCORE " + score); + }, +}); diff --git a/tests/perf/src/main.rs b/tests/perf/src/main.rs new file mode 100644 index 00000000..3afb29a9 --- /dev/null +++ b/tests/perf/src/main.rs @@ -0,0 +1,249 @@ +mod report; +mod runner; +mod suite; +mod suites; + +use std::{ + path::{Path, PathBuf}, + process::Command, +}; + +use clap::{Parser, ValueEnum}; + +use crate::report::SuiteRun; +use crate::runner::Flamegraph; +use crate::suite::{BenchFilter, RunContext, all_suites, find_suite}; + +#[derive(Clone, Copy, ValueEnum)] +enum Format { + Pretty, + Json, +} + +#[derive(Parser)] +#[command(about = "Run standard JS performance suites against the brimstone `bs` engine")] +struct Args { + /// Suite(s) to run: octane, web-tooling, jetstream, or all. Repeatable. + #[arg(long, default_values_t = vec!["octane".to_string()])] + suite: Vec, + + /// Run only the benchmarks with this name. Repeatable. + #[arg(long)] + bench: Vec, + + /// Output format. + #[arg(long, value_enum, default_value_t = Format::Pretty)] + format: Format, + + /// Write output to this file instead of stdout. + #[arg(long)] + out: Option, + + /// Path to the brimstone executable. If omitted, builds and uses a release build. 
+ #[arg(long)] + bs_path: Option, + + /// Directory where suites are installed. Defaults to the crate's vendor/ directory. + #[arg(long)] + vendor_dir: Option, + + /// Profile each `bs` run with the `flamegraph` CLI (must be on PATH), writing an SVG to + /// this path (default: flamegraph.svg) + #[arg(long, num_args = 0..=1, default_missing_value = "flamegraph.svg")] + flamegraph: Option, + + /// Extra raw argument forwarded to the `flamegraph` CLI, before the `--` separator. Repeatable. + #[arg(long)] + flamegraph_arg: Vec, +} + +fn main() { + let args = Args::parse(); + + let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); + let workspace_root = manifest_dir + .parent() + .and_then(Path::parent) + .unwrap_or(manifest_dir) + .to_path_buf(); + + let shims_dir = manifest_dir.join("shims"); + let vendor_dir = args + .vendor_dir + .clone() + .unwrap_or_else(|| manifest_dir.join("vendor")); + + let requested = resolve_suites(&args.suite); + let suite_specs = match requested { + Ok(s) => s, + Err(e) => { + eprintln!("error: {e}"); + std::process::exit(2); + } + }; + + let filter = BenchFilter::new(&args.bench); + + // Validate the flamegraph CLI up front rather than failing on the first `bs` spawn. + let flamegraph_config = match build_flamegraph_config(&args) { + Ok(cfg) => cfg, + Err(e) => { + eprintln!("error: {e}"); + std::process::exit(1); + } + }; + + // Locate or build bs (with debug symbols when profiling, for named frames). 
+ let bs_path = match resolve_bs_path(&args, &workspace_root, flamegraph_config.is_some()) { + Ok(p) => p, + Err(e) => { + eprintln!("error: {e}"); + std::process::exit(1); + } + }; + + let ctx = RunContext { + bs_path: &bs_path, + vendor_dir: &vendor_dir, + shims_dir: &shims_dir, + filter: &filter, + flamegraph: flamegraph_config.as_ref(), + }; + + let mut runs: Vec = Vec::new(); + for spec in &suite_specs { + if !spec.is_available(&ctx) { + eprintln!( + "skipping {}: not installed under {} (run tests/perf/install.sh)", + spec.name(), + vendor_dir.join(spec.vendor_subdir()).display() + ); + continue; + } + eprintln!("running {}...", spec.name()); + runs.push(spec.run(&ctx)); + } + + if runs.is_empty() { + eprintln!("no suites ran; nothing to report"); + std::process::exit(1); + } + + let rendered = match args.format { + Format::Pretty => report::to_pretty(&runs), + Format::Json => report::to_json(&runs), + }; + + match &args.out { + Some(path) => { + if let Err(e) = std::fs::write(path, rendered) { + eprintln!("error: failed to write {}: {e}", path.display()); + std::process::exit(1); + } + eprintln!("wrote results to {}", path.display()); + } + None => println!("{rendered}"), + } +} + +/// Resolve suite names (including "all") to specs, in order and de-duplicated. 
+fn resolve_suites(names: &[String]) -> Result>, String> { + let mut selected: Vec> = Vec::new(); + let mut seen: Vec<&str> = Vec::new(); + + let push = |spec: Box, + seen: &mut Vec<&'static str>, + out: &mut Vec>| { + if !seen.contains(&spec.name()) { + seen.push(spec.name()); + out.push(spec); + } + }; + + for name in names { + if name == "all" { + for spec in all_suites() { + push(spec, &mut seen, &mut selected); + } + } else { + match find_suite(name) { + Some(spec) => push(spec, &mut seen, &mut selected), + None => { + let known: Vec<&str> = all_suites().iter().map(|s| s.name()).collect(); + return Err(format!( + "unknown suite '{name}'; known suites: {}, all", + known.join(", ") + )); + } + } + } + } + + Ok(selected) +} + +/// Path to the `bs` binary, building it in release if needed. `with_debug_symbols` adds +/// debug info (for readable flamegraph frames) while keeping release optimizations. +fn resolve_bs_path( + args: &Args, + workspace_root: &Path, + with_debug_symbols: bool, +) -> Result { + if let Some(path) = &args.bs_path { + if path.is_file() { + if with_debug_symbols { + eprintln!( + "note: profiling a pre-built --bs-path; for readable frames it should be \ + built with debug symbols (e.g. CARGO_PROFILE_RELEASE_DEBUG=true)" + ); + } + return Ok(path.clone()); + } + return Err(format!("--bs-path {} does not exist", path.display())); + } + + let default = workspace_root.join("target/release/bs"); + + let mut cmd = Command::new("cargo"); + cmd.current_dir(workspace_root) + .args(["build", "--release", "-p", "brimstone"]); + if with_debug_symbols { + // Keep release optimizations but emit debug info, so flamegraph frames are named. 
+ eprintln!("building bs (release + debug symbols)..."); + cmd.env("CARGO_PROFILE_RELEASE_DEBUG", "true"); + } else { + eprintln!("building bs (release)..."); + } + let status = cmd + .status() + .map_err(|e| format!("failed to run cargo build: {e}"))?; + if !status.success() { + return Err("cargo build --release -p brimstone failed".to_string()); + } + + if default.is_file() { + Ok(default) + } else { + Err(format!( + "bs binary not found at {}; build it or pass --bs-path", + default.display() + )) + } +} + +fn build_flamegraph_config(args: &Args) -> Result, String> { + let Some(output) = args.flamegraph.clone() else { + if !args.flamegraph_arg.is_empty() { + eprintln!("note: --flamegraph-arg ignored without --flamegraph"); + } + return Ok(None); + }; + + if !runner::flamegraph_available() { + return Err("--flamegraph requires the `flamegraph` CLI on PATH; install it with \ + `cargo install flamegraph` (it uses perf on Linux, dtrace on macOS)" + .to_string()); + } + + Ok(Some(Flamegraph { output, extra_args: args.flamegraph_arg.clone() })) +} diff --git a/tests/perf/src/report.rs b/tests/perf/src/report.rs new file mode 100644 index 00000000..8cd9fe79 --- /dev/null +++ b/tests/perf/src/report.rs @@ -0,0 +1,144 @@ +use serde::Serialize; + +#[derive(Clone, Copy, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum Status { + Ok, + /// Intentionally not run (e.g. needs an engine feature brimstone lacks). + Skipped, + /// Attempted but failed (crash, exception, or missing score in output). + Error, +} + +#[derive(Clone, Serialize)] +pub struct BenchResult { + pub name: String, + /// Score as reported by the suite. `None` for skipped/errored. + pub score: Option, + /// Suite-defined unit, e.g. "octane", "runs/s". 
+ pub unit: String, + pub status: Status, + #[serde(skip_serializing_if = "Option::is_none")] + pub note: Option, +} + +impl BenchResult { + pub fn ok(name: impl Into, score: f64, unit: impl Into) -> Self { + BenchResult { + name: name.into(), + score: Some(score), + unit: unit.into(), + status: Status::Ok, + note: None, + } + } + + pub fn skipped( + name: impl Into, + unit: impl Into, + note: impl Into, + ) -> Self { + BenchResult { + name: name.into(), + score: None, + unit: unit.into(), + status: Status::Skipped, + note: Some(note.into()), + } + } + + pub fn error( + name: impl Into, + unit: impl Into, + note: impl Into, + ) -> Self { + BenchResult { + name: name.into(), + score: None, + unit: unit.into(), + status: Status::Error, + note: Some(note.into()), + } + } +} + +#[derive(Clone, Serialize)] +pub struct SuiteRun { + pub suite: String, + pub bs_path: String, + pub wall_clock_ms: f64, + pub results: Vec, + /// Overall score for the suite, as reported by the suite itself. + #[serde(skip_serializing_if = "Option::is_none")] + pub summary: Option, + #[serde(skip)] + pub pretty_score_precision: usize, +} + +pub fn to_json(runs: &[SuiteRun]) -> String { + serde_json::to_string_pretty(runs).expect("suite runs are serializable") +} + +/// Render the suite runs as a human-readable aligned table. 
+pub fn to_pretty(runs: &[SuiteRun]) -> String { + let mut out = String::new(); + for run in runs { + out.push_str(&format!("=== {} ===\n", run.suite)); + out.push_str(&format!("wall clock: {:.0} ms\n\n", run.wall_clock_ms)); + + let name_width = run + .results + .iter() + .map(|r| r.name.len()) + .chain(std::iter::once("Benchmark".len())) + .max() + .unwrap_or(9) + .max(9); + + out.push_str(&format!( + "{:14} {:<8} {}\n", + "Benchmark", + "Score", + "Status", + "Note", + width = name_width + )); + out.push_str(&format!("{}\n", "-".repeat(name_width + 14 + 8 + 10))); + + for r in &run.results { + let status = match r.status { + Status::Ok => "ok", + Status::Skipped => "skipped", + Status::Error => "error", + }; + out.push_str(&format!( + "{:14} {:<8} {}\n", + r.name, + format_score(r.score, run.pretty_score_precision), + status, + r.note.as_deref().unwrap_or(""), + width = name_width + )); + } + + if let Some(summary) = &run.summary { + out.push_str(&format!("{}\n", "-".repeat(name_width + 14 + 8 + 10))); + out.push_str(&format!( + "{:14} ({})\n", + summary.name, + format_score(summary.score, run.pretty_score_precision), + summary.unit, + width = name_width + )); + } + out.push('\n'); + } + out +} + +fn format_score(score: Option, precision: usize) -> String { + match score { + None => "-".to_string(), + Some(score) => format!("{score:.precision$}"), + } +} diff --git a/tests/perf/src/runner.rs b/tests/perf/src/runner.rs new file mode 100644 index 00000000..d13ecd90 --- /dev/null +++ b/tests/perf/src/runner.rs @@ -0,0 +1,148 @@ +use std::{ + path::{Path, PathBuf}, + process::Command, + sync::atomic::{AtomicU64, Ordering}, + time::{Duration, Instant}, +}; + +pub struct BsOutput { + pub stdout: String, + pub stderr: String, + pub success: bool, + pub wall_clock_ms: f64, + pub timed_out: bool, +} + +/// Profiles `bs` via the [`flamegraph`](https://github.com/flamegraph-rs/flamegraph) CLI, +/// writing an SVG to `output`. 
+#[derive(Clone)] +pub struct Flamegraph { + pub output: PathBuf, + pub extra_args: Vec, +} + +impl Flamegraph { + pub fn labeled(&self, label: &str) -> Flamegraph { + let sanitized: String = label + .chars() + .map(|c| { + if c.is_ascii_alphanumeric() || c == '-' || c == '_' { + c + } else { + '-' + } + }) + .collect(); + let stem = self + .output + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or("flamegraph"); + let ext = self + .output + .extension() + .and_then(|s| s.to_str()) + .unwrap_or("svg"); + Flamegraph { + output: self + .output + .with_file_name(format!("{stem}.{sanitized}.{ext}")), + extra_args: self.extra_args.clone(), + } + } +} + +pub fn flamegraph_available() -> bool { + Command::new("flamegraph") + .arg("--version") + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .status() + .map(|s| s.success()) + .unwrap_or(false) +} + +pub fn run_bs( + bs_path: &Path, + flags: &[String], + files: &[PathBuf], + flamegraph: Option<&Flamegraph>, +) -> std::io::Result { + run_bs_with_timeout(bs_path, flags, files, &[], None, flamegraph) +} + +pub fn run_bs_with_timeout( + bs_path: &Path, + flags: &[String], + files: &[PathBuf], + script_args: &[String], + timeout: Option, + flamegraph: Option<&Flamegraph>, +) -> std::io::Result { + static COUNTER: AtomicU64 = AtomicU64::new(0); + let id = COUNTER.fetch_add(1, Ordering::Relaxed); + let base = std::env::temp_dir().join(format!("bs-perf-{}-{}", std::process::id(), id)); + let out_path = base.with_extension("out"); + let err_path = base.with_extension("err"); + + // `flamegraph -- bs `: flamegraph forwards everything after `--` to bs verbatim, + // including the second `--` that hands `script_args` to bs. 
+ let mut cmd = match flamegraph { + Some(fg) => { + eprintln!(" profiling -> {}", fg.output.display()); + let mut cmd = Command::new("flamegraph"); + cmd.arg("-o").arg(&fg.output); + cmd.args(&fg.extra_args); + cmd.arg("--"); + cmd.arg(bs_path); + cmd + } + None => Command::new(bs_path), + }; + cmd.args(flags); + for file in files { + cmd.arg(file); + } + if !script_args.is_empty() { + cmd.arg("--"); + cmd.args(script_args); + } + cmd.stdout(std::fs::File::create(&out_path)?) + .stderr(std::fs::File::create(&err_path)?); + + let start = Instant::now(); + let mut child = cmd.spawn()?; + + let mut timed_out = false; + let status = loop { + if let Some(status) = child.try_wait()? { + break status; + } + if let Some(timeout) = timeout { + if start.elapsed() > timeout { + let _ = child.kill(); + timed_out = true; + break child.wait()?; + } + } + std::thread::sleep(Duration::from_millis(50)); + }; + + let wall_clock_ms = start.elapsed().as_secs_f64() * 1000.0; + let stdout = std::fs::read_to_string(&out_path).unwrap_or_default(); + let mut stderr = std::fs::read_to_string(&err_path).unwrap_or_default(); + let _ = std::fs::remove_file(&out_path); + let _ = std::fs::remove_file(&err_path); + + if timed_out { + stderr.push_str(&format!("\nkilled: exceeded {:?} timeout", timeout.unwrap())); + } + + Ok(BsOutput { + stdout, + stderr, + success: status.success() && !timed_out, + wall_clock_ms, + timed_out, + }) +} diff --git a/tests/perf/src/suite.rs b/tests/perf/src/suite.rs new file mode 100644 index 00000000..dc0c5fbc --- /dev/null +++ b/tests/perf/src/suite.rs @@ -0,0 +1,126 @@ +use std::path::{Path, PathBuf}; + +use crate::report::{BenchResult, SuiteRun}; +use crate::runner::Flamegraph; +use crate::suites; + +/// Case-insensitive substring filter over benchmark names; an empty filter matches all. 
+#[derive(Default)] +pub struct BenchFilter { + terms: Vec, +} + +impl BenchFilter { + pub fn new(terms: &[String]) -> Self { + BenchFilter { terms: terms.iter().map(|t| t.to_lowercase()).collect() } + } + + pub fn is_empty(&self) -> bool { + self.terms.is_empty() + } + + pub fn matches(&self, name: &str) -> bool { + if self.is_empty() { + return true; + } + let name = name.to_lowercase(); + self.terms.iter().any(|t| name.contains(t)) + } +} + +/// Inputs shared by every suite run. +pub struct RunContext<'a> { + pub bs_path: &'a Path, + pub vendor_dir: &'a Path, + pub shims_dir: &'a Path, + /// Which benchmarks to run; empty = all. + pub filter: &'a BenchFilter, + pub flamegraph: Option<&'a Flamegraph>, +} + +impl RunContext<'_> { + pub fn suite_dir(&self, subdir: &str) -> PathBuf { + self.vendor_dir.join(subdir) + } + + pub fn shim(&self, file: &str) -> PathBuf { + self.shims_dir.join(file) + } + + /// The flamegraph spec for a profiled `bs` invocation, or `None` if profiling is off. + /// Pass a `label` (e.g. a benchmark name) when a suite spawns `bs` more than once. 
+ pub fn flamegraph(&self, label: Option<&str>) -> Option { + self.flamegraph.map(|base| match label { + Some(label) => base.labeled(label), + None => base.clone(), + }) + } + + pub fn suite_run( + &self, + suite: &impl SuiteSpec, + wall_clock_ms: f64, + results: Vec, + summary: Option, + ) -> SuiteRun { + SuiteRun { + suite: suite.name().to_string(), + bs_path: self.bs_path.display().to_string(), + wall_clock_ms, + results, + summary, + pretty_score_precision: suite.pretty_score_precision(), + } + } + + pub fn single_error_run( + &self, + suite: &impl SuiteSpec, + unit: &str, + message: impl Into, + ) -> SuiteRun { + self.suite_run(suite, 0.0, vec![BenchResult::error("", unit, message)], None) + } +} + +pub trait SuiteSpec { + fn name(&self) -> &'static str; + + fn vendor_subdir(&self) -> &'static str; + + fn is_available(&self, ctx: &RunContext) -> bool; + + fn pretty_score_precision(&self) -> usize; + + fn run(&self, ctx: &RunContext) -> SuiteRun; +} + +pub fn all_suites() -> Vec> { + vec![ + Box::new(suites::octane::Octane), + Box::new(suites::web_tooling::WebTooling), + Box::new(suites::jetstream::JetStream), + ] +} + +pub fn find_suite(name: &str) -> Option> { + all_suites().into_iter().find(|s| s.name() == name) +} + +/// The first non-blank line of `s` (trimmed), if any. +pub fn first_nonempty_line(s: &str) -> Option<&str> { + s.lines().map(str::trim).find(|l| !l.is_empty()) +} + +/// Geometric mean of the scored results (the suites' own scoring), or `None` if nothing +/// scored. Reported as a "Total" carrying the unit of the first result. 
+pub fn geomean_summary(results: &[BenchResult]) -> Option { + let scores: Vec = results.iter().filter_map(|r| r.score).collect(); + if scores.is_empty() { + return None; + } + let unit = results.first().map_or("", |r| r.unit.as_str()); + let sum_ln: f64 = scores.iter().map(|s| s.ln()).sum(); + let geomean = (sum_ln / scores.len() as f64).exp(); + Some(BenchResult::ok("Total", geomean, unit)) +} diff --git a/tests/perf/src/suites/jetstream.rs b/tests/perf/src/suites/jetstream.rs new file mode 100644 index 00000000..ecf7fca2 --- /dev/null +++ b/tests/perf/src/suites/jetstream.rs @@ -0,0 +1,217 @@ +use std::{path::Path, time::Duration}; + +use serde_json::Value as Json; + +use crate::report::{BenchResult, SuiteRun}; +use crate::runner::run_bs_with_timeout; +use crate::suite::{RunContext, SuiteSpec, first_nonempty_line, geomean_summary}; + +const UNIT: &str = "jetstream"; + +/// JetStream's shell entrypoint. +const CLI: &str = "cli.js"; + +/// `iteration-count` must exceed `worst-case-count`; kept modest so a full run is tractable. +const ITERATION_COUNT: &str = "--iteration-count=8"; +const WORST_CASE_COUNT: &str = "--worst-case-count=2"; + +const BENCH_TIMEOUT: Duration = Duration::from_secs(90); + +/// All individual benchmarks in this suite. 
+const BENCHMARKS: &[&str] = &[ + "8bitbench-wasm", + "acorn-wtb", + "ai-astar", + "Air", + "argon2-wasm", + "async-fs", + "babel-minify-wtb", + "babel-wtb", + "Babylon", + "babylon-wtb", + "babylonjs-scene-es6", + "babylonjs-startup-es6", + "Basic", + "bigint-noble-ed25519", + "Box2D", + "cdjs", + "chai-wtb", + "crypto", + "Dart-flute-todomvc-wasm", + "delta-blue", + "dotnet-aot-wasm", + "dotnet-interp-wasm", + "doxbee-async", + "doxbee-promise", + "earley-boyer", + "espree-wtb", + "esprima-next-wtb", + "first-inspector-code-load", + "FlightPlanner", + "gaussian-blur", + "gbemu", + "hash-map", + "j2cl-box2d-wasm", + "js-tokens", + "jsdom-d3-startup", + "json-parse-inspector", + "json-stringify-inspector", + "Kotlin-compose-wasm", + "lazy-collections", + "mandreel", + "ML", + "mobx-startup", + "multi-inspector-code-load", + "navier-stokes", + "octane-code-load", + "OfflineAssembler", + "pdfjs", + "postcss-wtb", + "prettier-wtb", + "prismjs-startup-es6", + "proxy-mobx", + "proxy-vue", + "raytrace", + "raytrace-private-class-fields", + "raytrace-public-class-fields", + "regexp-octane", + "richards", + "richards-wasm", + "source-map-wtb", + "splay", + "sqlite3-wasm", + "stanford-crypto-aes", + "stanford-crypto-pbkdf2", + "stanford-crypto-sha256", + "Sunspider", + "sync-fs", + "threejs", + "transformersjs-bert-wasm", + "tsf-wasm", + "typescript-lib", + "UniPoker", + "validatorjs", + "web-ssr", + "WSL", + "zlib-wasm", +]; + +pub struct JetStream; + +impl SuiteSpec for JetStream { + fn name(&self) -> &'static str { + "jetstream" + } + + fn vendor_subdir(&self) -> &'static str { + "jetstream" + } + + fn is_available(&self, ctx: &RunContext) -> bool { + ctx.suite_dir(self.vendor_subdir()).join(CLI).is_file() + } + + fn pretty_score_precision(&self) -> usize { + 2 + } + + fn run(&self, ctx: &RunContext) -> SuiteRun { + let cli = ctx.suite_dir(self.vendor_subdir()).join(CLI); + + let selected: Vec<&str> = BENCHMARKS + .iter() + .copied() + .filter(|name| 
ctx.filter.matches(name)) + .collect(); + let mut results = Vec::new(); + let mut wall_clock_ms = 0.0; + for (index, name) in selected.iter().enumerate() { + eprintln!(" [{}/{}] {name}", index + 1, selected.len()); + let (result, ms) = run_one(ctx, &cli, name); + wall_clock_ms += ms; + results.push(result); + } + + let summary = geomean_summary(&results); + + ctx.suite_run(self, wall_clock_ms, results, summary) + } +} + +fn run_one(ctx: &RunContext, cli: &Path, name: &str) -> (BenchResult, f64) { + let script_args = [ + format!("--test={name}"), + "--no-prefetch".to_string(), + "--dump-json-results".to_string(), + "--force-gc".to_string(), + ITERATION_COUNT.to_string(), + WORST_CASE_COUNT.to_string(), + ]; + + let flamegraph = ctx.flamegraph(Some(name)); + let output = match run_bs_with_timeout( + ctx.bs_path, + &shell_flags(), + &[cli.to_path_buf()], + &script_args, + Some(BENCH_TIMEOUT), + flamegraph.as_ref(), + ) { + Ok(output) => output, + Err(e) => return (BenchResult::error(name, UNIT, format!("failed to spawn bs: {e}")), 0.0), + }; + + if let Some(score) = parse_score(&output.stdout, name) { + return (BenchResult::ok(name, score, UNIT), output.wall_clock_ms); + } + + if output.timed_out { + return ( + BenchResult::error(name, UNIT, format!("timed out after {BENCH_TIMEOUT:?}")), + output.wall_clock_ms, + ); + } + + // WebAssembly benchmarks are skipped, anything else is a hard error. + let detail = failure_detail(&output.stdout, &output.stderr); + let result = if name.ends_with("-wasm") || detail.contains("WebAssembly") { + BenchResult::skipped(name, UNIT, "requires WebAssembly") + } else { + BenchResult::error(name, UNIT, detail) + }; + (result, output.wall_clock_ms) +} + +/// Score lives at `.tests..metrics.Score.current[0]`. 
+fn parse_score(stdout: &str, name: &str) -> Option { + let line = stdout + .lines() + .map(str::trim) + .find(|line| line.starts_with('{') && line.contains("JetStream"))?; + let json: Json = serde_json::from_str(line).ok()?; + + // The top-level key carries a version (e.g. "JetStream3.0"); take the first object. + let root = json.as_object()?.values().next()?; + root.get("tests")? + .get(name)? + .get("metrics")? + .get("Score")? + .get("current")? + .get(0)? + .as_f64() +} + +fn failure_detail(stdout: &str, stderr: &str) -> String { + stdout + .lines() + .map(str::trim) + .find(|l| l.contains("failed:") || l.contains("Error")) + .or_else(|| first_nonempty_line(stderr)) + .or_else(|| first_nonempty_line(stdout)) + .unwrap_or("no score produced") + .to_string() +} + +fn shell_flags() -> Vec { + vec!["--expose-test-shell-compat".to_string()] +} diff --git a/tests/perf/src/suites/mod.rs b/tests/perf/src/suites/mod.rs new file mode 100644 index 00000000..6f880491 --- /dev/null +++ b/tests/perf/src/suites/mod.rs @@ -0,0 +1,3 @@ +pub mod jetstream; +pub mod octane; +pub mod web_tooling; diff --git a/tests/perf/src/suites/octane.rs b/tests/perf/src/suites/octane.rs new file mode 100644 index 00000000..3e09c670 --- /dev/null +++ b/tests/perf/src/suites/octane.rs @@ -0,0 +1,145 @@ +use std::path::PathBuf; + +use crate::report::{BenchResult, SuiteRun}; +use crate::runner::run_bs; +use crate::suite::{RunContext, SuiteSpec, first_nonempty_line}; + +const UNIT: &str = "octane"; + +/// Octane 2.0 benchmark groups and their source files, in load order (mirrors Octane's +/// `run.js`). A group runs only if all its files are present in the vendored checkout. 
+const BENCHMARKS: &[(&str, &[&str])] = &[
+    ("Richards", &["richards.js"]),
+    ("DeltaBlue", &["deltablue.js"]),
+    ("Crypto", &["crypto.js"]),
+    ("RayTrace", &["raytrace.js"]),
+    ("EarleyBoyer", &["earley-boyer.js"]),
+    ("RegExp", &["regexp.js"]),
+    ("Splay", &["splay.js"]),
+    ("NavierStokes", &["navier-stokes.js"]),
+    ("PdfJS", &["pdfjs.js"]),
+    ("Mandreel", &["mandreel.js"]),
+    ("Gameboy", &["gbemu-part1.js", "gbemu-part2.js"]),
+    ("CodeLoad", &["code-load.js"]),
+    ("Box2D", &["box2d.js"]),
+    ("zlib", &["zlib.js", "zlib-data.js"]),
+    (
+        "Typescript",
+        &[
+            "typescript.js",
+            "typescript-input.js",
+            "typescript-compiler.js",
+        ],
+    ),
+];
+
+/// The Octane suite. All selected groups are loaded into one `run_bs` invocation and
+/// their scores are read back from its stdout.
+pub struct Octane;
+
+impl SuiteSpec for Octane {
+    /// Name of this suite.
+    fn name(&self) -> &'static str {
+        "octane"
+    }
+
+    /// Subdirectory name handed to `RunContext::suite_dir` to locate the vendored files.
+    fn vendor_subdir(&self) -> &'static str {
+        "octane"
+    }
+
+    /// The suite counts as available when `base.js` exists in its vendored directory.
+    fn is_available(&self, ctx: &RunContext) -> bool {
+        ctx.suite_dir(self.vendor_subdir())
+            .join("base.js")
+            .is_file()
+    }
+
+    /// Precision for pretty-printed scores.
+    // NOTE(review): presumably the number of decimal places — confirm in the report code.
+    fn pretty_score_precision(&self) -> usize {
+        0
+    }
+
+    /// Runs every group that passes the filter and has all of its files on disk.
+    ///
+    /// Groups with missing files are reported as skipped. If no group can be loaded,
+    /// `bs` is not started and only the skipped entries are returned.
+    fn run(&self, ctx: &RunContext) -> SuiteRun {
+        let suite_dir = ctx.suite_dir(self.vendor_subdir());
+
+        // Loaded in order: base.js, the selected benchmark files, then the driver.
+        let mut files: Vec<PathBuf> = vec![suite_dir.join("base.js")];
+        let mut skipped: Vec<BenchResult> = Vec::new();
+        let mut loaded_groups = 0;
+
+        for (group, group_files) in BENCHMARKS {
+            if !ctx.filter.matches(group) {
+                continue;
+            }
+
+            // A group is all-or-nothing: one missing file skips the whole group.
+            let paths: Vec<PathBuf> = group_files.iter().map(|f| suite_dir.join(f)).collect();
+            if paths.iter().all(|p| p.is_file()) {
+                files.extend(paths);
+                loaded_groups += 1;
+            } else {
+                skipped.push(BenchResult::skipped(
+                    *group,
+                    UNIT,
+                    "benchmark source not found in vendored checkout",
+                ));
+            }
+        }
+
+        // Don't spawn bs with no benchmarks; the driver would run zero suites.
+        if loaded_groups == 0 {
+            return ctx.suite_run(self, 0.0, skipped, None);
+        }
+
+        files.push(ctx.shim("octane.js"));
+
+        let flags = vec!["--expose-test-shell-compat".to_string()];
+        let output = match run_bs(ctx.bs_path, &flags, &files, ctx.flamegraph(None).as_ref()) {
+            Ok(o) => o,
+            Err(e) => {
+                return ctx.single_error_run(self, UNIT, format!("failed to spawn bs: {e}"));
+            }
+        };
+
+        let (mut results, summary) = parse_octane_output(&output.stdout);
+
+        // An unsuccessful exit without a total score gets one extra error entry, so the
+        // failure is visible even if some per-group lines were printed.
+        if summary.is_none() && !output.success {
+            let detail = first_nonempty_line(&output.stderr)
+                .unwrap_or("bs exited unsuccessfully with no score");
+            // NOTE(review): the empty name may be an angle-bracketed label lost in
+            // extraction — confirm against the original commit.
+            results.push(BenchResult::error("", UNIT, detail.to_string()));
+        }
+
+        results.extend(skipped);
+
+        ctx.suite_run(self, output.wall_clock_ms, results, summary)
+    }
+}
+
+/// Parse the `RESULT`/`ERROR`/`SCORE` lines emitted by octane.js
+///
+/// Recognised shapes, split on single spaces:
+/// - `RESULT <name> <score>` adds an ok result;
+/// - `ERROR <name> <message>` adds an error result (the message may be empty);
+/// - `SCORE <score>` sets the `Total` summary, the last such line winning.
+///
+/// Any other line, and any line whose score does not parse as `f64`, is ignored.
+fn parse_octane_output(stdout: &str) -> (Vec<BenchResult>, Option<BenchResult>) {
+    let mut results = Vec::new();
+    let mut summary = None;
+
+    for line in stdout.lines() {
+        // At most three fields, so an error message keeps its internal spaces.
+        let mut parts = line.splitn(3, ' ');
+        match parts.next() {
+            Some("RESULT") => {
+                if let (Some(name), Some(score)) = (parts.next(), parts.next()) {
+                    if let Ok(score) = score.trim().parse::<f64>() {
+                        results.push(BenchResult::ok(name, score, UNIT));
+                    }
+                }
+            }
+            Some("ERROR") => {
+                if let Some(name) = parts.next() {
+                    let msg = parts.next().unwrap_or("").to_string();
+                    results.push(BenchResult::error(name, UNIT, msg));
+                }
+            }
+            Some("SCORE") => {
+                if let Some(score) = parts.next() {
+                    if let Ok(score) = score.trim().parse::<f64>() {
+                        summary = Some(BenchResult::ok("Total", score, UNIT));
+                    }
+                }
+            }
+            _ => {}
+        }
+    }
+
+    (results, summary)
+}
diff --git a/tests/perf/src/suites/web_tooling.rs b/tests/perf/src/suites/web_tooling.rs
new file mode 100644
index 00000000..2987753d
--- /dev/null
+++ b/tests/perf/src/suites/web_tooling.rs
@@ -0,0 +1,145 @@
+use std::path::Path;
+
+use regex::Regex;
+
+use crate::report::{BenchResult, SuiteRun};
+use crate::runner::run_bs;
+use
crate::suite::{RunContext, SuiteSpec, first_nonempty_line, geomean_summary};
+
+/// Unit label attached to every result produced by this suite.
+const UNIT: &str = "runs/s";
+
+/// All individual benchmarks in this suite.
+const BENCHMARKS: &[&str] = &[
+    "acorn",
+    "babel",
+    "babel-minify",
+    "babylon",
+    "buble",
+    "chai",
+    "coffeescript",
+    "espree",
+    "esprima",
+    "jshint",
+    "lebab",
+    "postcss",
+    "prepack",
+    "prettier",
+    "source-map",
+    "terser",
+    "typescript",
+    "uglify-js",
+];
+
+/// The Web Tooling Benchmark suite, run one tool per `run_one` call.
+pub struct WebTooling;
+
+impl SuiteSpec for WebTooling {
+    /// Name of this suite.
+    fn name(&self) -> &'static str {
+        "web-tooling"
+    }
+
+    /// Subdirectory name handed to `RunContext::suite_dir` to locate the vendored files.
+    fn vendor_subdir(&self) -> &'static str {
+        "web-tooling-benchmark"
+    }
+
+    /// The suite counts as available when the bundle of the first listed benchmark exists.
+    fn is_available(&self, ctx: &RunContext) -> bool {
+        // The per-tool bundles are what we run; check the first as a sentinel.
+        ctx.suite_dir(self.vendor_subdir())
+            .join(bundle_name(BENCHMARKS[0]))
+            .is_file()
+    }
+
+    /// Precision for pretty-printed scores.
+    // NOTE(review): presumably the number of decimal places — confirm in the report code.
+    fn pretty_score_precision(&self) -> usize {
+        2
+    }
+
+    /// Runs every benchmark that passes the filter, one bundle at a time, and sums the
+    /// wall-clock time of the individual runs.
+    fn run(&self, ctx: &RunContext) -> SuiteRun {
+        let suite_dir = ctx.suite_dir(self.vendor_subdir());
+
+        // Run each benchmark in its own bundle (a fresh `bs` process). The combined bundle
+        // runs all 18 tools in one process, whose cumulative memory exceeds bs's heap by
+        // `typescript` and fatally OOMs, dropping the rest; per-tool runs avoid that and
+        // isolate failures.
+        let mut results = Vec::new();
+        let mut wall_clock_ms = 0.0;
+        for bench in BENCHMARKS
+            .iter()
+            .copied()
+            .filter(|bench| ctx.filter.matches(bench))
+        {
+            let bundle = suite_dir.join(bundle_name(bench));
+            let (result, ms) = run_one(ctx, bench, &bundle);
+            wall_clock_ms += ms;
+            results.push(result);
+        }
+
+        // Overall geometric mean only for a full run; a --bench subset isn't the suite metric.
+        let summary = if ctx.filter.is_empty() {
+            geomean_summary(&results)
+        } else {
+            None
+        };
+        ctx.suite_run(self, wall_clock_ms, results, summary)
+    }
+}
+
+/// Path (relative to the vendored dir) of the standalone bundle for `bench`.
+fn bundle_name(bench: &str) -> String {
+    format!("dist/cli-{bench}.js")
+}
+
+/// Runs the standalone `bundle` for `bench` through `run_bs` and returns its result with
+/// the run's `wall_clock_ms`.
+///
+/// A missing bundle or a spawn failure yields an error result paired with `0.0`. When
+/// stdout contains no parsable score line, the error detail is whatever
+/// `first_nonempty_line` yields for stderr, or `no result produced`.
+fn run_one(ctx: &RunContext, bench: &str, bundle: &Path) -> (BenchResult, f64) {
+    if !bundle.is_file() {
+        return (
+            BenchResult::error(bench, UNIT, "standalone bundle missing; re-run install.sh"),
+            0.0,
+        );
+    }
+
+    let output = match run_bs(
+        ctx.bs_path,
+        &flags(),
+        &[bundle.to_path_buf()],
+        ctx.flamegraph(Some(bench)).as_ref(),
+    ) {
+        Ok(o) => o,
+        Err(e) => {
+            return (BenchResult::error(bench, UNIT, format!("failed to spawn bs: {e}")), 0.0);
+        }
+    };
+
+    // Only one benchmark is run so take the single parsed result.
+    // The name on a successful result is the one printed by the bundle, not `bench`; the
+    // geometric-mean summary of a one-benchmark run is discarded.
+    let (results, _) = parse_web_tooling_output(&output.stdout);
+    let result = results.into_iter().next().unwrap_or_else(|| {
+        let detail = first_nonempty_line(&output.stderr).unwrap_or("no result produced");
+        BenchResult::error(bench, UNIT, detail.to_string())
+    });
+    (result, output.wall_clock_ms)
+}
+
+/// Parse lines of the form `<name>: <score> runs/s` plus the trailing geometric mean.
+///
+/// A line whose name equals `geometric mean` (ASCII case-insensitive) becomes the `Total`
+/// summary, the last such line winning. Every other matching line becomes an ok result.
+/// Lines that do not match, or whose score text does not parse as `f64`, are ignored.
+fn parse_web_tooling_output(stdout: &str) -> (Vec<BenchResult>, Option<BenchResult>) {
+    // The pattern is a fixed literal, so compilation cannot fail at runtime.
+    let regex = Regex::new(r"^\s*(.+?):\s+([\d.]+)\s+runs/s").unwrap();
+    let mut results = Vec::new();
+    let mut summary = None;
+
+    for line in stdout.lines() {
+        if let Some(captures) = regex.captures(line) {
+            let name = captures[1].trim().to_string();
+            // `[\d.]+` also admits text such as `1.2.3`; skip the line when it isn't a number.
+            let score: f64 = match captures[2].parse() {
+                Ok(s) => s,
+                Err(_) => continue,
+            };
+            if name.eq_ignore_ascii_case("geometric mean") {
+                summary = Some(BenchResult::ok("Total", score, UNIT));
+            } else {
+                results.push(BenchResult::ok(name, score, UNIT));
+            }
+        }
+    }
+
+    (results, summary)
+}
+
+/// Flags supplied to `run_bs` for each benchmark run in this file.
+fn flags() -> Vec<String> {
+    vec!["--expose-test-shell-compat".to_string()]
+}