Benchmarks #28
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Benchmark workflow: runs criterion benchmarks on pushes and PRs against
# main/master, on a weekly schedule, and on demand via workflow_dispatch.
name: Benchmarks

on:
  push:
    branches: [main, master]
  pull_request:
    branches: [main, master]
  schedule:
    # Run benchmarks weekly on Sunday at 00:00 UTC
    - cron: '0 0 * * 0'
  workflow_dispatch:

env:
  RUST_BACKTRACE: 1
  CARGO_TERM_COLOR: always
jobs:
  benchmark:
    name: Run Benchmarks
    runs-on: ubuntu-latest
    timeout-minutes: 90
    permissions:
      # Needed to publish artifacts and update the PR comment below.
      contents: write
      pull-requests: write
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history so baseline comparisons can reference older commits.
          fetch-depth: 0

      - name: Install Rust toolchain
        uses: dtolnay/rust-toolchain@stable

      - name: Install Python dependencies
        run: |
          python3 -m pip install --upgrade pip

      - name: Cache cargo registry
        uses: actions/cache@v4
        with:
          # Cache the downloaded crate archives as well as the index;
          # caching only the index still re-downloads every crate.
          path: |
            ~/.cargo/registry/index
            ~/.cargo/registry/cache
          key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}
          restore-keys: |
            ${{ runner.os }}-cargo-registry-

      - name: Cache cargo git
        uses: actions/cache@v4
        with:
          path: ~/.cargo/git
          key: ${{ runner.os }}-cargo-git-${{ hashFiles('**/Cargo.lock') }}
          restore-keys: |
            ${{ runner.os }}-cargo-git-

      - name: Cache target directory
        uses: actions/cache@v4
        with:
          path: target
          key: ${{ runner.os }}-target-bench-${{ hashFiles('**/Cargo.lock') }}
          restore-keys: |
            ${{ runner.os }}-target-bench-
| - name: Download baseline benchmarks from main branch | |
| if: github.event_name == 'pull_request' | |
| continue-on-error: true | |
| uses: dawidd6/action-download-artifact@v3 | |
| with: | |
| workflow: bench.yml | |
| branch: ${{ github.base_ref }} | |
| name: benchmark-results-main | |
| path: baseline-results | |
| - name: Copy baseline to criterion directory | |
| if: github.event_name == 'pull_request' && hashFiles('baseline-results/criterion/**') != '' | |
| run: | | |
| mkdir -p target/criterion-baseline | |
| if [ -d "baseline-results/criterion" ]; then | |
| cp -r baseline-results/criterion/* target/criterion-baseline/ | |
| echo "Baseline benchmarks loaded successfully" | |
| else | |
| echo "No baseline benchmarks found" | |
| fi | |
| - name: Run benchmarks | |
| run: | | |
| cd benches | |
| ./run_benchmarks.sh | |
| - name: Generate benchmark report | |
| run: | | |
| cd benches | |
| python3 generate_report.py --output BENCHMARKS.md | |
| - name: Generate comparison report for PR | |
| if: github.event_name == 'pull_request' && hashFiles('baseline-results/criterion/**') != '' | |
| continue-on-error: true | |
| run: | | |
| cd benches | |
| # Create a markdown comparison report | |
| cat > comparison_report.md << 'EOF' | |
| ## Benchmark Comparison Report | |
| Comparing PR benchmarks against the main branch baseline. | |
| EOF | |
| # Parse and compare results using Python | |
| python3 << 'PYTHON_SCRIPT' | |
| import json | |
| import sys | |
| from pathlib import Path | |
| from collections import defaultdict | |
| def parse_criterion(base_dir): | |
| results = {} | |
| criterion_dir = Path(base_dir) | |
| if not criterion_dir.exists(): | |
| return results | |
| for estimates_file in criterion_dir.rglob("estimates.json"): | |
| try: | |
| with open(estimates_file, 'r') as f: | |
| data = json.load(f) | |
| # Get benchmark name from path structure | |
| rel_path = estimates_file.relative_to(criterion_dir) | |
| bench_name = str(rel_path.parent.parent) | |
| mean_ns = data.get("mean", {}).get("point_estimate", 0) | |
| results[bench_name] = mean_ns | |
| except Exception as e: | |
| print(f"Warning: {e}", file=sys.stderr) | |
| return results | |
| def format_time(ns): | |
| if ns < 1_000: | |
| return f"{ns:.2f} ns" | |
| elif ns < 1_000_000: | |
| return f"{ns / 1_000:.2f} µs" | |
| elif ns < 1_000_000_000: | |
| return f"{ns / 1_000_000:.2f} ms" | |
| else: | |
| return f"{ns / 1_000_000_000:.2f} s" | |
| # Parse both baseline and current | |
| baseline = parse_criterion("../target/criterion-baseline") | |
| current = parse_criterion("../target/criterion") | |
| if not baseline or not current: | |
| print("Unable to compare - missing results", file=sys.stderr) | |
| sys.exit(0) | |
| # Compare common benchmarks | |
| common = set(baseline.keys()) & set(current.keys()) | |
| improvements = [] | |
| regressions = [] | |
| neutral = [] | |
| for bench in sorted(common): | |
| base_val = baseline[bench] | |
| curr_val = current[bench] | |
| if base_val == 0: | |
| continue | |
| change_pct = ((curr_val - base_val) / base_val) * 100 | |
| item = { | |
| 'name': bench, | |
| 'baseline': base_val, | |
| 'current': curr_val, | |
| 'change_pct': change_pct | |
| } | |
| if abs(change_pct) < 5.0: | |
| neutral.append(item) | |
| elif change_pct < 0: | |
| improvements.append(item) | |
| else: | |
| regressions.append(item) | |
| # Write comparison report | |
| with open('comparison_report.md', 'a') as f: | |
| f.write(f"### Summary\n\n") | |
| f.write(f"- Total benchmarks compared: **{len(common)}**\n") | |
| f.write(f"- Improvements: **{len(improvements)}** ✅\n") | |
| f.write(f"- Regressions: **{len(regressions)}** ⚠️\n") | |
| f.write(f"- Neutral (< 5% change): **{len(neutral)}**\n\n") | |
| if regressions: | |
| # Sort by absolute change percentage | |
| regressions.sort(key=lambda x: abs(x['change_pct']), reverse=True) | |
| f.write("### Performance Regressions ⚠️\n\n") | |
| f.write("| Benchmark | Baseline | Current | Change |\n") | |
| f.write("|-----------|----------|---------|--------|\n") | |
| for item in regressions[:10]: # Top 10 | |
| f.write(f"| {item['name']} | {format_time(item['baseline'])} | ") | |
| f.write(f"{format_time(item['current'])} | ") | |
| f.write(f"+{item['change_pct']:.2f}% ⚠️ |\n") | |
| f.write("\n") | |
| if improvements: | |
| improvements.sort(key=lambda x: abs(x['change_pct']), reverse=True) | |
| f.write("### Performance Improvements ✅\n\n") | |
| f.write("| Benchmark | Baseline | Current | Change |\n") | |
| f.write("|-----------|----------|---------|--------|\n") | |
| for item in improvements[:10]: # Top 10 | |
| f.write(f"| {item['name']} | {format_time(item['baseline'])} | ") | |
| f.write(f"{format_time(item['current'])} | ") | |
| f.write(f"{item['change_pct']:.2f}% ✅ |\n") | |
| f.write("\n") | |
| if not regressions and not improvements: | |
| f.write("No significant performance changes detected (all changes < 5%).\n\n") | |
| PYTHON_SCRIPT | |
| - name: Upload current benchmark results | |
| if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master') | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: benchmark-results-main | |
| path: | | |
| target/criterion/ | |
| retention-days: 90 | |
| - name: Upload PR benchmark results | |
| if: github.event_name == 'pull_request' | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: benchmark-results-pr-${{ github.event.pull_request.number }} | |
| path: | | |
| target/criterion/ | |
| benches/BENCHMARKS.md | |
| benches/comparison_report.md | |
| retention-days: 30 | |
| - name: Comment benchmark results on PR | |
| if: github.event_name == 'pull_request' | |
| uses: actions/github-script@v7 | |
| with: | |
| script: | | |
| const fs = require('fs'); | |
| let comment = '## 📊 Benchmark Results\n\n'; | |
| // Add comparison report if available | |
| const comparisonPath = 'benches/comparison_report.md'; | |
| if (fs.existsSync(comparisonPath)) { | |
| const comparison = fs.readFileSync(comparisonPath, 'utf8'); | |
| comment += comparison + '\n'; | |
| } else { | |
| comment += '⚠️ No baseline benchmarks available for comparison.\n'; | |
| comment += 'This is expected for the first PR or if baseline artifacts have expired.\n\n'; | |
| } | |
| // Add link to full report | |
| comment += '---\n\n'; | |
| comment += '**Full benchmark report**: Available in the workflow artifacts\n'; | |
| comment += `**Commit**: ${context.sha.substring(0, 7)}\n`; | |
| comment += `**Run**: [#${context.runNumber}](${context.payload.repository.html_url}/actions/runs/${context.runId})\n`; | |
| // Find and update existing comment or create new one | |
| const { data: comments } = await github.rest.issues.listComments({ | |
| owner: context.repo.owner, | |
| repo: context.repo.repo, | |
| issue_number: context.issue.number, | |
| }); | |
| const botComment = comments.find(comment => | |
| comment.user.type === 'Bot' && | |
| comment.body.includes('📊 Benchmark Results') | |
| ); | |
| if (botComment) { | |
| await github.rest.issues.updateComment({ | |
| owner: context.repo.owner, | |
| repo: context.repo.repo, | |
| comment_id: botComment.id, | |
| body: comment | |
| }); | |
| console.log('Updated existing benchmark comment'); | |
| } else { | |
| await github.rest.issues.createComment({ | |
| owner: context.repo.owner, | |
| repo: context.repo.repo, | |
| issue_number: context.issue.number, | |
| body: comment | |
| }); | |
| console.log('Created new benchmark comment'); | |
| } | |
| - name: Upload benchmark report to main branch | |
| if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master') | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: benchmark-report | |
| path: benches/BENCHMARKS.md | |
| retention-days: 90 | |
| system-benchmarks: | |
| name: System Benchmarks | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 60 | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Install Rust toolchain | |
| uses: dtolnay/rust-toolchain@stable | |
| - name: Set up Docker Buildx | |
| uses: docker/setup-buildx-action@v3 | |
| - name: Build anode image | |
| uses: docker/build-push-action@v5 | |
| with: | |
| context: . | |
| file: deploy/docker/Dockerfile | |
| push: false | |
| load: true | |
| tags: anode:bench | |
| - name: Set up test cluster | |
| run: | | |
| cd deploy/docker | |
| docker-compose up -d --wait | |
| docker-compose ps | |
| - name: Wait for cluster ready | |
| run: | | |
| timeout 120 bash -c 'until curl -f http://localhost:8080/health 2>/dev/null; do sleep 2; done' | |
| - name: Run system benchmarks | |
| run: | | |
| cd benches | |
| cargo run --release --bin system-bench -- \ | |
| --endpoint http://localhost:8080 \ | |
| --duration 300 \ | |
| --concurrency 10 \ | |
| --output benchmark-results.json | |
| - name: Upload system benchmark results | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: system-benchmark-results-${{ github.run_number }} | |
| path: benches/benchmark-results.json | |
| - name: Cleanup | |
| if: always() | |
| run: | | |
| cd deploy/docker | |
| docker-compose down -v |