Benchmarks #28
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Benchmark workflow: runs criterion benchmarks on pushes and PRs against
# main/master, on a weekly schedule, and on demand via workflow_dispatch.
name: Benchmarks

on:
  push:
    branches: [main, master]
  pull_request:
    branches: [main, master]
  schedule:
    # Run benchmarks weekly on Sunday at 00:00 UTC
    - cron: '0 0 * * 0'
  workflow_dispatch:

env:
  RUST_BACKTRACE: 1
  CARGO_TERM_COLOR: always
jobs:
  benchmark:
    name: Run Benchmarks
    runs-on: ubuntu-latest
    timeout-minutes: 90
    permissions:
      # Needed to publish artifacts and update the PR comment below.
      contents: write
      pull-requests: write
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history so baseline comparisons can reference older commits.
          fetch-depth: 0

      - name: Install Rust toolchain
        uses: dtolnay/rust-toolchain@stable

      - name: Install Python dependencies
        run: |
          python3 -m pip install --upgrade pip

      - name: Cache cargo registry
        uses: actions/cache@v4
        with:
          # Cache the downloaded crate archives as well as the index;
          # caching only the index still re-downloads every crate.
          path: |
            ~/.cargo/registry/index
            ~/.cargo/registry/cache
          key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}
          restore-keys: |
            ${{ runner.os }}-cargo-registry-

      - name: Cache cargo git
        uses: actions/cache@v4
        with:
          path: ~/.cargo/git
          key: ${{ runner.os }}-cargo-git-${{ hashFiles('**/Cargo.lock') }}
          restore-keys: |
            ${{ runner.os }}-cargo-git-

      - name: Cache target directory
        uses: actions/cache@v4
        with:
          path: target
          key: ${{ runner.os }}-target-bench-${{ hashFiles('**/Cargo.lock') }}
          restore-keys: |
            ${{ runner.os }}-target-bench-
| - name: Download baseline benchmarks from main branch | |
| if: github.event_name == 'pull_request' | |
| continue-on-error: true | |
| uses: dawidd6/action-download-artifact@v3 | |
| with: | |
| workflow: bench.yml | |
| branch: ${{ github.base_ref }} | |
| name: benchmark-results-main | |
| path: baseline-results | |
| - name: Copy baseline to criterion directory | |
| if: github.event_name == 'pull_request' && hashFiles('baseline-results/criterion/**') != '' | |
| run: | | |
| mkdir -p target/criterion-baseline | |
| if [ -d "baseline-results/criterion" ]; then | |
| cp -r baseline-results/criterion/* target/criterion-baseline/ | |
| echo "Baseline benchmarks loaded successfully" | |
| else | |
| echo "No baseline benchmarks found" | |
| fi | |
| - name: Run benchmarks | |
| run: | | |
| cd benches | |
| ./run_benchmarks.sh | |
| - name: Generate benchmark report | |
| run: | | |
| cd benches | |
| python3 generate_report.py --output BENCHMARKS.md | |
| - name: Generate comparison report for PR | |
| if: github.event_name == 'pull_request' && hashFiles('baseline-results/criterion/**') != '' | |
| continue-on-error: true | |
| run: | | |
| cd benches | |
| # Create a markdown comparison report | |
| cat > comparison_report.md << 'EOF' | |
| ## Benchmark Comparison Report | |
| Comparing PR benchmarks against the main branch baseline. | |
| EOF | |
| # Parse and compare results using Python | |
| python3 << 'PYTHON_SCRIPT' | |
| import json | |
| import sys | |
| from pathlib import Path | |
| from collections import defaultdict | |
| def parse_criterion(base_dir): | |
| results = {} | |
| criterion_dir = Path(base_dir) | |
| if not criterion_dir.exists(): | |
| return results | |
| for estimates_file in criterion_dir.rglob("estimates.json"): | |
| try: | |
| with open(estimates_file, 'r') as f: | |
| data = json.load(f) | |
| # Get benchmark name from path structure | |
| rel_path = estimates_file.relative_to(criterion_dir) | |
| bench_name = str(rel_path.parent.parent) | |
| mean_ns = data.get("mean", {}).get("point_estimate", 0) | |
| results[bench_name] = mean_ns | |
| except Exception as e: | |
| print(f"Warning: {e}", file=sys.stderr) | |
| return results | |
| def format_time(ns): | |
| if ns < 1_000: | |
| return f"{ns:.2f} ns" | |
| elif ns < 1_000_000: | |
| return f"{ns / 1_000:.2f} µs" | |
| elif ns < 1_000_000_000: | |
| return f"{ns / 1_000_000:.2f} ms" | |
| else: | |
| return f"{ns / 1_000_000_000:.2f} s" | |
| # Parse both baseline and current | |
| baseline = parse_criterion("../target/criterion-baseline") | |
| current = parse_criterion("../target/criterion") | |
| if not baseline or not current: | |
| print("Unable to compare - missing results", file=sys.stderr) | |
| sys.exit(0) | |
| # Compare common benchmarks | |
| common = set(baseline.keys()) & set(current.keys()) | |
| improvements = [] | |
| regressions = [] | |
| neutral = [] | |
| for bench in sorted(common): | |
| base_val = baseline[bench] | |
| curr_val = current[bench] | |
| if base_val == 0: | |
| continue | |
| change_pct = ((curr_val - base_val) / base_val) * 100 | |
| item = { | |
| 'name': bench, | |
| 'baseline': base_val, | |
| 'current': curr_val, | |
| 'change_pct': change_pct | |
| } | |
| if abs(change_pct) < 5.0: | |
| neutral.append(item) | |
| elif change_pct < 0: | |
| improvements.append(item) | |
| else: | |
| regressions.append(item) | |
| # Write comparison report | |
| with open('comparison_report.md', 'a') as f: | |
| f.write(f"### Summary\n\n") | |
| f.write(f"- Total benchmarks compared: **{len(common)}**\n") | |
| f.write(f"- Improvements: **{len(improvements)}** ✅\n") | |
| f.write(f"- Regressions: **{len(regressions)}** ⚠️\n") | |
| f.write(f"- Neutral (< 5% change): **{len(neutral)}**\n\n") | |
| if regressions: | |
| # Sort by absolute change percentage | |
| regressions.sort(key=lambda x: abs(x['change_pct']), reverse=True) | |
| f.write("### Performance Regressions ⚠️\n\n") | |
| f.write("| Benchmark | Baseline | Current | Change |\n") | |
| f.write("|-----------|----------|---------|--------|\n") | |
| for item in regressions[:10]: # Top 10 | |
| f.write(f"| {item['name']} | {format_time(item['baseline'])} | ") | |
| f.write(f"{format_time(item['current'])} | ") | |
| f.write(f"+{item['change_pct']:.2f}% ⚠️ |\n") | |
| f.write("\n") | |
| if improvements: | |
| improvements.sort(key=lambda x: abs(x['change_pct']), reverse=True) | |
| f.write("### Performance Improvements ✅\n\n") | |
| f.write("| Benchmark | Baseline | Current | Change |\n") | |
| f.write("|-----------|----------|---------|--------|\n") | |
| for item in improvements[:10]: # Top 10 | |
| f.write(f"| {item['name']} | {format_time(item['baseline'])} | ") | |
| f.write(f"{format_time(item['current'])} | ") | |
| f.write(f"{item['change_pct']:.2f}% ✅ |\n") | |
| f.write("\n") | |
| if not regressions and not improvements: | |
| f.write("No significant performance changes detected (all changes < 5%).\n\n") | |
| PYTHON_SCRIPT | |
| - name: Upload current benchmark results | |
| if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master') | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: benchmark-results-main | |
| path: | | |
| target/criterion/ | |
| retention-days: 90 | |
| - name: Upload PR benchmark results | |
| if: github.event_name == 'pull_request' | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: benchmark-results-pr-${{ github.event.pull_request.number }} | |
| path: | | |
| target/criterion/ | |
| benches/BENCHMARKS.md | |
| benches/comparison_report.md | |
| retention-days: 30 | |
| - name: Comment benchmark results on PR | |
| if: github.event_name == 'pull_request' | |
| uses: actions/github-script@v7 | |
| with: | |
| script: | | |
| const fs = require('fs'); | |
| let comment = '## 📊 Benchmark Results\n\n'; | |
| // Add comparison report if available | |
| const comparisonPath = 'benches/comparison_report.md'; | |
| if (fs.existsSync(comparisonPath)) { | |
| const comparison = fs.readFileSync(comparisonPath, 'utf8'); | |
| comment += comparison + '\n'; | |
| } else { | |
| comment += '⚠️ No baseline benchmarks available for comparison.\n'; | |
| comment += 'This is expected for the first PR or if baseline artifacts have expired.\n\n'; | |
| } | |
| // Add link to full report | |
| comment += '---\n\n'; | |
| comment += '**Full benchmark report**: Available in the workflow artifacts\n'; | |
| comment += `**Commit**: ${context.sha.substring(0, 7)}\n`; | |
| comment += `**Run**: [#${context.runNumber}](${context.payload.repository.html_url}/actions/runs/${context.runId})\n`; | |
| // Find and update existing comment or create new one | |
| const { data: comments } = await github.rest.issues.listComments({ | |
| owner: context.repo.owner, | |
| repo: context.repo.repo, | |
| issue_number: context.issue.number, | |
| }); | |
| const botComment = comments.find(comment => | |
| comment.user.type === 'Bot' && | |
| comment.body.includes('📊 Benchmark Results') | |
| ); | |
| if (botComment) { | |
| await github.rest.issues.updateComment({ | |
| owner: context.repo.owner, | |
| repo: context.repo.repo, | |
| comment_id: botComment.id, | |
| body: comment | |
| }); | |
| console.log('Updated existing benchmark comment'); | |
| } else { | |
| await github.rest.issues.createComment({ | |
| owner: context.repo.owner, | |
| repo: context.repo.repo, | |
| issue_number: context.issue.number, | |
| body: comment | |
| }); | |
| console.log('Created new benchmark comment'); | |
| } | |
| - name: Upload benchmark report to main branch | |
| if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master') | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: benchmark-report | |
| path: benches/BENCHMARKS.md | |
| retention-days: 90 | |
| system-benchmarks: | |
| name: System Benchmarks | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 60 | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Install Rust toolchain | |
| uses: dtolnay/rust-toolchain@stable | |
| - name: Set up Docker Buildx | |
| uses: docker/setup-buildx-action@v3 | |
| - name: Build anode image | |
| uses: docker/build-push-action@v5 | |
| with: | |
| context: . | |
| file: deploy/docker/Dockerfile | |
| push: false | |
| load: true | |
| tags: anode:bench | |
| - name: Set up test cluster | |
| run: | | |
| cd deploy/docker | |
| docker-compose up -d --wait | |
| docker-compose ps | |
| - name: Wait for cluster ready | |
| run: | | |
| timeout 120 bash -c 'until curl -f http://localhost:8080/health 2>/dev/null; do sleep 2; done' | |
| - name: Run system benchmarks | |
| run: | | |
| cd benches | |
| cargo run --release --bin system-bench -- \ | |
| --endpoint http://localhost:8080 \ | |
| --duration 300 \ | |
| --concurrency 10 \ | |
| --output benchmark-results.json | |
| - name: Upload system benchmark results | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: system-benchmark-results-${{ github.run_number }} | |
| path: benches/benchmark-results.json | |
| - name: Cleanup | |
| if: always() | |
| run: | | |
| cd deploy/docker | |
| docker-compose down -v |