From f7209e7e8828c66fa9b6241654eaa9c15efe8696 Mon Sep 17 00:00:00 2001
From: Georgy Shpilevoy
Date: Tue, 5 Aug 2025 17:17:53 +0300
Subject: [PATCH] check ci

# Conflicts:
#	.github/workflows/ci.yml
---
 .github/workflows/build.yml    |   4 +-
 .github/workflows/test.yml     |  60 ++++++++-
 Makefile                       |   2 +-
 build/ci/Dockerfile            |  14 +-
 build/ci/Makefile              |   2 +-
 build/ci/compare_benchmarks.py | 227 +++++++++++++++++++++++++++++++++
 build/ci/requirements.txt      |   2 +
 7 files changed, 299 insertions(+), 12 deletions(-)
 create mode 100644 build/ci/compare_benchmarks.py
 create mode 100644 build/ci/requirements.txt

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 382a66b..9b8e686 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -14,7 +14,7 @@ jobs:
   binary:
     runs-on: "ubuntu-latest"
     container:
-      image: "ghcr.io/tarantool/sdvg-ci:0.0.1"
+      image: "ghcr.io/tarantool/sdvg-ci:0.0.2"
     strategy:
       matrix:
         os_family: ["darwin", "linux"]
@@ -38,7 +38,7 @@ jobs:
   docker:
     runs-on: "ubuntu-latest"
     container:
-      image: "ghcr.io/tarantool/sdvg-ci:0.0.1"
+      image: "ghcr.io/tarantool/sdvg-ci:0.0.2"
     strategy:
       matrix:
         os_family: ["linux"]
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 050c18f..a896e24 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -17,7 +17,7 @@ jobs:
   lint:
     runs-on: "ubuntu-latest"
     container:
-      image: "ghcr.io/tarantool/sdvg-ci:0.0.1"
+      image: "ghcr.io/tarantool/sdvg-ci:0.0.2"
 
     steps:
       - uses: "actions/checkout@v4"
@@ -34,7 +34,7 @@ jobs:
   unit:
     runs-on: "ubuntu-latest"
     container:
-      image: "ghcr.io/tarantool/sdvg-ci:0.0.1"
+      image: "ghcr.io/tarantool/sdvg-ci:0.0.2"
 
     steps:
       - uses: "actions/checkout@v4"
@@ -46,7 +46,7 @@ jobs:
   cover:
     runs-on: "ubuntu-latest"
     container:
-      image: "ghcr.io/tarantool/sdvg-ci:0.0.1"
+      image: "ghcr.io/tarantool/sdvg-ci:0.0.2"
 
     steps:
       - uses: "actions/checkout@v4"
@@ -64,10 +64,58 @@ jobs:
   performance:
     runs-on: "ubuntu-latest"
+    env:
+      BENCH_MASTER_ARTIFACT_KEY: "bench-master"
+      BENCH_MASTER_INFO_DIR: "bench-master-info"
+      BENCH_MASTER_FILE_PATH: "bench-master-info/benchmark-master.txt"
+      BENCH_MASTER_SHA_FILE_PATH: "bench-master-info/benchmark-master-sha.txt"
     container:
-      image: "ghcr.io/tarantool/sdvg-ci:0.0.1"
+      image: "ghcr.io/tarantool/sdvg-ci:0.0.2"
+
     steps:
       - uses: "actions/checkout@v4"
 
-      - name: "Run benchmarks"
-        run: "make test/performance | tee performance.out"
+      - name: "Download master benchmark artifact"
+        uses: "dawidd6/action-download-artifact@v11"
+        with:
+          github_token: "${{ secrets.GITHUB_TOKEN }}"
+          branch: "${{ github.event.repository.default_branch }}"
+          if_no_artifact_found: "warn"
+          allow_forks: false
+          name: "${{ env.BENCH_MASTER_ARTIFACT_KEY }}"
+          path: "${{ env.BENCH_MASTER_INFO_DIR }}"
+
+      - name: "Run benchmarks on current branch"
+        run: "make test/performance | tee benchmark.txt; exit ${PIPESTATUS[0]}"
+
+      - name: "Make comparison report"
+        run: |
+          python ./build/ci/compare_benchmarks.py \
+            --old-commit-sha-path "$BENCH_MASTER_SHA_FILE_PATH" \
+            "$BENCH_MASTER_FILE_PATH" \
+            benchmark.txt \
+            >> performance-report.md
+
+          cat performance-report.md >> $GITHUB_STEP_SUMMARY
+
+      - uses: "mshick/add-pr-comment@v2"
+        if: "${{ github.event_name == 'pull_request' }}"
+        with:
+          message-path: "performance-report.md"
+          message-id: "perf-report-pr-${{ github.event.pull_request.number }}"
+          refresh-message-position: true
+
+      - name: "Prepare master benchmark info for uploading as artifact"
+        if: "${{ github.ref_name == github.event.repository.default_branch }}"
+        run: |
+          mkdir -p ${{ env.BENCH_MASTER_INFO_DIR }}
+          mv benchmark.txt "${{ env.BENCH_MASTER_FILE_PATH }}"
+          echo "${GITHUB_SHA:0:7}" > ${{ env.BENCH_MASTER_SHA_FILE_PATH }}
+
+      - name: "Upload master benchmark artifact"
+        if: "${{ github.ref_name == github.event.repository.default_branch }}"
+        uses: "actions/upload-artifact@v4"
+        with:
+          name: "${{ env.BENCH_MASTER_ARTIFACT_KEY }}"
+          path: "${{ env.BENCH_MASTER_INFO_DIR }}"
+
diff --git a/Makefile b/Makefile
index 75d1ecb..8b63335 100644
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ test/cover:
 	go tool cover -html=coverage.out -o coverage.html
 
 test/performance:
-	go test -run=^$$ -bench=. -cpu 4 ./...
+	go test -run=^$$ -bench=. -count=2 -cpu 4 ./...
 
 include ./build/package/Makefile
 include ./build/ci/Makefile
diff --git a/build/ci/Dockerfile b/build/ci/Dockerfile
index 7e7d4cf..ff9aa8b 100644
--- a/build/ci/Dockerfile
+++ b/build/ci/Dockerfile
@@ -5,7 +5,15 @@ WORKDIR /tmp
 
 # Install dependencies
 
 RUN apk update \
-    && apk add --update --no-cache bash curl git make gcc musl-dev docker
+    && apk add --update --no-cache bash curl git make gcc musl-dev docker tar
+
+# Configure python
+
+RUN apk add --no-cache python3 py3-pip \
+    && python3 -m venv /venv \
+    && /venv/bin/pip install --upgrade pip setuptools wheel
+
+ENV PATH="/venv/bin:$PATH"
 
 # Configure Go
 
@@ -32,6 +40,8 @@ WORKDIR /sdvg
 
 COPY ./go.mod ./go.mod
 COPY ./go.sum ./go.sum
+COPY ./build/ci/requirements.txt ./requirements.txt
 
 RUN git config --global --add safe.directory /sdvg \
-    && go mod download
+    && go mod download \
+    && pip install -r ./requirements.txt
diff --git a/build/ci/Makefile b/build/ci/Makefile
index 7637573..497d52a 100644
--- a/build/ci/Makefile
+++ b/build/ci/Makefile
@@ -1,6 +1,6 @@
 # Arguments
 
-ci_image = ghcr.io/tarantool/sdvg-ci:0.0.1
+ci_image = ghcr.io/tarantool/sdvg-ci:0.0.2
 
 # Targets
 
diff --git a/build/ci/compare_benchmarks.py b/build/ci/compare_benchmarks.py
new file mode 100644
index 0000000..6358c7f
--- /dev/null
+++ b/build/ci/compare_benchmarks.py
@@ -0,0 +1,227 @@
+import argparse
+import re
+import statistics
+import os
+import textwrap
+from collections import OrderedDict
+from typing import Dict, Tuple, List, Literal, Optional
+
+import pandas as pd
+
+METRICS = {
+    'MB/s': {'name': 'B/s', 'good_direction': 'up', 'scale': 2 ** 20},
+    'values/s': {'good_direction': 'up'},
+    # 'ns/op': {'name': 's/op', 'good_direction': 'down', 'scale': 1e-9},
+    # 'rows/s': {'good_direction': 'up'},
+}
+
+EMOJIS = {
+    'good': '⚡️',
+    'bad': '💔'
+}
+
+
+def format_benchmark_name(name: str) -> str:
+    name = name.replace("Benchmark", "")
+    name = name.replace("/CI/", "/")
+
+    parts = name.split("/")
+    if len(parts) == 1:
+        return parts[0]
+
+    base_name = " ".join(parts[:-1])
+    params_split = parts[-1].split("-")
+
+    params = []
+    for i in range(0, len(params_split) - 1, 2):
+        params.append(f"{params_split[i]}={params_split[i + 1]}")
+
+    if params:
+        return f"{base_name} ({', '.join(params)})"
+    else:
+        return base_name
+
+
+def parse_bench_line(line: str) -> Tuple[Optional[str], Optional[Dict[str, float]]]:
+    """Parses a line of `go test -bench` results output.
+
+    Example:
+
+        BenchmarkPartitioning/CI/cpu-4  2569041  475.5 ns/op  218.73 MB/s  8412793 rows/s  16825587 values/s
+
+    result:
+    ('Partitioning (cpu=4)', {'ns/op': 475.5, 'MB/s': 218.73, 'rows/s': 8412793, 'values/s': 16825587})
+    """
+
+    parts = re.split(r'\s+', line.strip())
+    if len(parts) < 3 or not parts[0].startswith("Benchmark") or "/CI/" not in parts[0]:
+        return None, None
+
+    bench_name = format_benchmark_name(parts[0])
+
+    metrics = {}
+    for value, metric in zip(parts[2::2], parts[3::2]):
+        if metric not in METRICS:
+            continue
+        try:
+            metrics[metric] = float(value)
+        except ValueError:
+            raise ValueError(f"Failed to parse value '{value}' for '{metric}'")
+
+    return bench_name, metrics
+
+
+def parse_metrics_file(path: str) -> Dict[str, Dict[str, List[float]]]:
+    results = {}
+
+    with open(path) as f:
+        for line in f:
+            name_test, metrics = parse_bench_line(line)
+            if name_test is None:
+                continue
+
+            if not metrics:
+                continue
+
+            if name_test not in results:
+                results[name_test] = {m: [] for m in METRICS.keys()}
+
+            for metric_name, value in metrics.items():
+                results[name_test][metric_name].append(value)
+
+    return results
+
+
+def aggregate_results(
+    parsed_metrics: Dict[str, Dict[str, List[float]]],
+    method: Literal["mean", "median"]
+) -> OrderedDict[str, Dict[str, float]]:
+    aggregated: OrderedDict[str, Dict[str, float]] = OrderedDict()
+
+    for bench_name, metrics in parsed_metrics.items():
+        aggregated[bench_name] = {}
+
+        for m, values in metrics.items():
+            if method == "median":
+                aggregated[bench_name][m] = statistics.median(values)
+            elif method == "mean":
+                aggregated[bench_name][m] = statistics.mean(values)
+
+    return aggregated
+
+
+def humanize_number(val: float, scale: float) -> str:
+    if val is None:
+        return "?"
+
+    val = val * scale
+    abs_val = abs(val)
+    if abs_val >= 1_000_000:
+        return f"{val / 1_000_000:.2f}M"
+    elif abs_val >= 1_000:
+        return f"{val / 1_000:.2f}K"
+    else:
+        return f"{val:.2f}"
+
+
+def format_metric_changes(metric_name: str, old_val, new_val: Optional[float], alert_threshold: float) -> str:
+    old_val_str = humanize_number(old_val, METRICS[metric_name].get('scale', 1))
+    new_val_str = humanize_number(new_val, METRICS[metric_name].get('scale', 1))
+
+    if old_val is None or new_val is None:
+        suffix = " ⚠️"
+    else:
+        change_pct = (new_val / old_val - 1) * 100
+        suffix = f" ({change_pct:+.2f}%)"
+
+        if abs(change_pct) >= alert_threshold:
+            is_better = METRICS[metric_name].get('good_direction') == 'up' and change_pct > 0
+            suffix += f" {EMOJIS['good'] if is_better else EMOJIS['bad']}"
+
+    return f"{old_val_str} → {new_val_str}{suffix}"
+
+
+def compare_benchmarks_df(old_metrics, new_metrics, alert_threshold=None):
+    if old_metrics is None:
+        old_metrics = {}
+
+    if new_metrics is None:
+        new_metrics = {}
+
+    all_metrics = OrderedDict()
+    all_metrics.update(old_metrics)
+    all_metrics.update(new_metrics)
+
+    df = pd.DataFrame(columns=["Benchmark"] + [v.get('name', k) for k, v in METRICS.items()])
+
+    for bench_name in all_metrics.keys():
+        row = {"Benchmark": bench_name}
+
+        for metric_name, metric_params in METRICS.items():
+            old_val = old_metrics.get(bench_name, {}).get(metric_name, None)
+            new_val = new_metrics.get(bench_name, {}).get(metric_name, None)
+            row[metric_params.get('name', metric_name)] = format_metric_changes(
+                metric_name, old_val, new_val, alert_threshold
+            )
+
+        df.loc[len(df)] = row
+
+    return df.to_markdown(index=False)
+
+
+def build_report_header(old_file, sha_file: str) -> str:
+    event_name = os.environ.get("GITHUB_EVENT_NAME", "")
+    base_branch = os.environ.get("GITHUB_DEFAULT_BRANCH", "master")
+
+    warning = ""
+    if not os.path.exists(old_file):
+        warning = textwrap.dedent("""
+            > [!WARNING]
+            > No test results found for master branch. Please run workflow on master first to compare results.
+        """).strip()
+
+    if event_name == "pull_request":
+        pr_branch = os.environ.get("GITHUB_HEAD_REF", "")
+        header_ending = f"`{pr_branch}`" if not os.path.exists(old_file) else f"`{base_branch}` VS `{pr_branch}`"
+    else:
+        if not os.path.exists(old_file):
+            header_ending = f"`{base_branch}`"
+        else:
+            prev_master_sha = "(sha not found)"
+            if sha_file and os.path.exists(sha_file):
+                with open(sha_file) as f:
+                    prev_master_sha = f.read().strip()
+
+            commit_sha = os.environ.get("GITHUB_SHA", "")[:7]
+            header_ending = f"`{base_branch} {prev_master_sha}` VS `{base_branch} {commit_sha}`"
+
+    header = f"# Perf tests report: {header_ending}\n"
+    return f"{warning}\n\n{header}" if warning else header
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Compare go test -bench results in markdown format")
+    parser.add_argument(
+        "--alert-threshold", type=float, default=7,
+        help="Percent change threshold for adding emoji alerts"
+    )
+    parser.add_argument(
+        "--aggregation", choices=["mean", "median"], default="mean",
+        help="Aggregation method for multiple runs of the same benchmark"
+    )
+    parser.add_argument("--old-commit-sha-path", help="Path to file with sha commit of the old benchmark")
+    parser.add_argument("old_file", help="Path to old benchmark results file", nargs='?', default="")
+    parser.add_argument("new_file", help="Path to new benchmark results file")
+    args = parser.parse_args()
+
+    old_metrics = None
+    if args.old_file and os.path.exists(args.old_file):
+        old_metrics = aggregate_results(parse_metrics_file(args.old_file), args.aggregation)
+
+    new_metrics = aggregate_results(parse_metrics_file(args.new_file), args.aggregation)
+
+    print(build_report_header(args.old_file, args.old_commit_sha_path))
+    print(compare_benchmarks_df(old_metrics, new_metrics, alert_threshold=args.alert_threshold))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/build/ci/requirements.txt b/build/ci/requirements.txt
new file mode 100644
index 0000000..86ca33c
--- /dev/null
+++ b/build/ci/requirements.txt
@@ -0,0 +1,2 @@
+pandas==2.3.1
+tabulate==0.9.0
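
The comparison report can also be reproduced locally with the same tooling the workflow uses. A rough sketch (file names are illustrative; in CI they come from the BENCH_MASTER_* variables, and --aggregation defaults to mean):

    make test/performance | tee benchmark-master.txt    # run on the default branch
    make test/performance | tee benchmark.txt           # run on the feature branch
    python ./build/ci/compare_benchmarks.py --aggregation median \
        benchmark-master.txt benchmark.txt > performance-report.md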