# Example run title: "Timefold Solver v1.18.0 vs. triceo/jx on Java 21" (#186)
# Viewer notice: this file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# (Learn more about bidirectional Unicode characters.)
# Both baseline and SUT (Software Under Test) are built from source first,
# with their binaries uploaded as artifacts.
# This is done on GitHub infrastructure, to achieve maximum parallelization.
#
# The benchmark job downloads the binaries and runs them.
# The baseline is established first, then the SUT is measured.
# They both run in the same job,
# to guarantee they ran on the same machine with the same performance characteristics.
# This is done on a self-hosted runner which we completely control.
#
# Each benchmark gives a 99.9 % confidence interval.
# The confidence intervals are compared to determine if the branch under test is a regression or an improvement.
# The error threshold is expected to be below +/- 2.0 %.
name: Performance Regression Test - Score Director

# Shown in the Actions UI for each run; built from the values entered in the dispatch form.
run-name: 'Timefold Solver v${{ github.event.inputs.baseline }} vs. ${{ github.event.inputs.branch_owner }}/${{ github.event.inputs.branch }} on Java ${{ github.event.inputs.jdk }}'

# Manual trigger only; every input is mandatory and comes with a default.
on:
  workflow_dispatch:
    inputs:
      # JDK used to run the benchmarks.
      jdk:
        description: 'JDK version'
        required: true
        default: '21'
      # Released solver version the benchmarks are compiled against as the baseline.
      baseline:
        description: 'Timefold Solver release'
        required: true
        default: '1.18.0'
      # Branch of timefold-solver that is built and measured as the SUT.
      branch:
        description: 'Branch to benchmark (needs to use 999-SNAPSHOT)'
        required: true
        default: 'main'
      # GitHub account whose timefold-solver fork holds the branch.
      branch_owner:
        description: 'User owning the branch'
        required: true
        default: 'TimefoldAI'
      # Used both as a Maven property at build time and to pick the release to download.
      async_profiler_version:
        description: 'async-profiler version'
        required: true
        default: '3.0'
jobs:
  # Compiles the benchmark JAR twice per matrix entry: once against the released
  # baseline solver version, once against the solver built from the branch under test.
  # Both JARs are uploaded as a single artifact per example.
  build:
    runs-on: ubuntu-latest  # Leverage massive parallelization of GitHub-hosted runners.
    strategy:
      fail-fast: true  # If one compilation fails, abort everything.
      matrix:
        example:
          - cloud_balancing
          - conference_scheduling
          - curriculum_course
          - examination
          - machine_reassignment
          - meeting_scheduling
          - nurse_rostering
          - patient_admission_scheduling
          - task_assigning
          - traveling_tournament
          - tsp
          - vehicle_routing
    env:
      # Credentials referenced by name in the setup-java server configuration below.
      MVN_USERNAME: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_USERNAME }}'
      MVN_PASSWORD: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_TOKEN }}'
    steps:
      - name: Checkout timefold-solver-benchmarks
        uses: actions/checkout@v4
        with:
          repository: TimefoldAI/timefold-solver-benchmarks
          path: ./timefold-solver-benchmarks
      - name: Setup JDK and Maven
        uses: actions/setup-java@v4
        with:
          java-version: '21'  # Always build with the least recent supported JDK.
          distribution: 'temurin'
          cache: 'maven'
          server-id: 'timefold-solver-enterprise'
          server-username: 'MVN_USERNAME'
          server-password: 'MVN_PASSWORD'
      - name: (Baseline) Compile the benchmark
        working-directory: ./timefold-solver-benchmarks
        shell: bash
        env:
          # Inputs are passed through the environment rather than interpolated
          # into the script, so their content can never be parsed as shell code.
          BASELINE_VERSION: ${{ github.event.inputs.baseline }}
          ASYNC_PROFILER_VERSION: ${{ github.event.inputs.async_profiler_version }}
        run: |
          mvn clean install -B -Dquickly \
            "-Dversion.ai.timefold.solver=${BASELINE_VERSION}" \
            "-Dversion.tools.provider=${ASYNC_PROFILER_VERSION}"
          mv target/benchmarks.jar benchmarks-baseline.jar
      - name: (SUT) Checkout timefold-solver
        uses: actions/checkout@v4
        with:
          repository: ${{ github.event.inputs.branch_owner }}/timefold-solver
          ref: ${{ github.event.inputs.branch }}
          path: ./timefold-solver
      - name: (SUT) Quickly build timefold-solver
        working-directory: ./timefold-solver
        shell: bash
        run: mvn -B -Dquickly clean install
      # Clone timefold-solver-enterprise.
      # First try a branch of the same name as the SUT branch; fall back to main if that fails.
      - name: (SUT) Checkout timefold-solver-enterprise (Specified)
        id: checkout-solver-enterprise
        uses: actions/checkout@v4
        continue-on-error: true
        with:
          repository: TimefoldAI/timefold-solver-enterprise
          ref: ${{ github.event.inputs.branch }}
          token: ${{ secrets.BENCHMARK_PUBLISH_TOKEN }}
          path: ./timefold-solver-enterprise
      - name: (SUT) Checkout timefold-solver-enterprise (Fallback)
        if: steps.checkout-solver-enterprise.outcome != 'success'
        uses: actions/checkout@v4
        with:
          repository: TimefoldAI/timefold-solver-enterprise
          ref: main
          token: ${{ secrets.BENCHMARK_PUBLISH_TOKEN }}
          path: ./timefold-solver-enterprise
      - name: (SUT) Quickly build timefold-solver-enterprise
        working-directory: ./timefold-solver-enterprise
        shell: bash
        run: mvn -B -Dquickly clean install
      - name: (SUT) Compile the benchmarks
        working-directory: ./timefold-solver-benchmarks
        shell: bash
        env:
          ASYNC_PROFILER_VERSION: ${{ github.event.inputs.async_profiler_version }}
        run: |
          # No solver version override here: the build uses whatever the benchmarks POM declares.
          mvn clean install -B -Dquickly \
            "-Dversion.tools.provider=${ASYNC_PROFILER_VERSION}"
          mv target/benchmarks.jar benchmarks-sut.jar
      - name: Upload the binaries
        uses: actions/upload-artifact@v4
        with:
          name: binaries-${{ matrix.example }}
          path: |
            ./timefold-solver-benchmarks/benchmarks-baseline.jar
            ./timefold-solver-benchmarks/benchmarks-sut.jar
          if-no-files-found: error

  # Downloads the two JARs for one example, runs baseline and SUT back to back
  # on the same machine, archives the results and reports the comparison.
  benchmark:
    needs: build
    runs-on: self-hosted  # We need a stable machine to actually run the benchmarks.
    strategy:
      fail-fast: false  # Jobs fail if the benchmark error is over predefined thresholds; other benchmarks continue.
      matrix:
        example:
          - cloud_balancing
          - conference_scheduling
          - curriculum_course
          - examination
          - machine_reassignment
          - meeting_scheduling
          - nurse_rostering
          - patient_admission_scheduling
          - task_assigning
          - traveling_tournament
          - tsp
          - vehicle_routing
    env:
      MVN_USERNAME: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_USERNAME }}'
      MVN_PASSWORD: '${{ secrets.JFROG_ENTERPRISE_READ_ONLY_ACCESS_TOKEN }}'
    steps:
      - name: Clean results of previous runs
        shell: bash
        run: |
          rm -rf timefold-solver-benchmarks
      - name: Checkout timefold-solver-benchmarks
        uses: actions/checkout@v4
        with:
          repository: TimefoldAI/timefold-solver-benchmarks
          path: ./timefold-solver-benchmarks
      - name: Setup JDK and Maven
        uses: actions/setup-java@v4
        with:
          java-version: ${{ github.event.inputs.jdk }}  # Use the JDK version specified by the user.
          distribution: 'temurin'
          check-latest: true
      - name: Download the benchmark binaries
        uses: actions/download-artifact@v4
        with:
          name: binaries-${{ matrix.example }}
          path: ./timefold-solver-benchmarks/
      - name: Setup Async Profiler
        working-directory: ./timefold-solver-benchmarks
        shell: bash
        env:
          ASYNC_PROFILER_VERSION: ${{ github.event.inputs.async_profiler_version }}
        run: |
          FILENAME="async-profiler-${ASYNC_PROFILER_VERSION}-linux-x64.tar.gz"
          wget "https://github.com/async-profiler/async-profiler/releases/download/v${ASYNC_PROFILER_VERSION}/${FILENAME}"
          tar -xzf "$FILENAME"
          # -p: do not fail if the directory is already there.
          mkdir -p target
          ls -l
      # Fine-tuned for stability on GHA.
      - name: Configure the benchmark
        working-directory: ./timefold-solver-benchmarks
        shell: bash
        run: |
          {
            echo "forks=20"
            echo "warmup_iterations=5"
            echo "measurement_iterations=5"
            echo "relative_score_error_threshold=0.02"
            echo "score_director_type=cs"
            echo "example=${{ matrix.example }}"
          } > scoredirector-benchmark.properties
          cat scoredirector-benchmark.properties
          chmod +x run-scoredirector.sh
      - name: (Baseline) Run the benchmark
        working-directory: ./timefold-solver-benchmarks
        id: benchmark_baseline
        env:
          RUN_ID: ${{ github.event.inputs.baseline }}
        shell: bash
        run: |
          cp benchmarks-baseline.jar target/benchmarks.jar
          ./run-scoredirector.sh
          # RUN_ID holds the baseline version, which is also the name of the results directory read here.
          RESULTS_FILE="results/scoredirector/${RUN_ID}/results.json"
          # Export the rounded confidence interval bounds and the score as step outputs.
          {
            echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]|round' "$RESULTS_FILE")"
            echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]|round' "$RESULTS_FILE")"
            echo "RANGE_MID=$(jq '.[0].primaryMetric.score|round' "$RESULTS_FILE")"
          } >> "$GITHUB_OUTPUT"
      - name: (SUT) Run the benchmark
        id: benchmark_sut
        working-directory: ./timefold-solver-benchmarks
        env:
          RUN_ID: ${{ github.event.inputs.branch }}
        shell: bash
        run: |
          rm target/benchmarks.jar
          cp benchmarks-sut.jar target/benchmarks.jar
          ./run-scoredirector.sh
          # RUN_ID holds the branch name, which is also the name of the results directory read here.
          RESULTS_FILE="results/scoredirector/${RUN_ID}/results.json"
          {
            echo "RANGE_START=$(jq '.[0].primaryMetric.scoreConfidence[0]|round' "$RESULTS_FILE")"
            echo "RANGE_END=$(jq '.[0].primaryMetric.scoreConfidence[1]|round' "$RESULTS_FILE")"
            echo "RANGE_MID=$(jq '.[0].primaryMetric.score|round' "$RESULTS_FILE")"
          } >> "$GITHUB_OUTPUT"
      - name: Clean up the data
        working-directory: ./timefold-solver-benchmarks
        shell: bash
        env:
          BASELINE: ${{ github.event.inputs.baseline }}
          BRANCH: ${{ github.event.inputs.branch }}
        run: |
          # Move each result directory next to the properties file and give the
          # JFR recording a name that identifies the example and the run.
          mv "results/scoredirector/${BASELINE}" .
          mv "${BASELINE}/combined.jfr" "${BASELINE}/${{ matrix.example }}-${BASELINE}-combined.jfr"
          mv "results/scoredirector/${BRANCH}" .
          mv "${BRANCH}/combined.jfr" "${BRANCH}/${{ matrix.example }}-${BRANCH}-combined.jfr"
      - name: Archive benchmark data
        uses: actions/upload-artifact@v4
        with:
          name: results-${{ matrix.example }}-${{ github.event.inputs.baseline }}_vs_${{ github.event.inputs.branch }}
          path: |
            ./timefold-solver-benchmarks/scoredirector-benchmark.properties
            ./timefold-solver-benchmarks/${{ github.event.inputs.baseline }}/*.log
            ./timefold-solver-benchmarks/${{ github.event.inputs.baseline }}/*combined.jfr
            ./timefold-solver-benchmarks/${{ github.event.inputs.baseline }}/*.html
            ./timefold-solver-benchmarks/${{ github.event.inputs.baseline }}/*.json
            ./timefold-solver-benchmarks/${{ github.event.inputs.branch }}/*.log
            ./timefold-solver-benchmarks/${{ github.event.inputs.branch }}/*combined.jfr
            ./timefold-solver-benchmarks/${{ github.event.inputs.branch }}/*.html
            ./timefold-solver-benchmarks/${{ github.event.inputs.branch }}/*.json
      - name: Report results
        working-directory: ./timefold-solver-benchmarks
        env:
          BASELINE: ${{ github.event.inputs.baseline }}
          BRANCH: ${{ github.event.inputs.branch }}
          BRANCH_OWNER: ${{ github.event.inputs.branch_owner }}
          BASELINE_RANGE_START: ${{ steps.benchmark_baseline.outputs.RANGE_START }}
          BASELINE_RANGE_MID: ${{ steps.benchmark_baseline.outputs.RANGE_MID }}
          BASELINE_RANGE_END: ${{ steps.benchmark_baseline.outputs.RANGE_END }}
          SUT_RANGE_START: ${{ steps.benchmark_sut.outputs.RANGE_START }}
          SUT_RANGE_MID: ${{ steps.benchmark_sut.outputs.RANGE_MID }}
          SUT_RANGE_END: ${{ steps.benchmark_sut.outputs.RANGE_END }}
        shell: bash
        run: |
          # Multiply before dividing: with scale=2, bc truncates every quotient to
          # two decimals, so "(a / b) * 100" could only ever yield whole percents.
          BASELINE_DEV=$(echo "scale=2; ($BASELINE_RANGE_MID * 100 / $BASELINE_RANGE_START) - 100" | bc)
          SUT_DEV=$(echo "scale=2; ($SUT_RANGE_MID * 100 / $SUT_RANGE_START) - 100" | bc)
          DIFF_MID=$(echo "scale=2; $BASELINE_RANGE_MID * 100 / $SUT_RANGE_MID" | bc)
          FAIL=false
          if (( $(echo "$DIFF_MID >= 97.00" | bc -l) && $(echo "$DIFF_MID <= 103.00" | bc -l) )); then
            # Ignore differences of up to 3 %; we can't expect that level of precision anyway.
            exit 0
          elif [ "$SUT_RANGE_START" -gt "$BASELINE_RANGE_END" ]; then
            # The whole SUT interval lies above the baseline interval.
            echo "### 🚀🚀🚀 Statistically significant improvement 🚀🚀🚀" >> "$GITHUB_STEP_SUMMARY"
          elif [ "$BASELINE_RANGE_START" -gt "$SUT_RANGE_END" ]; then
            # The whole SUT interval lies below the baseline interval.
            echo "### ‼️‼️‼️ Statistically significant regression ‼️‼️‼️" >> "$GITHUB_STEP_SUMMARY"
            FAIL=true
          else
            # The confidence intervals overlap; nothing to report.
            exit 0
          fi
          {
            echo "| | **Ref** | **Mean** |"
            echo "|:------:|:-----------:|:-----------------:|"
            echo "| _Old_ | [v${BASELINE}](https://github.com/TimefoldAI/timefold-solver/releases/tag/v${BASELINE}) | ${BASELINE_RANGE_MID} ± ${BASELINE_DEV} % |"
            echo "| _New_ | [${BRANCH_OWNER}'s ${BRANCH}](https://github.com/${BRANCH_OWNER}/timefold-solver/tree/${BRANCH}) | ${SUT_RANGE_MID} ± ${SUT_DEV} % |"
            echo "| _Diff_ | | ${DIFF_MID} % |"
            echo ""
            echo "Mean is in operations per second. Higher is better."
            echo "Mean ± X % describes a 99.9 % confidence interval."
            echo "Diff under 100 % represents an improvement, over 100 % a regression."
          } >> "$GITHUB_STEP_SUMMARY"
          # Fail the job only after the summary has been written.
          if [ "$FAIL" = true ]; then
            exit 1
          fi