Skip to content

[Nightly] Modify performance comparison #1646

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 24 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 17 additions & 17 deletions .github/scripts/calculate_best_perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@

parser = argparse.ArgumentParser(description="Get Best Performance",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("--best", default=None, required=True, help="Saved best performance file")
parser.add_argument("--new", default=None, required=True, help="New round launch")
parser.add_argument("--best", required=True, help="Saved best performance file")
parser.add_argument("--new", required=True, help="New round launch")
parser.add_argument("--device", default=None, type=str, help="Device name, such as PVC1100")
parser.add_argument("--os", default=None, type=str, help="OS version, such as Ubuntu 22.04")
parser.add_argument("--driver", default=None, type=str, help="Driver version, such as 25.05.32567")
Expand Down Expand Up @@ -63,8 +63,8 @@ def find_files(pattern, path):
new_data = pd.read_csv(new_file)
new_data = new_data.reset_index(drop=True)
for index, row in new_data.iterrows():
new_eager = row["abs_latency"] * row["speedup"]
# eager
new_eager = row["abs_latency"] * row["speedup"]
eager_row = next(([i, line] for i, line in best_data.iterrows()
if (line["Category"] == category
and line["Model"] == row["name"]
Expand All @@ -74,15 +74,15 @@ def find_files(pattern, path):
(best_data["Model"] == row["name"]) &
(best_data["Value Type"] == "eager")]
if eager_row != "N/A":
if new_eager < best_eager_value["Value"].values:
best_data.loc[eager_row[0]] = category, row["name"], "eager", new_eager, \
args.device, args.os, args.driver, args.oneapi, args.gcc, args.python, \
args.pytorch, args.torch_xpu_ops, date.today().strftime('%F')
if new_eager < best_eager_value["Value"].values[0]:
best_data.loc[eager_row[0]] = [category, row["name"], "eager", new_eager,
args.device, args.os, args.driver, args.oneapi, args.gcc, args.python,
args.pytorch, args.torch_xpu_ops, date.today().strftime('%F')]
else:
best_data.loc[len(best_data), :] = None
best_data.loc[len(best_data) - 1] = category, row["name"], "eager", new_eager, \
args.device, args.os, args.driver, args.oneapi, args.gcc, args.python, \
args.pytorch, args.torch_xpu_ops, date.today().strftime('%F')
best_data.loc[len(best_data) - 1] = [category, row["name"], "eager", new_eager,
args.device, args.os, args.driver, args.oneapi, args.gcc, args.python,
args.pytorch, args.torch_xpu_ops, date.today().strftime('%F')]
# inductor
inductor_row = next(([i, line] for i, line in best_data.iterrows()
if (line["Category"] == category
Expand All @@ -93,14 +93,14 @@ def find_files(pattern, path):
(best_data["Model"] == row["name"]) &
(best_data["Value Type"] == "inductor")]
if inductor_row != "N/A":
if row["abs_latency"] < best_inductor_value["Value"].values:
best_data.at[inductor_row[0]] = category, row["name"], "inductor", row["abs_latency"], \
args.device, args.os, args.driver, args.oneapi, args.gcc, args.python, \
args.pytorch, args.torch_xpu_ops, date.today().strftime('%F')
if row["abs_latency"] < best_inductor_value["Value"].values[0]:
best_data.loc[inductor_row[0]] = [category, row["name"], "inductor", row["abs_latency"],
args.device, args.os, args.driver, args.oneapi, args.gcc, args.python,
args.pytorch, args.torch_xpu_ops, date.today().strftime('%F')]
else:
best_data.loc[len(best_data), :] = None
best_data.loc[len(best_data) - 1] = category, row["name"], "inductor", row["abs_latency"], \
args.device, args.os, args.driver, args.oneapi, args.gcc, args.python, \
args.pytorch, args.torch_xpu_ops, date.today().strftime('%F')
best_data.loc[len(best_data) - 1] = [category, row["name"], "inductor", row["abs_latency"],
args.device, args.os, args.driver, args.oneapi, args.gcc, args.python,
args.pytorch, args.torch_xpu_ops, date.today().strftime('%F')]

best_data.to_csv(args.best, sep=',', encoding='utf-8', index=False)
33 changes: 7 additions & 26 deletions .github/scripts/e2e_summary.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash

results_dir="$1"
artifact_type="$2"
reference_dir="$2"
check_file="$(dirname "$0")/../ci_expected_accuracy/check_expected.py"
rm -rf /tmp/tmp-*.txt

Expand Down Expand Up @@ -147,32 +147,13 @@ fi
performance=$(find "${results_dir}" -name "*.csv" |grep -E "_xpu_performance.csv" -c)
if [ "${performance}" -gt 0 ];then
echo "### Performance"
pip install jq > /dev/null 2>&1
if [ "${artifact_type}" != "" ];then
gh api \
--method GET -F per_page=100 -F page=10 \
-H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" \
/repos/${GITHUB_REPOSITORY:-"intel/torch-xpu-ops"}/actions/artifacts \
> ${GITHUB_WORKSPACE:-"/tmp"}/refer.json
artifact_id="$(eval "jq -r \
'[.artifacts[] | \
select(.name|test(\"${artifact_type}.*\")) | \
select(.workflow_run.head_branch|test(\"main\"))][0].id' \
${GITHUB_WORKSPACE:-"/tmp"}/refer.json")"
if [ "$artifact_id" -gt 1 ];then
gh api \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
/repos/${GITHUB_REPOSITORY:-"intel/torch-xpu-ops"}/actions/artifacts/${artifact_id}/zip > reference.zip
fi
unzip ${reference_dir}/*.zip -d ${reference_dir} > /dev/null 2>&1
if [ "${IS_PR}" == "1" ];then
python "$(dirname "$0")/perf_comparison.py" --xpu ${results_dir} --refer ${reference_dir} --pr
else
python "$(dirname "$0")/perf_comparison.py" --xpu ${results_dir} --refer ${reference_dir}
fi
rm -rf ${GITHUB_WORKSPACE:-"/tmp"}/reference
mkdir ${GITHUB_WORKSPACE:-"/tmp"}/reference
mv reference.zip ${GITHUB_WORKSPACE:-"/tmp"}/reference
unzip ${GITHUB_WORKSPACE:-"/tmp"}/reference/reference.zip -d ${GITHUB_WORKSPACE:-"/tmp"}/reference > /dev/null 2>&1
reference_dir="${GITHUB_WORKSPACE:-"/tmp"}/reference"
python "$(dirname "$0")/perf_comparison.py" -xpu ${results_dir} -refer ${reference_dir}
cp ${GITHUB_WORKSPACE:-"/tmp"}/reference/best.csv ${results_dir}/best.csv > /dev/null 2>&1 || true
cp ${reference_dir}/best.csv ${results_dir}/best.csv > /dev/null 2>&1 || true
python "$(dirname "$0")/calculate_best_perf.py" \
--new ${results_dir} \
--best ${results_dir}/best.csv \
Expand Down
34 changes: 18 additions & 16 deletions .github/scripts/perf_comparison.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# To compare the performance diff
# Usage:
# python perf_comparison.py -xpu /path/to/xpu/performance/result/dir -refer /path/to/reference/dir
# python perf_comparison.py --xpu /path/to/xpu/performance/result/dir --refer /path/to/reference/dir

import re
import os
Expand All @@ -10,8 +10,9 @@
from statistics import geometric_mean

parser = argparse.ArgumentParser(description="Analysis", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("-xpu", default=None, help="XPU performance result csv files dir")
parser.add_argument("-refer", default=None, help="XPU refrerence result csv files dir")
parser.add_argument("--xpu", default=None, help="XPU performance result csv files dir")
parser.add_argument("--refer", default=None, help="XPU reference result csv files dir")
parser.add_argument("--pr", action="store_true", help="Only show results xpu has")
args = parser.parse_args()


Expand Down Expand Up @@ -46,7 +47,7 @@ def find_files(pattern, path):
if os.path.isfile(refer_file):
refer_data= pd.read_csv(refer_file)
refer_names = [row["name"] for index, row in refer_data.iterrows()]
names = xpu_names + refer_names
names = xpu_names if args.pr else xpu_names + refer_names
names = set(names)
names = sorted(names)
for name in names:
Expand All @@ -72,18 +73,19 @@ def find_files(pattern, path):
xpu_value = next((row for index, row in xpu_data.iterrows() if row["name"] == name), "")
xpu_eager_latency = xpu_value["speedup"] * xpu_value["abs_latency"]
output_data.append([multiple_replace(xpu_file), name, xpu_eager_latency, xpu_value["abs_latency"], xpu_value["speedup"], -1, -1, -1, -1, -1])
refer_files = find_files("*_xpu_performance.csv", args.refer)
for refer_file in refer_files:
refer_data = pd.read_csv(refer_file)
refer_names = [row["name"] for index, row in refer_data.iterrows()]
xpu_file = re.sub(args.refer, args.xpu + "/", refer_file, flags=re.IGNORECASE)
if not os.path.isfile(xpu_file):
names = set(refer_names)
names = sorted(names)
for name in names:
refer_value = next((row for index, row in refer_data.iterrows() if row["name"] == name), "")
refer_eager_latency = refer_value["speedup"] * refer_value["abs_latency"]
output_data.append([multiple_replace(refer_file), name, -1, -1, -1, refer_eager_latency, refer_value["abs_latency"], refer_value["speedup"], -1, -1])
if not args.pr:
refer_files = find_files("*_xpu_performance.csv", args.refer)
for refer_file in refer_files:
refer_data = pd.read_csv(refer_file)
refer_names = [row["name"] for index, row in refer_data.iterrows()]
xpu_file = re.sub(args.refer, args.xpu + "/", refer_file, flags=re.IGNORECASE)
if not os.path.isfile(xpu_file):
names = set(refer_names)
names = sorted(names)
for name in names:
refer_value = next((row for index, row in refer_data.iterrows() if row["name"] == name), "")
refer_eager_latency = refer_value["speedup"] * refer_value["abs_latency"]
output_data.append([multiple_replace(refer_file), name, -1, -1, -1, refer_eager_latency, refer_value["abs_latency"], refer_value["speedup"], -1, -1])

# summary
output_data = pd.DataFrame(output_data, columns=output_header)
Expand Down
30 changes: 12 additions & 18 deletions .github/workflows/_performance_comparison.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,19 @@ on:
required: true
type: string
default: ''
description: Target artifact id
description: Target run id
baseline:
required: true
type: string
default: ''
description: Baseline artifact id
description: Baseline run id

permissions: read-all

jobs:
Performance-Comparison:
env:
GH_TOKEN: ${{ github.token }}
runs-on: ubuntu-latest
steps:
- name: Cleanup workspace
Expand All @@ -29,23 +31,15 @@ jobs:
python-version: '3.10'
- name: Checkout torch-xpu-ops
uses: actions/checkout@v4
- name: Download target artifacts
uses: actions/download-artifact@v4
with:
run-id: ${{ inputs.target }}
pattern: XPU-E2E
path: target
- name: Download baseline artifacts
uses: actions/download-artifact@v4
with:
run-id: ${{ inputs.baseline }}
pattern: XPU-E2E
path: baseline
- name: Get comparison artifacts
- name: Download artifacts
run: |
cd target && unzip *.zip
cd ../baseline && unzip *.zip
rm -rf target && mkdir target && cd target
gh --repo ${GITHUB_REPOSITORY} run download ${{ inputs.target }} -p "Inductor-*-XPU-E2E-*"
mv Inductor-*-XPU-E2E-*/* . && cd ..
rm -rf baseline && mkdir baseline && cd baseline
gh --repo ${GITHUB_REPOSITORY} run download ${{ inputs.baseline }} -p "Inductor-*-XPU-E2E-*"
mv Inductor-*-XPU-E2E-*/* . && cd ..
- name: Comparison result
run: |
python -m pip install pandas
python .github/scripts/perf_comparison.py -xpu target -refer baseline >> ${GITHUB_STEP_SUMMARY}
python .github/scripts/perf_comparison.py --xpu target --refer baseline >> ${GITHUB_STEP_SUMMARY}
27 changes: 25 additions & 2 deletions .github/workflows/nightly_ondemand.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ on:
default: '3.10'
description: Python version

permissions: read-all
permissions: write-all

concurrency:
group: ${{ github.workflow }}-${{ github.sha }}-${{ github.event_name }}-${{ inputs.pytorch }}-${{ inputs.keep_torch_xpu_ops }}-${{ inputs.ut }}-${{ inputs.triton }}-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ inputs.model }}-${{ inputs.python }}
Expand Down Expand Up @@ -102,6 +102,7 @@ jobs:
timeout-minutes: 3600
env:
GH_TOKEN: ${{ github.token }}
reference_issue: 1645
pytorch: ${{ needs.Linux-Nightly-Ondemand-Build.outputs.torch_commit_id }}
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
Expand Down Expand Up @@ -300,10 +301,26 @@ jobs:
scenario: ${{ inputs.scenario }}
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

- name: Download Reference Artifact
id: reference_id
run: |
set -xe
source activate e2e_ci
conda install gh --channel conda-forge -y
if [ "${{ env.run_type }}" == "on-demand" ];then
artifact_type="weekly"
else
artifact_type="${{ env.run_type }}"
fi
REFERENCE_RUN_ID="$(gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} \
--json body -q .body |grep "Inductor-${artifact_type}-LTS-XPU-E2E" |sed 's/.*: *//')"
gh --repo ${GITHUB_REPOSITORY} run download ${REFERENCE_RUN_ID} -p "Inductor-*-XPU-E2E-*"
rm -rf reference && mv Inductor-*-XPU-E2E-* reference
- name: Summarize archive files
id: summary
if: ${{ ! cancelled() }}
run: |
set -x -e -o pipefail
rm -rf ${{ github.workspace }}/upload_files
cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files
mkdir -p ${{ github.workspace }}/../../_backup/ && cd ${{ github.workspace }}/../../_backup/
Expand All @@ -314,7 +331,7 @@ jobs:
source activate e2e_ci
bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh \
${{ github.workspace }}/upload_files \
Inductor-${{ env.run_type }}-LTS-XPU-E2E \
${{ github.workspace }}/reference \
>> ${GITHUB_STEP_SUMMARY}
exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt)
if [ ${exit_label} -ne 0 ];then
Expand All @@ -335,6 +352,12 @@ jobs:
with:
name: Inductor-${{ env.run_type }}-LTS-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}
path: ${{ github.workspace }}/upload_files
- name: Upload Reference Run ID
if: ${{ env.run_type != 'on-demand' }}
run: |
gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} --json body -q .body | \
sed "s/Inductor-${{ env.run_type }}-Rolling-XPU-E2E:.*/Inductor-${{ env.run_type }}-Rolling-XPU-E2E: ${GITHUB_RUN_ID}/" | sed '/^$/d' > new_body.txt
gh --repo ${GITHUB_REPOSITORY} issue edit ${reference_issue} --body-file new_body.txt

Windows-Nightly-Ondemand-UT-Tests:
if: ${{ github.event_name == 'schedule' || inputs.ut != '' }}
Expand Down
28 changes: 25 additions & 3 deletions .github/workflows/nightly_ondemand_rolling.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ on:
default: '3.10'
description: Python version

permissions: read-all
permissions: write-all

concurrency:
group: ${{ github.workflow }}-${{ github.sha }}-${{ github.event_name }}-${{ inputs.pytorch }}-${{ inputs.keep_torch_xpu_ops }}-${{ inputs.ut }}-${{ inputs.triton }}-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ inputs.model }}-${{ inputs.python }}
Expand Down Expand Up @@ -117,6 +117,7 @@ jobs:
timeout-minutes: 3600
env:
GH_TOKEN: ${{ github.token }}
reference_issue: 1645
pytorch: ${{ needs.Linux-Nightly-Ondemand-Build-Rolling.outputs.torch_commit_id }}
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
Expand Down Expand Up @@ -326,11 +327,26 @@ jobs:
scenario: ${{ inputs.scenario }}
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
driver: rolling

- name: Download Reference Artifact
id: reference_id
run: |
set -xe
source activate e2e_ci
conda install gh --channel conda-forge -y
if [ "${{ env.run_type }}" == "on-demand" ];then
artifact_type="weekly"
else
artifact_type="${{ env.run_type }}"
fi
REFERENCE_RUN_ID="$(gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} \
--json body -q .body |grep "Inductor-${artifact_type}-LTS-XPU-E2E" |sed 's/.*: *//')"
gh --repo ${GITHUB_REPOSITORY} run download ${REFERENCE_RUN_ID} -p "Inductor-*-XPU-E2E-*"
rm -rf reference && mv Inductor-*-XPU-E2E-* reference
- name: Summarize archive files
id: summary
if: ${{ ! cancelled() }}
run: |
set -x -e -o pipefail
rm -rf ${{ github.workspace }}/upload_files
cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files
mkdir -p ${{ github.workspace }}/../../_backup/ && cd ${{ github.workspace }}/../../_backup/
Expand All @@ -341,7 +357,7 @@ jobs:
source activate e2e_ci
bash ${{ github.workspace }}/.github/scripts/e2e_summary.sh \
${{ github.workspace }}/upload_files \
Inductor-${{ env.run_type }}-Rolling-XPU-E2E \
${{ github.workspace }}/reference \
>> ${GITHUB_STEP_SUMMARY}
exit_label=$(awk 'BEGIN{sum=0}{if($2>0){sum++}}END{print sum}' /tmp/tmp-result.txt)
if [ ${exit_label} -ne 0 ];then
Expand All @@ -362,6 +378,12 @@ jobs:
with:
name: Inductor-${{ env.run_type }}-Rolling-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}
path: ${{ github.workspace }}/upload_files
- name: Upload Reference Run ID
if: ${{ env.run_type != 'on-demand' }}
run: |
gh --repo ${GITHUB_REPOSITORY} issue view ${reference_issue} --json body -q .body | \
sed "s/Inductor-${{ env.run_type }}-Rolling-XPU-E2E:.*/Inductor-${{ env.run_type }}-Rolling-XPU-E2E: ${GITHUB_RUN_ID}/" | sed '/^$/d' > new_body.txt
gh --repo ${GITHUB_REPOSITORY} issue edit ${reference_issue} --body-file new_body.txt

Tests-Failure-And-Report:
if: ${{ ! cancelled() }}
Expand Down
Loading