Skip to content

Commit b0afe69

Browse files
committed
Merge branch 'awarno/haproxy' of https://github.com/NVIDIA-NeMo/Evaluator into awarno/haproxy
2 parents df2cbfd + 4f113b7 commit b0afe69

File tree

334 files changed

+38573
-2564
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

334 files changed

+38573
-2564
lines changed

.github/actions/test-template/action.yml

Lines changed: 165 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,45 @@ inputs:
2626
description: "Failure will cancel all other tests if set to true"
2727
required: false
2828
default: "false"
29-
is_unit_test:
30-
description: "Upload coverage as unit test"
29+
type_of_test:
30+
description: "Type of test to run"
3131
required: false
32-
default: "false"
32+
default: "unit_tests"
3333
package:
34-
description: "Package to test"
35-
required: true
34+
description: "Package to test. Only required for unit and functional tests."
35+
required: false
36+
# --- Container registry credentials -----------------------------------------
# NOTE(review): the credential inputs below default to the literal string
# 'false' rather than an empty string — confirm callers rely on that sentinel.

# Enables the "Use credentials" step, which runs only when this equals 'true'.
use-credentials:
  description: 'Use credentials for test'
  required: false
  default: 'false'
# User name handed to `docker login` for nvcr.io.
ngc-api-user:
  description: 'NGC API user'
  required: false
  default: 'false'
# Password piped to `docker login --password-stdin` for nvcr.io.
ngc-api-key:
  description: 'NGC API key'
  required: false
  default: 'false'
# NOTE(review): not referenced by any step visible in this diff — confirm
# where this flag is consumed.
cpu-only:
  description: 'CPU only'
  required: false
  default: 'true'

# --- Azure ------------------------------------------------------------------
# Gates the "Azure Login" and "Azure Fileshare" steps (compared with 'true').
has-azure-credentials:
  description: 'Has Azure credentials'
  required: false
  default: 'false'
# Forwarded to azure/login as `client-id`.
azure-client-id:
  description: 'Azure client ID'
  required: false
  default: 'false'
# Forwarded to azure/login as `tenant-id`.
azure-tenant-id:
  description: 'Azure tenant ID'
  required: false
  default: 'false'
# Forwarded to azure/login as `subscription-id`.
azure-subscription-id:
  description: 'Azure subscription ID'
  required: false
  default: 'false'
3668

3769
runs:
3870
using: "composite"
@@ -42,30 +74,139 @@ runs:
4274
with:
4375
path: Eval
4476

45-
- name: Build container
77+
# Fail fast when `type_of_test` is not one of the three supported values
# (unit_tests, functional_tests, integration_tests).
- name: Validate type of test
  shell: bash
  env:
    # Hand the input over through the environment instead of interpolating it
    # into the script text, so its value can never be parsed as shell code.
    TYPE_OF_TEST: ${{ inputs.type_of_test }}
  run: |
    echo "::group::Validate type of test"
    case "$TYPE_OF_TEST" in
      unit_tests | functional_tests | integration_tests)
        ;;
      *)
        echo "Invalid type of test: $TYPE_OF_TEST"
        exit 1
        ;;
    esac
    echo "::endgroup::"
86+
87+
# Log the Docker client in to nvcr.io. Runs only when the caller sets
# `use-credentials` to 'true'.
- name: Use credentials
  shell: bash
  if: ${{ inputs.use-credentials == 'true' }}
  env:
    # Pass both values through the environment: interpolating them into the
    # script would expose the key to word splitting, globbing and shell
    # injection, and would break on quotes inside the user name.
    NGC_API_USER: ${{ inputs.ngc-api-user }}
    NGC_API_KEY: ${{ inputs.ngc-api-key }}
  run: |
    echo "::group::Use credentials"
    # printf '%s' writes the key verbatim; `echo` could mangle a key that
    # starts with '-' or contains backslashes.
    printf '%s' "$NGC_API_KEY" | docker login -u "$NGC_API_USER" --password-stdin nvcr.io
    echo "::endgroup::"
94+
95+
# Build the `eval` image from docker/Dockerfile.ci for every test type except
# integration tests. The package under test is forwarded as the PACKAGE
# build argument.
- name: Build container for unit and functional tests
  shell: bash
  if: ${{ inputs.type_of_test != 'integration_tests' }}
  env:
    # Passed through the environment and quoted below, so a package value
    # containing spaces or shell metacharacters cannot alter the command line.
    PACKAGE: ${{ inputs.package }}
  run: |
    echo "::group::Build test container"
    docker build -f docker/Dockerfile.ci \
      --build-arg "PACKAGE=$PACKAGE" \
      -t eval .
    echo "::endgroup::"
104+
105+
- name: Build container for integration tests
46106
shell: bash
107+
if: ${{ inputs.type_of_test == 'integration_tests' }}
47108
run: |
48109
echo ::group::Build test container
49-
docker build -f docker/Dockerfile.ci --build-arg PACKAGE=${{ inputs.package }} -t eval .
110+
docker build -f docker/Dockerfile.ci-integration \
111+
-t eval .
50112
echo ::endgroup::
51113
114+
# Run azure/login with the caller-supplied IDs. Skipped unless
# `has-azure-credentials` equals 'true'.
- name: Azure Login
  uses: azure/login@v2
  if: inputs.has-azure-credentials == 'true'
  with:
    subscription-id: ${{ inputs.azure-subscription-id }}
    tenant-id: ${{ inputs.azure-tenant-id }}
    client-id: ${{ inputs.azure-client-id }}
121+
122+
# Mount the Azure file share over SMB at /mnt/datadrive and list its TestData
# directory. Requires Azure credentials and is skipped for unit tests.
- name: Azure Fileshare
  # The accepted value is 'unit_tests' (underscore); comparing against
  # 'unit-tests' made this guard always true.
  if: ${{ inputs.has-azure-credentials == 'true' && inputs.type_of_test != 'unit_tests' }}
  shell: bash
  id: azure-fileshare
  run: |
    echo "::group::Mount SMB drive"
    sudo apt update
    sudo apt install -y cifs-utils

    RESOURCE_GROUP_NAME="azure-gpu-vm-runner_group"
    STORAGE_ACCOUNT_NAME="nemocistorageaccount2"
    FILE_SHARE_NAME="fileshare"

    MNT_PATH="/mnt/datadrive"

    sudo mkdir -p "$MNT_PATH"

    # Create a folder to store the credentials for this storage account and
    # any other that you might set up.
    CREDENTIAL_ROOT="/etc/smbcredentials"
    sudo mkdir -p "$CREDENTIAL_ROOT"

    # Get the storage account key for the indicated storage account.
    # You must be logged in with az login and your user identity must have
    # permissions to list the storage account keys for this command to work.
    STORAGE_ACCOUNT_KEY=$(az storage account keys list \
      --resource-group "$RESOURCE_GROUP_NAME" \
      --account-name "$STORAGE_ACCOUNT_NAME" \
      --query "[0].value" --output tsv | tr -d '"')

    # Create the credential file for this individual storage account
    SMB_CREDENTIAL_FILE="$CREDENTIAL_ROOT/$STORAGE_ACCOUNT_NAME.cred"
    if [ ! -f "$SMB_CREDENTIAL_FILE" ]; then
      echo "username=$STORAGE_ACCOUNT_NAME" | sudo tee "$SMB_CREDENTIAL_FILE" > /dev/null
      echo "password=$STORAGE_ACCOUNT_KEY" | sudo tee -a "$SMB_CREDENTIAL_FILE" > /dev/null
    else
      echo "The credential file $SMB_CREDENTIAL_FILE already exists, and was not modified."
    fi

    # Change permissions on the credential file so only root can read or modify the password file.
    sudo chmod 600 "$SMB_CREDENTIAL_FILE"

    # This command assumes you have logged in with az login
    HTTP_ENDPOINT=$(az storage account show \
      --resource-group "$RESOURCE_GROUP_NAME" \
      --name "$STORAGE_ACCOUNT_NAME" \
      --query "primaryEndpoints.file" --output tsv | tr -d '"')
    # Drop the first six characters (the "https:" scheme) so that the
    # "//<host>/" part remains, then append the share name.
    SMB_PATH="$(echo "$HTTP_ENDPOINT" | cut -c7-)$FILE_SHARE_NAME"

    sudo mount -t cifs "$SMB_PATH" "$MNT_PATH" \
      -o "credentials=$SMB_CREDENTIAL_FILE,serverino,nosharesock,actimeo=30,mfsymlinks,fsc,cache=strict"

    # Sanity check: fail the step if the expected data directory is missing.
    ls -al "$MNT_PATH/TestData"
    echo "::endgroup::"
174+
52175
- name: Start container
53176
shell: bash
177+
env:
178+
MOUNT_FS: ${{ inputs.type_of_test == 'integration_tests' || inputs.type_of_test == 'functional_tests' }}
54179
run: |
55-
echo ::group::Start test container
180+
echo "::group::Start test container"
181+
if [[ "$MOUNT_FS" == "true" ]]; then
182+
ls -al /mnt/datadrive/TestData
183+
VOLUME_ARGS="--volume /mnt/datadrive/TestData:/home/TestData"
184+
else
185+
VOLUME_ARGS=""
186+
fi
187+
188+
ARG=("")
189+
if [[ "${{ inputs.type_of_test }}" == "integration_tests" ]]; then
190+
ARG=("--runtime=nvidia --gpus all")
191+
fi
192+
56193
cmd=$(cat <<RUN_TEST_EOF
57194
#!/bin/bash
58195
docker container rm -f nemo_container_${{ github.run_id }} || true
196+
59197
docker run \
60198
--rm \
61199
-d \
62200
--name nemo_container_${{ github.run_id }} \
201+
${ARG[@]} \
63202
--shm-size=64g \
64-
--env TRANSFORMERS_OFFLINE=0 \
203+
--env TRANSFORMERS_OFFLINE=1 \
65204
--env HYDRA_FULL_ERROR=1 \
205+
--env HF_HUB_CACHE_PATH=/home/TestData/HF_HOME/hub \
66206
--env HF_HOME=/home/TestData/HF_HOME \
67207
--env RUN_ID=${{ github.run_id }} \
68208
--volume $(pwd)/Eval:/workspace \
209+
$VOLUME_ARGS \
69210
eval \
70211
bash -c "sleep $(( ${{ inputs.timeout }} * 60 + 60 ))"
71212
RUN_TEST_EOF
@@ -80,16 +221,20 @@ runs:
80221
shell: bash
81222
run: |
82223
echo ::group::Create run-script
83-
COVERAGE_PREFIX=$([[ "${{ inputs.is_unit_test }}" == "true" ]] && echo "unit-test" || echo "e2e")
224+
COVERAGE_PREFIX=$([[ "${{ inputs.type_of_test }}" == "unit_tests" ]] && echo "unit-test" || echo "e2e")
84225
echo "coverage-prefix=$COVERAGE_PREFIX" | tee -a "$GITHUB_OUTPUT"
85226
86227
cmd=$(cat <<'RUN_TEST_EOF'
87228
#!/bin/bash
88229
89230
docker exec -t nemo_container_${{ github.run_id }} bash -c '
90231
set -e
91-
cd packages/${{ inputs.package }}
92-
bash tests/${{ inputs.is_unit_test == 'true' && 'unit_tests' || 'functional_tests' }}/${{ inputs.script }}.sh
232+
if [[ "${{ inputs.type_of_test }}" != "integration_tests" ]]; then
233+
cd packages/${{ inputs.package }}
234+
bash tests/${{ inputs.type_of_test == 'unit_tests' && 'unit_tests' || 'functional_tests' }}/${{ inputs.script }}.sh
235+
else
236+
bash tests/integration_tests/Launch_Integration_Tests.sh
237+
fi
93238
'
94239
95240
RUN_TEST_EOF
@@ -115,9 +260,14 @@ runs:
115260
shell: bash
116261
run: |
117262
echo ::group::Check result
118-
docker cp nemo_container_${{ github.run_id }}:/workspace/packages/${{ inputs.package }}/.coverage .coverage.${{ inputs.package }}
119263
120-
coverage_report=coverage-${{ steps.create.outputs.coverage-prefix }}-${{ github.run_id }}-$(uuidgen)
264+
if [[ "${{ inputs.type_of_test }}" != "integration_tests" ]]; then
265+
docker cp nemo_container_${{ github.run_id }}:/workspace/packages/${{ inputs.package }}/.coverage .coverage.${{ inputs.package }}
266+
coverage_report=coverage-${{ steps.create.outputs.coverage-prefix }}-${{ github.run_id }}-$(uuidgen)
267+
else
268+
coverage_report=none
269+
fi
270+
121271
echo "coverage_report=$coverage_report" >> "$GITHUB_OUTPUT"
122272
123273
EXIT_CODE=${{ steps.run-main-script.outputs.exit_code }}
@@ -138,6 +288,7 @@ runs:
138288
139289
- name: Test coverage
140290
shell: bash -x -e -u -o pipefail {0}
291+
if: ${{ steps.check.outputs.coverage_report != 'none' }}
141292
run: |
142293
echo ::group::Test coverage
143294
docker exec -t nemo_container_${{ github.run_id }} bash -c 'cd /workspace/packages/${{ inputs.package }} && /opt/venv/bin/coverage report -i'

.github/config/requirements.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,6 @@ sphinx-copybutton # Adds a copy button for code blocks
55
nvidia-sphinx-theme # Our NVIDIA theme
66
sphinxcontrib-mermaid # For mermaid diagrams
77
myst-parser # For our markdown docs
8-
sphinx-design
9-
sphinxcontrib-mermaid
10-
swagger-plugin-for-sphinx
8+
sphinx-design # For our design elements
9+
sphinxcontrib-mermaid # For mermaid diagrams
10+
swagger-plugin-for-sphinx # For Swagger API documentation

.github/labeler.yml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Maps label names (keys) to lists of path globs.
# NOTE(review): presumably consumed by a pull-request labeler action —
# confirm against the workflow that reads this file.

# Core evaluator package and its library docs.
nemo-evaluator:
  - 'packages/nemo-evaluator/**/*'
  - 'docs/libraries/nemo-evaluator/**/*'

# Launcher package and its library docs.
nemo-evaluator-launcher:
  - 'packages/nemo-evaluator-launcher/**/*'
  - 'docs/libraries/nemo-evaluator-launcher/**/*'

# Documentation tree, top-level README and tutorials.
documentation:
  - 'docs/**/*'
  - 'README.md'
  - 'tutorials/**/*'

# Repository-level tests plus each package's own test directory.
tests:
  - 'tests/**/*'
  - 'packages/nemo-evaluator/tests/**/*'
  - 'packages/nemo-evaluator-launcher/tests/**/*'

scripts:
  - 'scripts/**/*'

# GitHub configuration and Docker build files.
CI:
  - '.github/**/*'
  - 'Dockerfile'
  - 'docker/**/*'
26+

.github/workflows/build-docs.yml

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -50,15 +50,17 @@ jobs:
5050
GH_TOKEN: ${{ github.token }}
5151
RUN_ID: ${{ github.run_id }}
5252
SKIPPING_IS_ALLOWED: ${{ needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' }}
53+
BUILD_DOCS_RESULT: ${{ needs.build-docs.result }}
5354
run: |
54-
FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success")] | length') || echo 0
55-
56-
if [ "${FAILED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]; then
57-
echo "✅ All previous jobs completed successfully"
58-
exit 0
55+
echo "build-docs job result: ${BUILD_DOCS_RESULT}"
56+
57+
if [ "${BUILD_DOCS_RESULT}" == "success" ]; then
58+
echo "✅ Build docs completed successfully"
59+
elif [ "${BUILD_DOCS_RESULT}" == "skipped" ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]; then
60+
echo "⏭️ Build docs was skipped"
5961
else
60-
echo "❌ Found $FAILED_JOBS failed job(s)"
61-
# Show which jobs failed
62-
gh run view $GITHUB_RUN_ID --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion != "success") | .name'
63-
exit 1
62+
echo "⚠️ Build docs failed (allowed to fail)"
6463
fi
64+
65+
# Always exit 0 - build-docs is allowed to fail
66+
exit 0

.github/workflows/build-test-publish-wheel.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535
if: |
3636
!(needs.pre-flight.outputs.docs_only == 'true'
3737
|| needs.pre-flight.outputs.is_deployment_workflow == 'true')
38-
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.60.0
38+
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.64.2
3939
with:
4040
dry-run: true
4141
python-package: nemo_evaluator
@@ -70,7 +70,7 @@ jobs:
7070
if: |
7171
!(needs.pre-flight.outputs.docs_only == 'true'
7272
|| needs.pre-flight.outputs.is_deployment_workflow == 'true')
73-
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.60.0
73+
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.64.2
7474
with:
7575
dry-run: true
7676
python-package: nemo_evaluator_launcher

0 commit comments

Comments
 (0)