From 0899ade4342e2841735940a666e57c7e69ceb355 Mon Sep 17 00:00:00 2001 From: runame Date: Wed, 30 Oct 2024 12:10:03 +0100 Subject: [PATCH 1/6] Add single GPU tests workflow --- .github/workflows/gpu-tests.yaml | 24 ++++++++++++++++++++++++ CONTRIBUTING.md | 2 +- makefile | 4 ++++ 3 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/gpu-tests.yaml diff --git a/.github/workflows/gpu-tests.yaml b/.github/workflows/gpu-tests.yaml new file mode 100644 index 0000000..313fbc2 --- /dev/null +++ b/.github/workflows/gpu-tests.yaml @@ -0,0 +1,24 @@ +name: gpu-tests + +on: [push, pull_request] + +jobs: + gpu-tests: + name: "Python 3.10" + runs-on: 4-core-ubuntu-gpu-t4 + steps: + - uses: actions/checkout@v4 + - name: Set up and update uv. + run: | + curl -LsSf https://astral.sh/uv/install.sh | sh + uv self update + - name: Install Python. + run: uv python install 3.10 + - name: Create venv and install the package. + run: | + uv venv && source .venv/bin/activate + uv pip install -e ".[dev]" + - name: Run tests. + run: | + source .venv/bin/activate + make test-gpu diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f1ae9ba..5445f46 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -11,7 +11,7 @@ We actively welcome your pull requests for existing optimizers. 1. Fork the repo and create your branch from `main`. Install the package inside of your Python environment with `pip install -e ".[dev]"`. 2. If you've added code that should be tested, add tests. 3. If you've changed APIs, update the documentation. -4. Ensure the test suite passes. To run the subset of the tests that can be run on CPU use `make test`; to run the subset of tests that require four GPUs use `make test-gpu`. +4. Ensure the test suite passes. To run the subset of the tests that can be run on CPU use `make test`; to run the tests for a single GPU use `make test-gpu` and to run the subset of tests that require 2-4 GPUs use `make test-multi-gpu`. 5. Make sure your code lints. 
You can use `make lint` and `make format` to automatically lint and format the code where possible. 6. If you haven't already, complete the Contributor License Agreement ("CLA"). diff --git a/makefile b/makefile index 8d9dd29..3a705e2 100644 --- a/makefile +++ b/makefile @@ -14,6 +14,10 @@ test: @torchrun --standalone --nnodes=1 --nproc_per_node=2 -m unittest distributed_shampoo/utils/gpu_tests/shampoo_ddp_distributor_test.py test-gpu: + @python3 -m unittest discover -s distributed_shampoo/gpu_tests/ -p "*_test.py" + @python3 -m unittest distributed_shampoo/utils/gpu_tests/shampoo_dist_utils_test.py + +test-multi-gpu: @torchrun --standalone --nnodes=1 --nproc_per_node=2 -m unittest discover -s distributed_shampoo/gpu_tests/ -p "*_test.py" @torchrun --standalone --nnodes=1 --nproc_per_node=2 -m unittest distributed_shampoo/utils/gpu_tests/shampoo_dist_utils_test.py @torchrun --standalone --nnodes=1 --nproc_per_node=2 -m unittest distributed_shampoo/utils/gpu_tests/shampoo_ddp_distributor_test.py From 492d9bf1a0798ff55f19bdc71da9e93de8ee1f58 Mon Sep 17 00:00:00 2001 From: runame Date: Wed, 30 Oct 2024 12:15:28 +0100 Subject: [PATCH 2/6] Fix uv installation in gpu test workflow --- .github/workflows/gpu-tests.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/gpu-tests.yaml b/.github/workflows/gpu-tests.yaml index 313fbc2..7ac6f2d 100644 --- a/.github/workflows/gpu-tests.yaml +++ b/.github/workflows/gpu-tests.yaml @@ -11,6 +11,7 @@ jobs: - name: Set up and update uv. run: | curl -LsSf https://astral.sh/uv/install.sh | sh + source $HOME/.cargo/env uv self update - name: Install Python. 
run: uv python install 3.10 From f91825c74b638a5cf39c4fada37eca04ba4472bc Mon Sep 17 00:00:00 2001 From: runame Date: Wed, 30 Oct 2024 14:48:45 +0100 Subject: [PATCH 3/6] Debugging of triton error --- .github/workflows/gpu-tests.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/gpu-tests.yaml b/.github/workflows/gpu-tests.yaml index 7ac6f2d..ab50d71 100644 --- a/.github/workflows/gpu-tests.yaml +++ b/.github/workflows/gpu-tests.yaml @@ -19,6 +19,10 @@ jobs: run: | uv venv && source .venv/bin/activate uv pip install -e ".[dev]" + - name: Check Triton installation. + run: | + source .venv/bin/activate + python3 -c "import triton; print(triton.__version__)" - name: Run tests. run: | source .venv/bin/activate From 1151f0ade2dfa011bbdf561c0397c231b9412868 Mon Sep 17 00:00:00 2001 From: runame Date: Wed, 30 Oct 2024 15:20:36 +0100 Subject: [PATCH 4/6] More debugging --- .github/workflows/gpu-tests.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/gpu-tests.yaml b/.github/workflows/gpu-tests.yaml index ab50d71..4b6c832 100644 --- a/.github/workflows/gpu-tests.yaml +++ b/.github/workflows/gpu-tests.yaml @@ -18,6 +18,7 @@ jobs: - name: Create venv and install the package. run: | uv venv && source .venv/bin/activate + uv pip install setuptools uv pip install -e ".[dev]" - name: Check Triton installation. run: | From 6c525300486c11c1d829dcd7d794fb94564f90b2 Mon Sep 17 00:00:00 2001 From: runame Date: Wed, 30 Oct 2024 15:23:05 +0100 Subject: [PATCH 5/6] Remove debugging code --- .github/workflows/gpu-tests.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/gpu-tests.yaml b/.github/workflows/gpu-tests.yaml index 4b6c832..b41338f 100644 --- a/.github/workflows/gpu-tests.yaml +++ b/.github/workflows/gpu-tests.yaml @@ -20,10 +20,6 @@ jobs: uv venv && source .venv/bin/activate uv pip install setuptools uv pip install -e ".[dev]" - - name: Check Triton installation. 
- run: | - source .venv/bin/activate - python3 -c "import triton; print(triton.__version__)" - name: Run tests. run: | source .venv/bin/activate From 2f8effaa919b0bb71d2f2782de5789eb5825be9a Mon Sep 17 00:00:00 2001 From: runame Date: Wed, 30 Oct 2024 15:24:30 +0100 Subject: [PATCH 6/6] Add GPU tests badge --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 27c8d2c..0ac482e 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ [![Python 3.10 | 3.11 | 3.12](https://img.shields.io/badge/python-3.10_|_3.11_|_3.12-blue.svg)](https://www.python.org/downloads/) ![tests](https://github.com/facebookresearch/optimizers/actions/workflows/tests.yaml/badge.svg) +![gpu-tests](https://github.com/facebookresearch/optimizers/actions/workflows/gpu-tests.yaml/badge.svg) ![lint-ruff](https://github.com/facebookresearch/optimizers/actions/workflows/lint-ruff.yaml/badge.svg) ![format-ruff](https://github.com/facebookresearch/optimizers/actions/workflows/format-ruff.yaml/badge.svg) ![format-usort](https://github.com/facebookresearch/optimizers/actions/workflows/format-usort.yaml/badge.svg)