# Patch for .github/workflows/examples.yaml: appends `--momentum=0.9` to the
# command line of each of the four example steps in this hunk (default example
# on CPU and GPU, DDP example on CPU and GPU). No other argument is changed.
#
# NOTE(review): the leading whitespace of the YAML lines below was
# reconstructed (steps at 6 spaces, `run:` at 8, script body at 10) - confirm
# against the target file, or apply with whitespace-insensitive matching.
diff --git a/.github/workflows/examples.yaml b/.github/workflows/examples.yaml
index 1c9af43..1c42367 100644
--- a/.github/workflows/examples.yaml
+++ b/.github/workflows/examples.yaml
@@ -22,16 +22,16 @@ jobs:
       - name: Run default example on CPU.
         run: |
           source .venv/bin/activate
-          CUDA_VISIBLE_DEVICES="" python -m distributed_shampoo.examples.default_cifar10_example --optimizer-type DISTRIBUTED_SHAMPOO --precondition-frequency 30 --grafting-type ADAM --use-bias-correction --use-decoupled-weight-decay --use-merge-dims --epochs 1 --batch-size 1024
+          CUDA_VISIBLE_DEVICES="" python -m distributed_shampoo.examples.default_cifar10_example --optimizer-type DISTRIBUTED_SHAMPOO --precondition-frequency 30 --grafting-type ADAM --use-bias-correction --use-decoupled-weight-decay --use-merge-dims --epochs 1 --batch-size 1024 --momentum=0.9
       - name: Run default example on GPU.
         run: |
           source .venv/bin/activate
-          python -m distributed_shampoo.examples.default_cifar10_example --optimizer-type DISTRIBUTED_SHAMPOO --precondition-frequency 30 --grafting-type ADAM --use-bias-correction --use-decoupled-weight-decay --use-merge-dims --epochs 1 --batch-size 1024
+          python -m distributed_shampoo.examples.default_cifar10_example --optimizer-type DISTRIBUTED_SHAMPOO --precondition-frequency 30 --grafting-type ADAM --use-bias-correction --use-decoupled-weight-decay --use-merge-dims --epochs 1 --batch-size 1024 --momentum=0.9
       - name: Run DDP example on CPU.
         run: |
           source .venv/bin/activate
-          CUDA_VISIBLE_DEVICES="" torchrun --standalone --nnodes=1 --nproc_per_node=2 -m distributed_shampoo.examples.ddp_cifar10_example --optimizer-type DISTRIBUTED_SHAMPOO --precondition-frequency 15 --grafting-type ADAM --use-bias-correction --use-decoupled-weight-decay --use-merge-dims --epochs 1 --local-batch-size 1024 --backend gloo
+          CUDA_VISIBLE_DEVICES="" torchrun --standalone --nnodes=1 --nproc_per_node=2 -m distributed_shampoo.examples.ddp_cifar10_example --optimizer-type DISTRIBUTED_SHAMPOO --precondition-frequency 15 --grafting-type ADAM --use-bias-correction --use-decoupled-weight-decay --use-merge-dims --epochs 1 --local-batch-size 1024 --backend gloo --momentum=0.9
       - name: Run DDP example on GPU.
         run: |
           source .venv/bin/activate
-          torchrun --standalone --nnodes=1 --nproc_per_node=1 -m distributed_shampoo.examples.ddp_cifar10_example --optimizer-type DISTRIBUTED_SHAMPOO --precondition-frequency 30 --grafting-type ADAM --use-bias-correction --use-decoupled-weight-decay --use-merge-dims --epochs 1 --local-batch-size 1024
+          torchrun --standalone --nnodes=1 --nproc_per_node=1 -m distributed_shampoo.examples.ddp_cifar10_example --optimizer-type DISTRIBUTED_SHAMPOO --precondition-frequency 30 --grafting-type ADAM --use-bias-correction --use-decoupled-weight-decay --use-merge-dims --epochs 1 --local-batch-size 1024 --momentum=0.9