-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: run-flash.sh
More file actions
58 lines (51 loc) · 1.15 KB
/
run-flash.sh
File metadata and controls
58 lines (51 loc) · 1.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/bin/bash
# SLURM batch script: build and time the flash_attention CUDA benchmark
# on one GPU node. Submit with: sbatch run-flash.sh
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=45G
#SBATCH --partition=gpu
#SBATCH --reservation=cpsc424gpu
#SBATCH -t 20:00
#SBATCH --job-name=flashattn
#SBATCH --gpus=1

# Start from a clean module environment, then load CUDA.
echo "***Purging module files"
echo ""
module purge
echo ""
echo "***Loading CUDA module file"
echo ""
module load CUDA
echo ""
module list
echo ""

# Record which GPU the job landed on.
echo "***Running nvidia-smi"
echo ""
nvidia-smi
echo ""
echo ""
echo "***Running deviceQuery"
/vast/palmer/apps/avx.grace/software/CUDAcore/11.3.1/extras/demo_suite/deviceQuery
echo ""

# Rebuild from scratch; abort the job if the build fails so we never
# time a stale or missing binary.
echo "***Building flash_attention"
make clean
make flash_attention || { echo "***Build failed; aborting." >&2; exit 1; }

# Now run the code. Note that if you turn on the error check using a
# cpu matmul code to check the answers, you will need more time for
# the job (possibly as much as 2 hours if you run all 4 test cases)
echo ""
echo "***Running Flash Attention module (n)"
# Uncomment additional sizes as needed (n = sequence length).
# time ./flash_attention 128
# echo ""
# time ./flash_attention 256
# echo ""
# time ./flash_attention 512
# echo ""
# time ./flash_attention 1024 # GPT2
# echo ""
# time ./flash_attention 2048
# echo ""
# time ./flash_attention 4096
# echo ""
time ./flash_attention 8192
echo ""
echo "***All Done."