-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: run-flash.sh
More file actions
58 lines (51 loc) · 1.15 KB
/
run-flash.sh
File metadata and controls
58 lines (51 loc) · 1.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/bin/bash
# SLURM batch script: build and time the flash_attention CUDA benchmark
# on one GPU node. Submit with: sbatch run-flash.sh
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --mem-per-cpu=45G
#SBATCH --partition=gpu
#SBATCH --reservation=cpsc424gpu
#SBATCH -t 20:00
#SBATCH --job-name=flashattn
#SBATCH --gpus=1

# Start from a clean module environment, then load CUDA.
echo "***Purging module files"
echo ""
module purge
echo ""
echo "***Loading CUDA module file"
echo ""
module load CUDA
echo ""
module list
echo ""

# Record which GPU the job landed on.
echo "***Running nvidia-smi"
echo ""
nvidia-smi
echo ""
echo ""
echo "***Running deviceQuery"
/vast/palmer/apps/avx.grace/software/CUDAcore/11.3.1/extras/demo_suite/deviceQuery
echo ""

# Rebuild from scratch; abort the job if the build fails so we never
# time a stale or missing binary.
echo "***Building flash_attention"
make clean
make flash_attention || { echo "***Build failed; aborting." >&2; exit 1; }

# Now run the code. Note that if you turn on the error check using a
# cpu matmul code to check the answers, you will need more time for
# the job (possibly as much as 2 hours if you run all 4 test cases)
echo ""
echo "***Running Flash Attention module (n)"
# Uncomment additional sizes as needed (n = sequence length).
# time ./flash_attention 128
# echo ""
# time ./flash_attention 256
# echo ""
# time ./flash_attention 512
# echo ""
# time ./flash_attention 1024 # GPT2
# echo ""
# time ./flash_attention 2048
# echo ""
# time ./flash_attention 4096
# echo ""
time ./flash_attention 8192
echo ""
echo "***All Done."