-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 3ce4a47
Showing
15 changed files
with
1,655 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# CPU_GFLOPS | ||
|
||
## 简介 | ||
|
||
测试硬件平台的峰值浮点性能。在pigirons的基础上增加了对ARM64平台的支持。 | ||
pigirons链接:https://github.com/pigirons/cpufp | ||
|
||
硬件平台:x86、ARM64 | ||
操作系统:Linux | ||
|
||
## 使用方法 | ||
|
||
x86 build: | ||
sh build_x86.sh | ||
|
||
x86 test: | ||
./cpufp_x86 num_cores | ||
|
||
|
||
ARM64 build: | ||
sh build_a64.sh | ||
|
||
ARM64 test: | ||
./cpufp_a64 num_cores | ||
|
||
clean: | ||
sh clean.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Build the ARM64 benchmark binary (cpufp_a64) from smtl.c, cpufp.c and the
# hand-written kernel in cpufp_a64_fma.s.
#
# Stop at the first failing step so a compile/assemble error is not hidden by
# later commands running against stale or missing object files.
set -e

# Start from a clean tree; clean.sh is plain POSIX sh, matching the README.
sh clean.sh

gcc -pthread -O3 -c smtl.c
as -o cpufp_a64_fma.o cpufp_a64_fma.s
# _USE_A64_FMA selects the ARM64 kernels and FLOP constants inside cpufp.c.
gcc -pthread -O3 -D_USE_A64_FMA -c cpufp.c
gcc -pthread -O3 -o cpufp_a64 smtl.o cpufp.o cpufp_a64_fma.o
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Build the x86 benchmark in two stages: compile the cpuid_x86 probe, capture
# what it prints into gen.sh, then run gen.sh to perform the actual build.
#
# Stop at the first failing step so a broken probe never leaves a partial
# gen.sh that is then executed.
set -e

gcc -O3 -c cpuid_x86.c
gcc -O3 -o cpuid_x86 cpuid_x86.o

# The probe's stdout becomes the generated build script.
./cpuid_x86 > gen.sh
chmod 744 gen.sh
./gen.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Delete everything the build scripts produce: object files, the generated
# x86 build script with its probe binary, and both benchmark executables.
# -f keeps this silent (and successful) when a file does not exist.
rm -f \
    *.o \
    cpuid_x86 gen.sh \
    cpufp_x86 cpufp_a64
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
#define _GNU_SOURCE | ||
#include "cpufp_x86.h" | ||
#include "cpufp_a64.h" | ||
#include "smtl.h" | ||
|
||
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
|
||
/*
 * Elapsed time from *start to *end, in seconds.
 *
 * The whole-second and nanosecond differences are taken separately and then
 * combined, so the nanosecond part may be negative when end's tv_nsec is
 * smaller than start's.
 */
static double get_time(struct timespec *start,
                       struct timespec *end)
{
    double whole_seconds = end->tv_sec - start->tv_sec;
    double nanoseconds = end->tv_nsec - start->tv_nsec;

    return whole_seconds + nanoseconds * 1e-9;
}
|
||
/*
 * Build-time kernel selection.
 *
 * Exactly one of _USE_X86_SSE / _USE_X86_AVX / _USE_X86_FMA / _USE_A64_FMA is
 * expected to be defined by the build script. Each branch provides:
 *   FP32_COMP / FP64_COMP   - floating-point operations credited to ONE call
 *                             of the matching kernel (loop count times
 *                             operations per loop iteration);
 *   CPUFP_KERNEL_FP32/FP64  - the assembly kernel to call.
 */
#if defined(_USE_X86_SSE)

#define FP32_COMP (0x60000000L * 64)
#define FP64_COMP (0x60000000L * 32)
#define CPUFP_KERNEL_FP32 cpufp_x86_sse_fp32
#define CPUFP_KERNEL_FP64 cpufp_x86_sse_fp64

#elif defined(_USE_X86_AVX)

/* 0x200000000 iterations, each one 8-lane multiply plus one 8-lane add
 * (4-lane for FP64). */
#define FP32_COMP (0x200000000L * 16)
#define FP64_COMP (0x200000000L * 8)
#define CPUFP_KERNEL_FP32 cpufp_x86_avx_fp32
#define CPUFP_KERNEL_FP64 cpufp_x86_avx_fp64

#elif defined(_USE_X86_FMA)

/* 0x80000000 iterations, each ten 8-lane FMAs (4-lane for FP64), 2 ops per
 * lane. */
#define FP32_COMP (0x80000000L * 160)
#define FP64_COMP (0x80000000L * 80)
#define CPUFP_KERNEL_FP32 cpufp_x86_fma_fp32
#define CPUFP_KERNEL_FP64 cpufp_x86_fma_fp64

#elif defined(_USE_A64_FMA)

/* 0x40000000 iterations, each ten 4-lane FMAs (2-lane for FP64), 2 ops per
 * lane. */
#define FP32_COMP (0x40000000L * 80)
#define FP64_COMP (0x40000000L * 40)
#define CPUFP_KERNEL_FP32 cpufp_a64_fma_fp32
#define CPUFP_KERNEL_FP64 cpufp_a64_fma_fp64

#endif

#if defined(CPUFP_KERNEL_FP32) && defined(CPUFP_KERNEL_FP64)

/* Task entry point: run the selected single-precision kernel once.
 * The task argument is not used. */
static void thread_func_fp32(void *params)
{
    (void)params;
    CPUFP_KERNEL_FP32();
}

/* Task entry point: run the selected double-precision kernel once.
 * The task argument is not used. */
static void thread_func_fp64(void *params)
{
    (void)params;
    CPUFP_KERNEL_FP64();
}

#endif
|
||
static void cpufp_x86(int num_cores) | ||
{ | ||
int i; | ||
struct timespec start, end; | ||
double time_used, perf; | ||
|
||
smtl_handle sh; | ||
smtl_init(&sh, num_cores); | ||
|
||
// warm up | ||
for (i = 0; i < num_cores; i++) | ||
{ | ||
smtl_add_task(sh, thread_func_fp32, NULL); | ||
} | ||
smtl_begin_tasks(sh); | ||
smtl_wait_tasks_finished(sh); | ||
|
||
clock_gettime(CLOCK_MONOTONIC_RAW, &start); | ||
for (i = 0; i < num_cores; i++) | ||
{ | ||
smtl_add_task(sh, thread_func_fp32, NULL); | ||
} | ||
smtl_begin_tasks(sh); | ||
smtl_wait_tasks_finished(sh); | ||
clock_gettime(CLOCK_MONOTONIC_RAW, &end); | ||
|
||
time_used = get_time(&start, &end); | ||
perf = FP32_COMP * num_cores / time_used * 1e-9; | ||
printf("FP32 perf: %.4lf GFLOPS.\n", perf); | ||
|
||
// warm up | ||
for (i = 0; i < num_cores; i++) | ||
{ | ||
smtl_add_task(sh, thread_func_fp64, NULL); | ||
} | ||
smtl_begin_tasks(sh); | ||
smtl_wait_tasks_finished(sh); | ||
|
||
clock_gettime(CLOCK_MONOTONIC_RAW, &start); | ||
for (i = 0; i < num_cores; i++) | ||
{ | ||
smtl_add_task(sh, thread_func_fp64, NULL); | ||
} | ||
smtl_begin_tasks(sh); | ||
smtl_wait_tasks_finished(sh); | ||
clock_gettime(CLOCK_MONOTONIC_RAW, &end); | ||
|
||
time_used = get_time(&start, &end); | ||
perf = FP64_COMP * num_cores / time_used * 1e-9; | ||
printf("FP64 perf: %.4lf GFLOPS.\n", perf); | ||
|
||
smtl_fini(sh); | ||
} | ||
|
||
/*
 * Entry point: `prog num_cores`.
 *
 * num_cores must be a positive decimal integer that fits in an int; it is the
 * number of benchmark tasks run in parallel. Returns 0 after a successful run
 * and EXIT_FAILURE on a usage error or an invalid num_cores.
 */
int main(int argc, char *argv[])
{
    if (argc != 2)
    {
        fprintf(stderr, "Usage: %s num_cores.\n", argv[0]);
        return EXIT_FAILURE;
    }

    /* strtol instead of atoi: atoi cannot report garbage or overflow, and a
     * zero/negative count would reach the thread pool and the GFLOPS maths. */
    char *end = NULL;
    errno = 0;
    long parsed = strtol(argv[1], &end, 10);
    if (errno != 0 || end == argv[1] || *end != '\0' ||
        parsed <= 0 || parsed > INT_MAX)
    {
        fprintf(stderr, "Invalid num_cores '%s': expected a positive integer.\n",
                argv[1]);
        return EXIT_FAILURE;
    }

    int num_cores = (int)parsed;
    printf("Core used: %d\n", num_cores);

    cpufp_x86(num_cores);

    return 0;
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#ifndef _CPUFP_A64_H
#define _CPUFP_A64_H

/*
 * ARM64 FMA benchmark kernels, implemented in cpufp_a64_fma.s.
 * Declared with (void) so they are real prototypes: calls that pass
 * arguments are rejected at compile time.
 */
#ifdef _USE_A64_FMA
void cpufp_a64_fma_fp32(void); /* single-precision FMA loop */
void cpufp_a64_fma_fp64(void); /* double-precision FMA loop */
#endif

#endif
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
.globl cpufp_a64_fma_fp32 | ||
.globl cpufp_a64_fma_fp64 | ||
|
||
// void cpufp_a64_fma_fp32(void)
//
// Runs 0x40000000 loop iterations; each iteration issues ten independent
// 4-lane single-precision FMLA instructions, one per accumulator register.
// All accumulators are zeroed first, so every FMLA computes 0*0+0.
//
// Accumulators are v0-v7, v16 and v17. v8-v15 are deliberately avoided:
// their low 64 bits are callee-saved under AAPCS64 and this routine has no
// save/restore sequence.
    .type   cpufp_a64_fma_fp32, %function
cpufp_a64_fma_fp32:
    ldr     x0, =0x40000000             // loop counter
    eor     v0.16b, v0.16b, v0.16b
    eor     v1.16b, v1.16b, v1.16b
    eor     v2.16b, v2.16b, v2.16b
    eor     v3.16b, v3.16b, v3.16b
    eor     v4.16b, v4.16b, v4.16b
    eor     v5.16b, v5.16b, v5.16b
    eor     v6.16b, v6.16b, v6.16b
    eor     v7.16b, v7.16b, v7.16b
    eor     v16.16b, v16.16b, v16.16b
    eor     v17.16b, v17.16b, v17.16b
.cpufp.a64.fma.fp32.L1:
    fmla    v0.4s, v0.4s, v0.4s
    fmla    v1.4s, v1.4s, v1.4s
    fmla    v2.4s, v2.4s, v2.4s
    fmla    v3.4s, v3.4s, v3.4s
    fmla    v4.4s, v4.4s, v4.4s
    subs    x0, x0, #1                  // counter update sits mid-loop, flags used by bne below
    fmla    v5.4s, v5.4s, v5.4s
    fmla    v6.4s, v6.4s, v6.4s
    fmla    v7.4s, v7.4s, v7.4s
    fmla    v16.4s, v16.4s, v16.4s
    fmla    v17.4s, v17.4s, v17.4s
    bne     .cpufp.a64.fma.fp32.L1
    ret
    .size   cpufp_a64_fma_fp32, .-cpufp_a64_fma_fp32
|
||
// void cpufp_a64_fma_fp64(void)
//
// Runs 0x40000000 loop iterations; each iteration issues ten independent
// 2-lane double-precision FMLA instructions, one per accumulator register.
// All accumulators are zeroed first, so every FMLA computes 0*0+0.
//
// Accumulators are v0-v7, v16 and v17. v8-v15 are deliberately avoided:
// their low 64 bits are callee-saved under AAPCS64 and this routine has no
// save/restore sequence.
    .type   cpufp_a64_fma_fp64, %function
cpufp_a64_fma_fp64:
    ldr     x0, =0x40000000             // loop counter
    eor     v0.16b, v0.16b, v0.16b
    eor     v1.16b, v1.16b, v1.16b
    eor     v2.16b, v2.16b, v2.16b
    eor     v3.16b, v3.16b, v3.16b
    eor     v4.16b, v4.16b, v4.16b
    eor     v5.16b, v5.16b, v5.16b
    eor     v6.16b, v6.16b, v6.16b
    eor     v7.16b, v7.16b, v7.16b
    eor     v16.16b, v16.16b, v16.16b
    eor     v17.16b, v17.16b, v17.16b
.cpufp.a64.fma.fp64.L1:
    fmla    v0.2d, v0.2d, v0.2d
    fmla    v1.2d, v1.2d, v1.2d
    fmla    v2.2d, v2.2d, v2.2d
    fmla    v3.2d, v3.2d, v3.2d
    fmla    v4.2d, v4.2d, v4.2d
    subs    x0, x0, #1                  // counter update sits mid-loop, flags used by bne below
    fmla    v5.2d, v5.2d, v5.2d
    fmla    v6.2d, v6.2d, v6.2d
    fmla    v7.2d, v7.2d, v7.2d
    fmla    v16.2d, v16.2d, v16.2d
    fmla    v17.2d, v17.2d, v17.2d
    bne     .cpufp.a64.fma.fp64.L1
    ret
    .size   cpufp_a64_fma_fp64, .-cpufp_a64_fma_fp64
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#ifndef _CPUFP_X86_H
#define _CPUFP_X86_H

/*
 * x86 benchmark kernels implemented in assembly. Only the pair matching the
 * _USE_X86_* macro chosen at build time is declared.
 * Declared with (void) so they are real prototypes: calls that pass
 * arguments are rejected at compile time.
 */

#ifdef _USE_X86_SSE
void cpufp_x86_sse_fp32(void);
void cpufp_x86_sse_fp64(void);
#endif

#ifdef _USE_X86_AVX
void cpufp_x86_avx_fp32(void);
void cpufp_x86_avx_fp64(void);
#endif

#ifdef _USE_X86_FMA
void cpufp_x86_fma_fp32(void);
void cpufp_x86_fma_fp64(void);
#endif

#endif
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
.globl cpufp_x86_avx_fp32 | ||
.globl cpufp_x86_avx_fp64 | ||
|
||
# void cpufp_x86_avx_fp32(void)
#
# Runs 0x200000000 loop iterations; each iteration issues one 8-lane
# single-precision multiply and one 8-lane single-precision add on zeroed
# ymm registers. The sources (ymm2, ymm4) are never written inside the loop,
# so successive iterations do not depend on each other.
# Uses rax as the loop counter and writes ymm1-ymm4.
    .type   cpufp_x86_avx_fp32, @function
cpufp_x86_avx_fp32:
    mov     $0x200000000, %rax          # loop counter
    vxorps  %ymm1, %ymm1, %ymm1
    vxorps  %ymm2, %ymm2, %ymm2
    vxorps  %ymm3, %ymm3, %ymm3
    vxorps  %ymm4, %ymm4, %ymm4
.cpufp.x86.avx.fp32.L1:
    vmulps  %ymm2, %ymm2, %ymm1
    vaddps  %ymm4, %ymm4, %ymm3
    sub     $0x1, %rax
    jne     .cpufp.x86.avx.fp32.L1
    ret
    .size   cpufp_x86_avx_fp32, .-cpufp_x86_avx_fp32
|
||
# void cpufp_x86_avx_fp64(void)
#
# Runs 0x200000000 loop iterations; each iteration issues one 4-lane
# double-precision multiply and one 4-lane double-precision add on zeroed
# ymm registers. The sources (ymm2, ymm4) are never written inside the loop,
# so successive iterations do not depend on each other.
# Uses rax as the loop counter and writes ymm1-ymm4.
    .type   cpufp_x86_avx_fp64, @function
cpufp_x86_avx_fp64:
    mov     $0x200000000, %rax          # loop counter
    vxorpd  %ymm1, %ymm1, %ymm1
    vxorpd  %ymm2, %ymm2, %ymm2
    vxorpd  %ymm3, %ymm3, %ymm3
    vxorpd  %ymm4, %ymm4, %ymm4
.cpufp.x86.avx.fp64.L1:
    vmulpd  %ymm2, %ymm2, %ymm1
    vaddpd  %ymm4, %ymm4, %ymm3
    sub     $0x1, %rax
    jne     .cpufp.x86.avx.fp64.L1
    ret
    .size   cpufp_x86_avx_fp64, .-cpufp_x86_avx_fp64
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
.globl cpufp_x86_fma_fp32 | ||
.globl cpufp_x86_fma_fp64 | ||
|
||
# void cpufp_x86_fma_fp32(void)
#
# Runs 0x80000000 loop iterations; each iteration issues ten 8-lane
# single-precision fused multiply-adds, one per register ymm0-ymm9, so the ten
# dependency chains are independent of each other. All ten registers are
# zeroed first, so every FMA computes 0*0+0.
# Uses rax as the loop counter and writes ymm0-ymm9.
    .type   cpufp_x86_fma_fp32, @function
cpufp_x86_fma_fp32:
    mov     $0x80000000, %rax           # loop counter
    vxorps  %ymm0, %ymm0, %ymm0
    vxorps  %ymm1, %ymm1, %ymm1
    vxorps  %ymm2, %ymm2, %ymm2
    vxorps  %ymm3, %ymm3, %ymm3
    vxorps  %ymm4, %ymm4, %ymm4
    vxorps  %ymm5, %ymm5, %ymm5
    vxorps  %ymm6, %ymm6, %ymm6
    vxorps  %ymm7, %ymm7, %ymm7
    vxorps  %ymm8, %ymm8, %ymm8
    vxorps  %ymm9, %ymm9, %ymm9
.cpufp.x86.fma.fp32.L1:
    vfmadd132ps %ymm0, %ymm0, %ymm0
    vfmadd132ps %ymm1, %ymm1, %ymm1
    vfmadd132ps %ymm2, %ymm2, %ymm2
    vfmadd132ps %ymm3, %ymm3, %ymm3
    vfmadd132ps %ymm4, %ymm4, %ymm4
    vfmadd132ps %ymm5, %ymm5, %ymm5
    vfmadd132ps %ymm6, %ymm6, %ymm6
    vfmadd132ps %ymm7, %ymm7, %ymm7
    vfmadd132ps %ymm8, %ymm8, %ymm8
    vfmadd132ps %ymm9, %ymm9, %ymm9
    sub     $0x1, %rax
    jne     .cpufp.x86.fma.fp32.L1
    ret
    .size   cpufp_x86_fma_fp32, .-cpufp_x86_fma_fp32
|
||
# void cpufp_x86_fma_fp64(void)
#
# Runs 0x80000000 loop iterations; each iteration issues ten 4-lane
# double-precision fused multiply-adds, one per register ymm0-ymm9, so the ten
# dependency chains are independent of each other. All ten registers are
# zeroed first, so every FMA computes 0*0+0.
# Uses rax as the loop counter and writes ymm0-ymm9.
    .type   cpufp_x86_fma_fp64, @function
cpufp_x86_fma_fp64:
    mov     $0x80000000, %rax           # loop counter
    vxorpd  %ymm0, %ymm0, %ymm0
    vxorpd  %ymm1, %ymm1, %ymm1
    vxorpd  %ymm2, %ymm2, %ymm2
    vxorpd  %ymm3, %ymm3, %ymm3
    vxorpd  %ymm4, %ymm4, %ymm4
    vxorpd  %ymm5, %ymm5, %ymm5
    vxorpd  %ymm6, %ymm6, %ymm6
    vxorpd  %ymm7, %ymm7, %ymm7
    vxorpd  %ymm8, %ymm8, %ymm8
    vxorpd  %ymm9, %ymm9, %ymm9
.cpufp.x86.fma.fp64.L1:
    vfmadd132pd %ymm0, %ymm0, %ymm0
    vfmadd132pd %ymm1, %ymm1, %ymm1
    vfmadd132pd %ymm2, %ymm2, %ymm2
    vfmadd132pd %ymm3, %ymm3, %ymm3
    vfmadd132pd %ymm4, %ymm4, %ymm4
    vfmadd132pd %ymm5, %ymm5, %ymm5
    vfmadd132pd %ymm6, %ymm6, %ymm6
    vfmadd132pd %ymm7, %ymm7, %ymm7
    vfmadd132pd %ymm8, %ymm8, %ymm8
    vfmadd132pd %ymm9, %ymm9, %ymm9
    sub     $0x1, %rax
    jne     .cpufp.x86.fma.fp64.L1
    ret
    .size   cpufp_x86_fma_fp64, .-cpufp_x86_fma_fp64
|
Oops, something went wrong.