Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
zkq committed Dec 11, 2018
0 parents commit 3ce4a47
Show file tree
Hide file tree
Showing 15 changed files with 1,655 additions and 0 deletions.
674 changes: 674 additions & 0 deletions LICENSE

Large diffs are not rendered by default.

27 changes: 27 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# CPU_GFLOPS

## 简介

测试硬件平台的峰值浮点性能。在pigirons的基础上增加了对ARM64平台的支持。
pigirons链接:https://github.com/pigirons/cpufp

硬件平台:x86、ARM64
操作系统:Linux

## 使用方法

x86 build:

```sh
sh build_x86.sh
```

x86 test:

```sh
./cpufp_x86 num_cores
```

ARM64 build:

```sh
sh build_a64.sh
```

ARM64 test:

```sh
./cpufp_a64 num_cores
```

clean:

```sh
sh clean.sh
```
5 changes: 5 additions & 0 deletions build_a64.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Build the ARM64 benchmark binary cpufp_a64.
#
# Steps: remove old build products, compile the thread helper (smtl.c),
# assemble the AArch64 FMA kernels, compile the driver with the A64 backend
# selected (-D_USE_A64_FMA), then link everything together.

# Abort on the first failing step so that a failed compile/assemble never
# falls through to a link against missing or stale object files.
set -e

bash clean.sh
gcc -pthread -O3 -c smtl.c
as -o cpufp_a64_fma.o cpufp_a64_fma.s
gcc -pthread -O3 -D_USE_A64_FMA -c cpufp.c
gcc -pthread -O3 -o cpufp_a64 smtl.o cpufp.o cpufp_a64_fma.o
5 changes: 5 additions & 0 deletions build_x86.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Build the x86 benchmark in two stages:
#   1. compile and link the small probe program cpuid_x86;
#   2. run the probe, save whatever it prints as gen.sh, and execute that
#      script (presumably it emits the build commands matching the host's
#      SIMD capabilities -- see cpuid_x86.c to confirm).
probe=cpuid_x86

gcc -O3 -c "${probe}.c"
gcc -O3 -o "${probe}" "${probe}.o"

"./${probe}" > gen.sh
chmod 744 gen.sh
./gen.sh
1 change: 1 addition & 0 deletions clean.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Remove all build products: object files, both benchmark binaries, the x86
# probe program and the build script it generates. -f keeps this silent when
# a file does not exist.
rm -f \
    *.o \
    cpufp_x86 cpuid_x86 gen.sh \
    cpufp_a64
149 changes: 149 additions & 0 deletions cpufp.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
#define _GNU_SOURCE
#include "cpufp_x86.h"
#include "cpufp_a64.h"
#include "smtl.h"

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/*
 * Return the time elapsed from *start to *end, in seconds.
 *
 * The whole-second and nanosecond fields are differenced separately; the
 * nanosecond difference may be negative (when end's tv_nsec is smaller than
 * start's) and is simply added to the second difference after scaling.
 */
static double get_time(struct timespec *start,
                       struct timespec *end)
{
    double whole_seconds = (double)(end->tv_sec - start->tv_sec);
    double nanoseconds = (double)(end->tv_nsec - start->tv_nsec);

    return whole_seconds + nanoseconds * 1e-9;
}

/*
 * Backend selection.
 *
 * One of the _USE_* macros chooses which assembly kernels are benchmarked
 * (build_a64.sh passes -D_USE_A64_FMA; the x86 macros are presumably set by
 * the generated gen.sh -- confirm against cpuid_x86.c).
 *
 * FP32_COMP / FP64_COMP are the number of floating-point operations one
 * kernel call performs, written as (loop iterations) * (operations per
 * iteration). They must stay in sync with the matching .s file.
 *
 * thread_func_fp32 / thread_func_fp64 adapt the argument-less kernels to the
 * void(void *) task signature handed to smtl_add_task; the parameter is
 * unused.
 */
#if defined(_USE_X86_SSE)

/* NOTE(review): the SSE kernel source is not visible here -- verify that it
 * loops 0x60000000 times with 64 FP32 / 32 FP64 operations per iteration. */
#define FP32_COMP (0x60000000L * 64)
#define FP64_COMP (0x60000000L * 32)

static void thread_func_fp32(void *params)
{
    (void)params;
    cpufp_x86_sse_fp32();
}

static void thread_func_fp64(void *params)
{
    (void)params;
    cpufp_x86_sse_fp64();
}

#elif defined(_USE_X86_AVX)

/* 0x200000000 iterations; one 256-bit multiply plus one 256-bit add per
 * iteration: 2 * 8 FP32 lanes = 16, 2 * 4 FP64 lanes = 8. */
#define FP32_COMP (0x200000000L * 16)
#define FP64_COMP (0x200000000L * 8)

static void thread_func_fp32(void *params)
{
    (void)params;
    cpufp_x86_avx_fp32();
}

static void thread_func_fp64(void *params)
{
    (void)params;
    cpufp_x86_avx_fp64();
}

#elif defined(_USE_X86_FMA)

/* 0x80000000 iterations; ten 256-bit FMAs per iteration, each counted as a
 * multiply and an add: 10 * 8 * 2 = 160 (FP32), 10 * 4 * 2 = 80 (FP64). */
#define FP32_COMP (0x80000000L * 160)
#define FP64_COMP (0x80000000L * 80)

static void thread_func_fp32(void *params)
{
    (void)params;
    cpufp_x86_fma_fp32();
}

static void thread_func_fp64(void *params)
{
    (void)params;
    cpufp_x86_fma_fp64();
}

#elif defined(_USE_A64_FMA)

/* 0x40000000 iterations; ten 128-bit FMLAs per iteration, each counted as a
 * multiply and an add: 10 * 4 * 2 = 80 (FP32), 10 * 2 * 2 = 40 (FP64). */
#define FP32_COMP (0x40000000L * 80)
#define FP64_COMP (0x40000000L * 40)

static void thread_func_fp32(void *params)
{
    (void)params;
    cpufp_a64_fma_fp32();
}

static void thread_func_fp64(void *params)
{
    (void)params;
    cpufp_a64_fma_fp64();
}

#endif

/*
 * Queue one instance of `task` per worker, start the batch and block until
 * every instance has finished.
 */
static void run_task_batch(smtl_handle sh, void (*task)(void *), int num_cores)
{
    int i;

    for (i = 0; i < num_cores; i++)
    {
        smtl_add_task(sh, task, NULL);
    }
    smtl_begin_tasks(sh);
    smtl_wait_tasks_finished(sh);
}

/*
 * Run `task` on all workers twice -- an untimed warm-up batch followed by a
 * timed batch -- and return the wall-clock duration of the timed batch in
 * seconds. The timed interval covers queuing, starting and waiting.
 */
static double time_task_batch(smtl_handle sh, void (*task)(void *), int num_cores)
{
    struct timespec start, end;

    // warm up
    run_task_batch(sh, task, num_cores);

    clock_gettime(CLOCK_MONOTONIC_RAW, &start);
    run_task_batch(sh, task, num_cores);
    clock_gettime(CLOCK_MONOTONIC_RAW, &end);

    return get_time(&start, &end);
}

/*
 * Measure and print the aggregate FP32 and FP64 throughput (in GFLOPS) of
 * the selected kernel when run concurrently on `num_cores` worker threads.
 *
 * Despite the name, this is the driver for every backend: main() calls it
 * unconditionally, including in the ARM64 build.
 *
 * num_cores: number of worker threads to create and tasks to run per batch.
 */
static void cpufp_x86(int num_cores)
{
    double time_used, perf;
    smtl_handle sh;

    smtl_init(&sh, num_cores);

    /* Total operations = per-kernel count * number of concurrent kernels. */
    time_used = time_task_batch(sh, thread_func_fp32, num_cores);
    perf = FP32_COMP * num_cores / time_used * 1e-9;
    printf("FP32 perf: %.4lf GFLOPS.\n", perf);

    time_used = time_task_batch(sh, thread_func_fp64, num_cores);
    perf = FP64_COMP * num_cores / time_used * 1e-9;
    printf("FP64 perf: %.4lf GFLOPS.\n", perf);

    smtl_fini(sh);
}

/*
 * Entry point: cpufp <num_cores>
 *
 * Parses the single required argument as the number of worker threads and
 * runs the benchmark. Returns 0 on success and EXIT_FAILURE when the
 * argument is missing or is not a positive integer that fits in an int.
 */
int main(int argc, char *argv[])
{
    char *end = NULL;
    long requested;

    if (argc != 2)
    {
        fprintf(stderr, "Usage: %s num_cores.\n", argv[0]);
        /* A usage error is a failure; report it through the exit status. */
        return EXIT_FAILURE;
    }

    /* strtol instead of atoi so that garbage, overflow, zero and negative
     * values are rejected rather than silently producing a bogus thread
     * count (and a division by a near-zero elapsed time). */
    errno = 0;
    requested = strtol(argv[1], &end, 10);
    if (errno != 0 || end == argv[1] || *end != '\0' ||
        requested <= 0 || requested > INT_MAX)
    {
        fprintf(stderr, "Invalid num_cores '%s': expected a positive integer.\n",
                argv[1]);
        return EXIT_FAILURE;
    }

    int num_cores = (int)requested;
    printf("Core used: %d\n", num_cores);

    cpufp_x86(num_cores);

    return 0;
}

10 changes: 10 additions & 0 deletions cpufp_a64.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/*
 * Declarations of the AArch64 benchmark kernels implemented in
 * cpufp_a64_fma.s. They are only visible when the A64 FMA backend is
 * selected with -D_USE_A64_FMA.
 */
#ifndef _CPUFP_A64_H
#define _CPUFP_A64_H

#ifdef _USE_A64_FMA
/* `(void)` makes these real prototypes: in C (before C23) an empty
 * parameter list leaves the arguments unchecked. */

/* Runs the FP32 FMA kernel to completion; takes and returns nothing. */
void cpufp_a64_fma_fp32(void);
/* Runs the FP64 FMA kernel to completion; takes and returns nothing. */
void cpufp_a64_fma_fp64(void);
#endif

#endif

56 changes: 56 additions & 0 deletions cpufp_a64_fma.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
.globl cpufp_a64_fma_fp32
.globl cpufp_a64_fma_fp64

// void cpufp_a64_fma_fp32(void)
//
// FP32 fused multiply-add throughput kernel.
// Executes 0x40000000 loop iterations; every iteration issues ten FMLA
// instructions on 4-lane single-precision vectors, each on its own
// accumulator so the ten are mutually independent.
// That is 10 * 4 lanes * 2 operations = 80 operations per iteration, the
// factor used by FP32_COMP in cpufp.c.
//
// Clobbers x0, the condition flags and v0-v7, v16, v17. v8-v15 are avoided
// on purpose: AAPCS64 requires a callee to preserve their low 64 bits, and
// this routine saves nothing.
cpufp_a64_fma_fp32:
    ldr     x0, =0x40000000             // loop counter
    // Zero every accumulator (x EOR x == 0) so each FMLA computes 0 + 0*0
    // and the values stay at zero for the whole run.
    eor     v0.16b, v0.16b, v0.16b
    eor     v1.16b, v1.16b, v1.16b
    eor     v2.16b, v2.16b, v2.16b
    eor     v3.16b, v3.16b, v3.16b
    eor     v4.16b, v4.16b, v4.16b
    eor     v5.16b, v5.16b, v5.16b
    eor     v6.16b, v6.16b, v6.16b
    eor     v7.16b, v7.16b, v7.16b
    eor     v16.16b, v16.16b, v16.16b
    eor     v17.16b, v17.16b, v17.16b
.cpufp.a64.fma.fp32.L1:
    fmla    v0.4s, v0.4s, v0.4s
    fmla    v1.4s, v1.4s, v1.4s
    fmla    v2.4s, v2.4s, v2.4s
    fmla    v3.4s, v3.4s, v3.4s
    fmla    v4.4s, v4.4s, v4.4s
    // The counter update sits mid-loop, presumably to separate it from the
    // branch that consumes its flags; FMLA does not modify the flags.
    subs    x0, x0, #1
    fmla    v5.4s, v5.4s, v5.4s
    fmla    v6.4s, v6.4s, v6.4s
    fmla    v7.4s, v7.4s, v7.4s
    fmla    v16.4s, v16.4s, v16.4s
    fmla    v17.4s, v17.4s, v17.4s
    bne     .cpufp.a64.fma.fp32.L1
    ret

// void cpufp_a64_fma_fp64(void)
//
// FP64 fused multiply-add throughput kernel.
// Executes 0x40000000 loop iterations; every iteration issues ten FMLA
// instructions on 2-lane double-precision vectors, each on its own
// accumulator so the ten are mutually independent.
// That is 10 * 2 lanes * 2 operations = 40 operations per iteration, the
// factor used by FP64_COMP in cpufp.c.
//
// Clobbers x0, the condition flags and v0-v7, v16, v17. v8-v15 are avoided
// on purpose: AAPCS64 requires a callee to preserve their low 64 bits, and
// this routine saves nothing.
cpufp_a64_fma_fp64:
    ldr     x0, =0x40000000             // loop counter
    // Zero every accumulator (x EOR x == 0) so each FMLA computes 0 + 0*0
    // and the values stay at zero for the whole run.
    eor     v0.16b, v0.16b, v0.16b
    eor     v1.16b, v1.16b, v1.16b
    eor     v2.16b, v2.16b, v2.16b
    eor     v3.16b, v3.16b, v3.16b
    eor     v4.16b, v4.16b, v4.16b
    eor     v5.16b, v5.16b, v5.16b
    eor     v6.16b, v6.16b, v6.16b
    eor     v7.16b, v7.16b, v7.16b
    eor     v16.16b, v16.16b, v16.16b
    eor     v17.16b, v17.16b, v17.16b
.cpufp.a64.fma.fp64.L1:
    fmla    v0.2d, v0.2d, v0.2d
    fmla    v1.2d, v1.2d, v1.2d
    fmla    v2.2d, v2.2d, v2.2d
    fmla    v3.2d, v3.2d, v3.2d
    fmla    v4.2d, v4.2d, v4.2d
    // The counter update sits mid-loop, presumably to separate it from the
    // branch that consumes its flags; FMLA does not modify the flags.
    subs    x0, x0, #1
    fmla    v5.2d, v5.2d, v5.2d
    fmla    v6.2d, v6.2d, v6.2d
    fmla    v7.2d, v7.2d, v7.2d
    fmla    v16.2d, v16.2d, v16.2d
    fmla    v17.2d, v17.2d, v17.2d
    bne     .cpufp.a64.fma.fp64.L1
    ret
20 changes: 20 additions & 0 deletions cpufp_x86.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
 * Declarations of the x86 benchmark kernels implemented in assembly. Each
 * pair is only visible when the corresponding backend macro is defined.
 * Every kernel takes no arguments and returns nothing; `(void)` makes the
 * declarations real prototypes (in C before C23 an empty parameter list
 * leaves the arguments unchecked).
 */
#ifndef _CPUFP_X86_H
#define _CPUFP_X86_H

#ifdef _USE_X86_SSE
/* SSE kernels (FP32 / FP64). */
void cpufp_x86_sse_fp32(void);
void cpufp_x86_sse_fp64(void);
#endif

#ifdef _USE_X86_AVX
/* AVX kernels (FP32 / FP64), see cpufp_x86_avx.s. */
void cpufp_x86_avx_fp32(void);
void cpufp_x86_avx_fp64(void);
#endif

#ifdef _USE_X86_FMA
/* FMA kernels (FP32 / FP64), see cpufp_x86_fma.s. */
void cpufp_x86_fma_fp32(void);
void cpufp_x86_fma_fp64(void);
#endif

#endif

29 changes: 29 additions & 0 deletions cpufp_x86_avx.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
.globl cpufp_x86_avx_fp32
.globl cpufp_x86_avx_fp64

/*
 * void cpufp_x86_avx_fp32(void)
 *
 * FP32 throughput kernel using separate AVX multiply and add.
 * Runs 0x200000000 loop iterations; each iteration issues one 256-bit
 * VMULPS and one 256-bit VADDPS (2 * 8 lanes = 16 operations, the factor
 * used by FP32_COMP in cpufp.c). Sources (ymm2, ymm4) and destinations
 * (ymm1, ymm3) are distinct registers, so no iteration depends on the
 * result of a previous one.
 *
 * Clobbers rax, the flags and ymm1-ymm4.
 */
    .type cpufp_x86_avx_fp32, @function
cpufp_x86_avx_fp32:
    mov $0x200000000, %rax              /* loop counter */
    /* Zero all operands so the arithmetic runs on plain zeros. */
    vxorps %ymm1, %ymm1, %ymm1
    vxorps %ymm2, %ymm2, %ymm2
    vxorps %ymm3, %ymm3, %ymm3
    vxorps %ymm4, %ymm4, %ymm4
.cpufp.x86.avx.fp32.L1:
    vmulps %ymm2, %ymm2, %ymm1
    vaddps %ymm4, %ymm4, %ymm3
    sub $0x1, %rax
    jne .cpufp.x86.avx.fp32.L1
    /* Clear the upper YMM halves before returning to compiler-generated
     * code, which may use legacy SSE encodings and would otherwise pay
     * AVX/SSE transition penalties. Executed once, outside the loop. */
    vzeroupper
    ret
    .size cpufp_x86_avx_fp32, .-cpufp_x86_avx_fp32

/*
 * void cpufp_x86_avx_fp64(void)
 *
 * FP64 throughput kernel using separate AVX multiply and add.
 * Runs 0x200000000 loop iterations; each iteration issues one 256-bit
 * VMULPD and one 256-bit VADDPD (2 * 4 lanes = 8 operations, the factor
 * used by FP64_COMP in cpufp.c). Sources (ymm2, ymm4) and destinations
 * (ymm1, ymm3) are distinct registers, so no iteration depends on the
 * result of a previous one.
 *
 * Clobbers rax, the flags and ymm1-ymm4.
 */
    .type cpufp_x86_avx_fp64, @function
cpufp_x86_avx_fp64:
    mov $0x200000000, %rax              /* loop counter */
    /* Zero all operands so the arithmetic runs on plain zeros. */
    vxorpd %ymm1, %ymm1, %ymm1
    vxorpd %ymm2, %ymm2, %ymm2
    vxorpd %ymm3, %ymm3, %ymm3
    vxorpd %ymm4, %ymm4, %ymm4
.cpufp.x86.avx.fp64.L1:
    vmulpd %ymm2, %ymm2, %ymm1
    vaddpd %ymm4, %ymm4, %ymm3
    sub $0x1, %rax
    jne .cpufp.x86.avx.fp64.L1
    /* Clear the upper YMM halves before returning to compiler-generated
     * code, which may use legacy SSE encodings and would otherwise pay
     * AVX/SSE transition penalties. Executed once, outside the loop. */
    vzeroupper
    ret
    .size cpufp_x86_avx_fp64, .-cpufp_x86_avx_fp64

/* File-level marker (must stay last): this object needs no executable
 * stack. Without it the x86-64 GNU linker assumes one is required. */
    .section .note.GNU-stack,"",@progbits

57 changes: 57 additions & 0 deletions cpufp_x86_fma.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
.globl cpufp_x86_fma_fp32
.globl cpufp_x86_fma_fp64

/*
 * void cpufp_x86_fma_fp32(void)
 *
 * FP32 fused multiply-add throughput kernel.
 * Runs 0x80000000 loop iterations; each iteration issues ten 256-bit
 * VFMADD132PS instructions, each on its own register so the ten are
 * mutually independent. That is 10 * 8 lanes * 2 operations = 160
 * operations per iteration, the factor used by FP32_COMP in cpufp.c.
 *
 * Clobbers rax, the flags and ymm0-ymm9.
 */
    .type cpufp_x86_fma_fp32, @function
cpufp_x86_fma_fp32:
    mov $0x80000000, %rax               /* loop counter */
    /* Zero every accumulator so each FMA computes 0*0 + 0 and the values
     * stay at zero for the whole run. */
    vxorps %ymm0, %ymm0, %ymm0
    vxorps %ymm1, %ymm1, %ymm1
    vxorps %ymm2, %ymm2, %ymm2
    vxorps %ymm3, %ymm3, %ymm3
    vxorps %ymm4, %ymm4, %ymm4
    vxorps %ymm5, %ymm5, %ymm5
    vxorps %ymm6, %ymm6, %ymm6
    vxorps %ymm7, %ymm7, %ymm7
    vxorps %ymm8, %ymm8, %ymm8
    vxorps %ymm9, %ymm9, %ymm9
.cpufp.x86.fma.fp32.L1:
    vfmadd132ps %ymm0, %ymm0, %ymm0
    vfmadd132ps %ymm1, %ymm1, %ymm1
    vfmadd132ps %ymm2, %ymm2, %ymm2
    vfmadd132ps %ymm3, %ymm3, %ymm3
    vfmadd132ps %ymm4, %ymm4, %ymm4
    vfmadd132ps %ymm5, %ymm5, %ymm5
    vfmadd132ps %ymm6, %ymm6, %ymm6
    vfmadd132ps %ymm7, %ymm7, %ymm7
    vfmadd132ps %ymm8, %ymm8, %ymm8
    vfmadd132ps %ymm9, %ymm9, %ymm9
    sub $0x1, %rax
    jne .cpufp.x86.fma.fp32.L1
    /* Clear the upper YMM halves before returning to compiler-generated
     * code, which may use legacy SSE encodings and would otherwise pay
     * AVX/SSE transition penalties. Executed once, outside the loop. */
    vzeroupper
    ret
    .size cpufp_x86_fma_fp32, .-cpufp_x86_fma_fp32

/*
 * void cpufp_x86_fma_fp64(void)
 *
 * FP64 fused multiply-add throughput kernel.
 * Runs 0x80000000 loop iterations; each iteration issues ten 256-bit
 * VFMADD132PD instructions, each on its own register so the ten are
 * mutually independent. That is 10 * 4 lanes * 2 operations = 80
 * operations per iteration, the factor used by FP64_COMP in cpufp.c.
 *
 * Clobbers rax, the flags and ymm0-ymm9.
 */
    .type cpufp_x86_fma_fp64, @function
cpufp_x86_fma_fp64:
    mov $0x80000000, %rax               /* loop counter */
    /* Zero every accumulator so each FMA computes 0*0 + 0 and the values
     * stay at zero for the whole run. */
    vxorpd %ymm0, %ymm0, %ymm0
    vxorpd %ymm1, %ymm1, %ymm1
    vxorpd %ymm2, %ymm2, %ymm2
    vxorpd %ymm3, %ymm3, %ymm3
    vxorpd %ymm4, %ymm4, %ymm4
    vxorpd %ymm5, %ymm5, %ymm5
    vxorpd %ymm6, %ymm6, %ymm6
    vxorpd %ymm7, %ymm7, %ymm7
    vxorpd %ymm8, %ymm8, %ymm8
    vxorpd %ymm9, %ymm9, %ymm9
.cpufp.x86.fma.fp64.L1:
    vfmadd132pd %ymm0, %ymm0, %ymm0
    vfmadd132pd %ymm1, %ymm1, %ymm1
    vfmadd132pd %ymm2, %ymm2, %ymm2
    vfmadd132pd %ymm3, %ymm3, %ymm3
    vfmadd132pd %ymm4, %ymm4, %ymm4
    vfmadd132pd %ymm5, %ymm5, %ymm5
    vfmadd132pd %ymm6, %ymm6, %ymm6
    vfmadd132pd %ymm7, %ymm7, %ymm7
    vfmadd132pd %ymm8, %ymm8, %ymm8
    vfmadd132pd %ymm9, %ymm9, %ymm9
    sub $0x1, %rax
    jne .cpufp.x86.fma.fp64.L1
    /* Clear the upper YMM halves before returning to compiler-generated
     * code, which may use legacy SSE encodings and would otherwise pay
     * AVX/SSE transition penalties. Executed once, outside the loop. */
    vzeroupper
    ret
    .size cpufp_x86_fma_fp64, .-cpufp_x86_fma_fp64

/* File-level marker (must stay last): this object needs no executable
 * stack. Without it the x86-64 GNU linker assumes one is required. */
    .section .note.GNU-stack,"",@progbits

Loading

0 comments on commit 3ce4a47

Please sign in to comment.