Skip to content

Commit

Permalink
Merge pull request #15 from JohnnyFFM/avx512
Browse files Browse the repository at this point in the history
Avx512
  • Loading branch information
JohnnyFFM authored Sep 20, 2018
2 parents 3d1ca44 + d0abd18 commit ffb4163
Show file tree
Hide file tree
Showing 13 changed files with 1,399 additions and 18 deletions.
12 changes: 12 additions & 0 deletions blagominer.sln
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,16 @@ Global
DebugAVX|x86 = DebugAVX|x86
DebugAVX2|x64 = DebugAVX2|x64
DebugAVX2|x86 = DebugAVX2|x86
DebugAVX512|x64 = DebugAVX512|x64
DebugAVX512|x86 = DebugAVX512|x86
Release|x64 = Release|x64
Release|x86 = Release|x86
ReleaseAVX|x64 = ReleaseAVX|x64
ReleaseAVX|x86 = ReleaseAVX|x86
ReleaseAVX2|x64 = ReleaseAVX2|x64
ReleaseAVX2|x86 = ReleaseAVX2|x86
ReleaseAVX512|x64 = ReleaseAVX512|x64
ReleaseAVX512|x86 = ReleaseAVX512|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{7E2BCD41-DBC7-4565-8C4D-E92F039C2286}.Debug|x64.ActiveCfg = Debug|x64
Expand All @@ -33,6 +37,10 @@ Global
{7E2BCD41-DBC7-4565-8C4D-E92F039C2286}.DebugAVX2|x64.Build.0 = DebugAVX2|x64
{7E2BCD41-DBC7-4565-8C4D-E92F039C2286}.DebugAVX2|x86.ActiveCfg = DebugAVX2|Win32
{7E2BCD41-DBC7-4565-8C4D-E92F039C2286}.DebugAVX2|x86.Build.0 = DebugAVX2|Win32
{7E2BCD41-DBC7-4565-8C4D-E92F039C2286}.DebugAVX512|x64.ActiveCfg = DebugAVX512|x64
{7E2BCD41-DBC7-4565-8C4D-E92F039C2286}.DebugAVX512|x64.Build.0 = DebugAVX512|x64
{7E2BCD41-DBC7-4565-8C4D-E92F039C2286}.DebugAVX512|x86.ActiveCfg = DebugAVX512|Win32
{7E2BCD41-DBC7-4565-8C4D-E92F039C2286}.DebugAVX512|x86.Build.0 = DebugAVX512|Win32
{7E2BCD41-DBC7-4565-8C4D-E92F039C2286}.Release|x64.ActiveCfg = Release|x64
{7E2BCD41-DBC7-4565-8C4D-E92F039C2286}.Release|x64.Build.0 = Release|x64
{7E2BCD41-DBC7-4565-8C4D-E92F039C2286}.Release|x86.ActiveCfg = Release|Win32
Expand All @@ -45,6 +53,10 @@ Global
{7E2BCD41-DBC7-4565-8C4D-E92F039C2286}.ReleaseAVX2|x64.Build.0 = ReleaseAVX2|x64
{7E2BCD41-DBC7-4565-8C4D-E92F039C2286}.ReleaseAVX2|x86.ActiveCfg = ReleaseAVX2|Win32
{7E2BCD41-DBC7-4565-8C4D-E92F039C2286}.ReleaseAVX2|x86.Build.0 = ReleaseAVX2|Win32
{7E2BCD41-DBC7-4565-8C4D-E92F039C2286}.ReleaseAVX512|x64.ActiveCfg = ReleaseAVX512|x64
{7E2BCD41-DBC7-4565-8C4D-E92F039C2286}.ReleaseAVX512|x64.Build.0 = ReleaseAVX512|x64
{7E2BCD41-DBC7-4565-8C4D-E92F039C2286}.ReleaseAVX512|x86.ActiveCfg = ReleaseAVX512|Win32
{7E2BCD41-DBC7-4565-8C4D-E92F039C2286}.ReleaseAVX512|x86.Build.0 = ReleaseAVX512|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
Binary file modified blagominer/blagominer.cpp
Binary file not shown.
4 changes: 4 additions & 0 deletions blagominer/blagominer.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
#include "shabal.h"

// blago version
#ifdef __AVX512F__
char const *const version = "v1.170997_AVX512";
#else
#ifdef __AVX2__
char const *const version = "v1.170997_AVX2";
#else
Expand All @@ -15,6 +18,7 @@
// char const *const version = "v1.170997";
#endif
#endif
#endif

extern HANDLE hHeap; //heap

Expand Down
165 changes: 165 additions & 0 deletions blagominer/blagominer.vcxproj

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions blagominer/blagominer.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@
<ClCompile Include="mshabal_128.c">
<Filter>Quelldateien</Filter>
</ClCompile>
<ClCompile Include="mshabal_512.c">
<Filter>Quelldateien</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="blagominer.rc">
Expand Down
10 changes: 5 additions & 5 deletions blagominer/miner.conf
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"Mode" : "pool",
"Server" : "50-50-pool.burst.cryptoguru.org",
"Server" : "pool.dev.burst-test.net",
"Port": 8124,

"UpdaterAddr" : "50-50-pool.burst.cryptoguru.org",
"UpdaterAddr" : "pool.dev.burst-test.net",
"UpdaterPort": "8124",

"InfoAddr" : "127.0.0.1",
Expand All @@ -12,14 +12,14 @@
"EnableProxy": false,
"ProxyPort": 8126,

"Paths":["\\\\.\\PHYSICALDRIVE4", "j:\\plot"],
"Paths":["d:\\plot"],
"CacheSize" : 16384,
"CacheSize2" : 512000,

"Debug": true,
"UseHDDWakeUp": true,

"TargetDeadline": 400000,
"TargetDeadline": 32000000,

"SendInterval": 100,
"UpdateInterval": 950,
Expand All @@ -31,5 +31,5 @@
"WinSizeX": 76,
"WinSizeY": 50,

"POC2StartBlock": 502000
"POC2StartBlock": 5020
}
83 changes: 82 additions & 1 deletion blagominer/mshabal.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ extern "C" {

#define MSHABAL256_FACTOR 2

#define MSHABAL512_FACTOR 4

/*
* The context structure for a Shabal computation. Contents are
* private. Such a structure should be allocated and released by
Expand Down Expand Up @@ -151,13 +153,69 @@ extern "C" {
#pragma pack()


/*
 * Context structure for the 512-bit-wide Shabal routines
 * (simd512_mshabal_init / simd512_mshabal / simd512_mshabal_close),
 * which take sixteen data pointers and sixteen destinations per call.
 * Contents are private. Such a structure should be allocated and
 * released by the caller, in any memory area.
 *
 * The struct is declared under #pragma pack(1), so members are laid out
 * back to back with no padding; do not reorder or insert members without
 * checking every consumer of this layout.
 */
#pragma pack(1)
typedef struct {
/* Sixteen 64-byte buffers; presumably one pending input block per
 * lane -- TODO confirm against mshabal_512.c. */
unsigned char buf0[64];
unsigned char buf1[64];
unsigned char buf2[64];
unsigned char buf3[64];
unsigned char buf4[64];
unsigned char buf5[64];
unsigned char buf6[64];
unsigned char buf7[64];
unsigned char buf8[64];
unsigned char buf9[64];
unsigned char buf10[64];
unsigned char buf11[64];
unsigned char buf12[64];
unsigned char buf13[64];
unsigned char buf14[64];
unsigned char buf15[64];
/* Sixteen byte pointers, one per bufN; how they are used is not
 * visible in this header -- NOTE(review): document once confirmed. */
unsigned char* xbuf0;
unsigned char* xbuf1;
unsigned char* xbuf2;
unsigned char* xbuf3;
unsigned char* xbuf4;
unsigned char* xbuf5;
unsigned char* xbuf6;
unsigned char* xbuf7;
unsigned char* xbuf8;
unsigned char* xbuf9;
unsigned char* xbuf10;
unsigned char* xbuf11;
unsigned char* xbuf12;
unsigned char* xbuf13;
unsigned char* xbuf14;
unsigned char* xbuf15;
/* Presumably the current fill level of the bufN buffers -- confirm. */
size_t ptr;
/* (12 + 16 + 16) * 4 * MSHABAL512_FACTOR 32-bit words, i.e. 44 vectors
 * of 512 bits when MSHABAL512_FACTOR is 4. The 12/16/16 split matches
 * the A, B and C vector arrays that the 128- and 256-bit variants store
 * at vector offsets 0, 12 and 28; presumably the 512-bit code does the
 * same -- confirm. */
mshabal_u32 state[(12 + 16 + 16) * 4 * MSHABAL512_FACTOR];
/* Presumably the high and low 32-bit halves of a counter -- confirm. */
mshabal_u32 Whigh, Wlow;
/* Output size; the init routines document their out_size argument as
 * being expressed in bits. */
unsigned out_size;
} mshabal512_context;


/*
 * Reduced context used by simd512_mshabal_openclose_fast(). It carries
 * only the state words, the Whigh/Wlow pair and the output size; it has
 * none of the per-input buffers or pointers of mshabal512_context.
 * Declared byte-packed (no padding between members).
 */
#pragma pack(1)
typedef struct {
/* Same dimensions as mshabal512_context.state:
 * (12 + 16 + 16) * 4 * MSHABAL512_FACTOR 32-bit words. */
mshabal_u32 state[(12 + 16 + 16) * 4 * MSHABAL512_FACTOR];
/* Presumably the high and low 32-bit halves of a counter -- confirm. */
mshabal_u32 Whigh, Wlow;
/* Output size; presumably in bits, as for the init routines -- confirm. */
unsigned out_size;
} mshabal512_context_fast;

/* Restore the compiler's default structure packing. */
#pragma pack()

/*
 * Initialize a context structure. The output size must be a multiple
 * of 32, between 32 and 512 (inclusive). The output size is expressed
 * in bits.
 *
 * There is one initializer per SIMD width (128, 256 and 512 bits); each
 * takes the context type that belongs to that width.
 */
void simd128_mshabal_init(mshabal_context *sc, unsigned out_size);
void simd256_mshabal_init(mshabal256_context *sc, unsigned out_size);
void simd512_mshabal_init(mshabal512_context *sc, unsigned out_size);

/*
* Process some more data bytes; four chunks of data, pointed to by
Expand All @@ -176,6 +234,12 @@ extern "C" {
void *data0, void *data1, void *data2, void *data3,
void *data4, void *data5, void *data6, void *data7,
size_t len);
/*
 * 512-bit variant of the "process some more data bytes" routine.
 * sc is the context; data0..data15 are sixteen data pointers and len is
 * a single length argument shared by all of them (presumably a byte
 * count that applies to every chunk -- confirm against mshabal_512.c).
 */
void simd512_mshabal(mshabal512_context *sc,
void *data0, void *data1, void *data2, void *data3,
void *data4, void *data5, void *data6, void *data7,
void *data8, void *data9, void *data10, void *data11,
void *data12, void *data13, void *data14, void *data15,
size_t len);
/*
* Terminate the Shabal computation incarnated by the provided context
* structure. "n" shall be a value between 0 and 7 (inclusive): this is
Expand Down Expand Up @@ -205,7 +269,16 @@ extern "C" {
unsigned n,
void *dst0, void *dst1, void *dst2, void *dst3,
void *dst4, void *dst5, void *dst6, void *dst7);

/*
 * 512-bit variant of the routine that terminates a Shabal computation.
 * sc is the context, ub0..ub15 and n are the per-input extra-bit
 * arguments (the close-family comment in this header states that n is
 * between 0 and 7 inclusive), and dst0..dst15 are the sixteen output
 * destinations.
 * NOTE(review): exact meaning of ubN and how many bytes are written to
 * each dstN are not visible here -- confirm against mshabal_512.c.
 */
void simd512_mshabal_close(mshabal512_context *sc,
unsigned ub0, unsigned ub1, unsigned ub2, unsigned ub3,
unsigned ub4, unsigned ub5, unsigned ub6, unsigned ub7,
unsigned ub8, unsigned ub9, unsigned ub10, unsigned ub11,
unsigned ub12, unsigned ub13, unsigned ub14, unsigned ub15,
unsigned n,
void *dst0, void *dst1, void *dst2, void *dst3,
void *dst4, void *dst5, void *dst6, void *dst7,
void *dst8, void *dst9, void *dst10, void *dst11,
void *dst12, void *dst13, void *dst14, void *dst15);
/*
* Combined open and close routines
*/
Expand All @@ -220,6 +293,14 @@ extern "C" {
void *u1, void *u2,
void *dst0, void *dst1, void *dst2, void *dst3, void *dst4, void *dst5, void *dst6, void *dst7,
unsigned n);
/*
 * 512-bit variant of the combined open-and-close routine. It operates
 * on the reduced mshabal512_context_fast context and takes two input
 * pointers (u1, u2), sixteen output destinations (dst0..dst15) and n.
 * NOTE(review): the layout expected behind u1/u2 and the meaning of n
 * are not visible in this header -- confirm against mshabal_512.c.
 */
void
simd512_mshabal_openclose_fast(mshabal512_context_fast *sc,
void *u1, void *u2,
void *dst0, void *dst1, void *dst2, void *dst3, void *dst4, void *dst5, void *dst6, void *dst7,
void *dst8, void *dst9, void *dst10, void *dst11, void *dst12, void *dst13, void *dst14, void *dst15,
unsigned n);


#ifdef __cplusplus
}
#endif
Expand Down
10 changes: 5 additions & 5 deletions blagominer/mshabal_128.c
Original file line number Diff line number Diff line change
Expand Up @@ -634,10 +634,10 @@ extern "C" {
}

//transfer results to ram
for (j = 0; j < 12; j++)
_mm_storeu_si128((__m128i *)sc->state + j, A[j]);
for (j = 0; j < 16; j++) {
_mm_storeu_si128((__m128i *)sc->state + j + 12, B[j]);
//for (j = 0; j < 12; j++)
// _mm_storeu_si128((__m128i *)sc->state + j, A[j]);
for (j = 8; j < 10; j++) {
// _mm_storeu_si128((__m128i *)sc->state + j + 12, B[j]);
_mm_storeu_si128((__m128i *)sc->state + j + 28, C[j]);
}
}
Expand All @@ -655,7 +655,7 @@ extern "C" {
//extract results
out_size_w32 = sc->out_size >> 5;
off = 4 * (28 + (16 - out_size_w32));
for (z = 0; z < out_size_w32; z++) {
for (z = 0; z < 2; z++) {
unsigned y = off + (z << 2);
((u32 *)dst0)[z] = sc->state[y + 0];
((u32 *)dst1)[z] = sc->state[y + 1];
Expand Down
10 changes: 5 additions & 5 deletions blagominer/mshabal_256.c
Original file line number Diff line number Diff line change
Expand Up @@ -698,10 +698,10 @@ extern "C" {
}

//transfer results to ram
for (j = 0; j < 12; j++)
_mm256_storeu_si256((__m256i *)sc->state + j, A[j]);
for (j = 0; j < 16; j++) {
_mm256_storeu_si256((__m256i *)sc->state + j + 12, B[j]);
// for (j = 0; j < 12; j++)
// _mm256_storeu_si256((__m256i *)sc->state + j, A[j]);
for (j = 8; j < 10; j++) {
//_mm256_storeu_si256((__m256i *)sc->state + j + 12, B[j]);
_mm256_storeu_si256((__m256i *)sc->state + j + 28, C[j]);
}
}
Expand All @@ -724,7 +724,7 @@ extern "C" {
//extract results
out_size_w32 = sc->out_size >> 5;
off = MSHABAL256_FACTOR * 4 * (28 + (16 - out_size_w32));
for (z = 0; z < out_size_w32; z++) {
for (z = 0; z < 2; z++) {
unsigned y = off + MSHABAL256_FACTOR * (z << 2);
((u32 *)dst0)[z] = sc->state[y + 0];
((u32 *)dst1)[z] = sc->state[y + 1];
Expand Down
Loading

0 comments on commit ffb4163

Please sign in to comment.