Skip to content

Commit d61cef3

Browse files
BoBoDaigong-flying
andcommitted
Add xxh3 riscv-v implementation from xxhash repo.
Co-authored-by: gong-flying <[email protected]>
1 parent 98d2b90 commit d61cef3

File tree

1 file changed

+142
-0
lines changed

1 file changed

+142
-0
lines changed

lib/common/xxhash.h

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3730,6 +3730,8 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_can
37303730
# include <immintrin.h>
37313731
# elif defined(__SSE2__)
37323732
# include <emmintrin.h>
3733+
# elif defined(__riscv_vector)
3734+
# include <riscv_vector.h>
37333735
# endif
37343736
#endif
37353737

@@ -3852,6 +3854,7 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
38523854
*/
38533855
XXH_VSX = 5, /*!< VSX and ZVector for POWER8/z13 (64-bit) */
38543856
XXH_SVE = 6, /*!< SVE for some ARMv8-A and ARMv9-A */
3857+
XXH_RVV = 7, /*!< RVV for Riscv */
38553858
};
38563859
/*!
38573860
* @ingroup tuning
@@ -3874,6 +3877,7 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
38743877
# define XXH_NEON 4
38753878
# define XXH_VSX 5
38763879
# define XXH_SVE 6
3880+
# define XXH_RVV 7
38773881
#endif
38783882

38793883
#ifndef XXH_VECTOR /* can be defined on command line */
@@ -3898,6 +3902,8 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
38983902
|| (defined(__s390x__) && defined(__VEC__)) \
38993903
&& defined(__GNUC__) /* TODO: IBM XL */
39003904
# define XXH_VECTOR XXH_VSX
3905+
# elif defined(__riscv_vector)
3906+
# define XXH_VECTOR XXH_RVV
39013907
# else
39023908
# define XXH_VECTOR XXH_SCALAR
39033909
# endif
@@ -3935,6 +3941,8 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
39353941
# define XXH_ACC_ALIGN 64
39363942
# elif XXH_VECTOR == XXH_SVE /* sve */
39373943
# define XXH_ACC_ALIGN 64
3944+
# elif XXH_VECTOR == XXH_RVV /* rvv */
3945+
# define XXH_ACC_ALIGN 64 /* could be 8, but 64 may be faster */
39383946
# endif
39393947
#endif
39403948

@@ -3943,6 +3951,8 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
39433951
# define XXH_SEC_ALIGN XXH_ACC_ALIGN
39443952
#elif XXH_VECTOR == XXH_SVE
39453953
# define XXH_SEC_ALIGN XXH_ACC_ALIGN
3954+
#elif XXH_VECTOR == XXH_RVV
3955+
# define XXH_SEC_ALIGN XXH_ACC_ALIGN
39463956
#else
39473957
# define XXH_SEC_ALIGN 8
39483958
#endif
@@ -5601,6 +5611,132 @@ XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc,
56015611

56025612
#endif
56035613

5614+
#if (XXH_VECTOR == XXH_RVV)
5615+
#define XXH_CONCAT2(X, Y) X ## Y
5616+
#define XXH_CONCAT(X, Y) XXH_CONCAT2(X, Y)
5617+
#if ((defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 13) || \
5618+
(defined(__clang__) && __clang_major__ < 16))
5619+
#define XXH_RVOP(op) op
5620+
#define XXH_RVCAST(op) XXH_CONCAT(vreinterpret_v_, op)
5621+
#else
5622+
#define XXH_RVOP(op) XXH_CONCAT(__riscv_, op)
5623+
#define XXH_RVCAST(op) XXH_CONCAT(__riscv_vreinterpret_v_, op)
5624+
#endif
5625+
XXH_FORCE_INLINE void
5626+
XXH3_accumulate_512_rvv( void* XXH_RESTRICT acc,
5627+
const void* XXH_RESTRICT input,
5628+
const void* XXH_RESTRICT secret)
5629+
{
5630+
XXH_ASSERT((((size_t)acc) & 63) == 0);
5631+
{
5632+
// Try to set vector lenght to 512 bits.
5633+
// If this length is unavailable, then maximum available will be used
5634+
size_t vl = XXH_RVOP(vsetvl_e64m2)(8);
5635+
5636+
uint64_t* xacc = (uint64_t*) acc;
5637+
const uint64_t* xinput = (const uint64_t*) input;
5638+
const uint64_t* xsecret = (const uint64_t*) secret;
5639+
static const uint64_t swap_mask[16] = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
5640+
vuint64m2_t xswap_mask = XXH_RVOP(vle64_v_u64m2)(swap_mask, vl);
5641+
5642+
size_t i;
5643+
for (i = 0; i < XXH_STRIPE_LEN/8; i += vl) {
5644+
/* data_vec = xinput[i]; */
5645+
vuint64m2_t data_vec = XXH_RVCAST(u8m2_u64m2)(XXH_RVOP(vle8_v_u8m2)((const uint8_t*)(xinput + i), vl * 8));
5646+
/* key_vec = xsecret[i]; */
5647+
vuint64m2_t key_vec = XXH_RVCAST(u8m2_u64m2)(XXH_RVOP(vle8_v_u8m2)((const uint8_t*)(xsecret + i), vl * 8));
5648+
/* acc_vec = xacc[i]; */
5649+
vuint64m2_t acc_vec = XXH_RVOP(vle64_v_u64m2)(xacc + i, vl);
5650+
/* data_key = data_vec ^ key_vec; */
5651+
vuint64m2_t data_key = XXH_RVOP(vxor_vv_u64m2)(data_vec, key_vec, vl);
5652+
/* data_key_hi = data_key >> 32; */
5653+
vuint64m2_t data_key_hi = XXH_RVOP(vsrl_vx_u64m2)(data_key, 32, vl);
5654+
/* data_key_lo = data_key & 0xffffffff; */
5655+
vuint64m2_t data_key_lo = XXH_RVOP(vand_vx_u64m2)(data_key, 0xffffffff, vl);
5656+
/* swap high and low halves */
5657+
vuint64m2_t data_swap = XXH_RVOP(vrgather_vv_u64m2)(data_vec, xswap_mask, vl);
5658+
/* acc_vec += data_key_lo * data_key_hi; */
5659+
acc_vec = XXH_RVOP(vmacc_vv_u64m2)(acc_vec, data_key_lo, data_key_hi, vl);
5660+
/* acc_vec += data_swap; */
5661+
acc_vec = XXH_RVOP(vadd_vv_u64m2)(acc_vec, data_swap, vl);
5662+
/* xacc[i] = acc_vec; */
5663+
XXH_RVOP(vse64_v_u64m2)(xacc + i, acc_vec, vl);
5664+
}
5665+
}
5666+
}
5667+
5668+
XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(rvv)
5669+
5670+
XXH_FORCE_INLINE void
5671+
XXH3_scrambleAcc_rvv(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
5672+
{
5673+
XXH_ASSERT((((size_t)acc) & 15) == 0);
5674+
{
5675+
size_t count = XXH_STRIPE_LEN/8;
5676+
uint64_t* xacc = (uint64_t*)acc;
5677+
const uint8_t* xsecret = (const uint8_t *)secret;
5678+
size_t vl;
5679+
for (; count > 0; count -= vl, xacc += vl, xsecret += vl*8) {
5680+
vl = XXH_RVOP(vsetvl_e64m2)(count);
5681+
{
5682+
/* key_vec = xsecret[i]; */
5683+
vuint64m2_t key_vec = XXH_RVCAST(u8m2_u64m2)(XXH_RVOP(vle8_v_u8m2)(xsecret, vl*8));
5684+
/* acc_vec = xacc[i]; */
5685+
vuint64m2_t acc_vec = XXH_RVOP(vle64_v_u64m2)(xacc, vl);
5686+
/* acc_vec ^= acc_vec >> 47; */
5687+
vuint64m2_t vsrl = XXH_RVOP(vsrl_vx_u64m2)(acc_vec, 47, vl);
5688+
acc_vec = XXH_RVOP(vxor_vv_u64m2)(acc_vec, vsrl, vl);
5689+
/* acc_vec ^= key_vec; */
5690+
acc_vec = XXH_RVOP(vxor_vv_u64m2)(acc_vec, key_vec, vl);
5691+
/* acc_vec *= XXH_PRIME32_1; */
5692+
acc_vec = XXH_RVOP(vmul_vx_u64m2)(acc_vec, XXH_PRIME32_1, vl);
5693+
/* xacc[i] *= acc_vec; */
5694+
XXH_RVOP(vse64_v_u64m2)(xacc, acc_vec, vl);
5695+
}
5696+
}
5697+
}
5698+
}
5699+
5700+
XXH_FORCE_INLINE void
5701+
XXH3_initCustomSecret_rvv(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
5702+
{
5703+
XXH_STATIC_ASSERT(XXH_SEC_ALIGN >= 8);
5704+
XXH_ASSERT(((size_t)customSecret & 7) == 0);
5705+
(void)(&XXH_writeLE64);
5706+
{
5707+
size_t count = XXH_SECRET_DEFAULT_SIZE/8;
5708+
size_t vl;
5709+
size_t VLMAX = XXH_RVOP(vsetvlmax_e64m2)();
5710+
int64_t* cSecret = (int64_t*)customSecret;
5711+
const int64_t* kSecret = (const int64_t*)(const void*)XXH3_kSecret;
5712+
5713+
#if __riscv_v_intrinsic >= 1000000
5714+
// ratified v1.0 intrinics version
5715+
vbool32_t mneg = XXH_RVCAST(u8m1_b32)(
5716+
XXH_RVOP(vmv_v_x_u8m1)(0xaa, XXH_RVOP(vsetvlmax_e8m1)()));
5717+
#else
5718+
// support pre-ratification intrinics, which lack mask to vector casts
5719+
size_t vlmax = XXH_RVOP(vsetvlmax_e8m1)();
5720+
vbool32_t mneg = XXH_RVOP(vmseq_vx_u8mf4_b32)(
5721+
XXH_RVOP(vand_vx_u8mf4)(
5722+
XXH_RVOP(vid_v_u8mf4)(vlmax), 1, vlmax), 1, vlmax);
5723+
#endif
5724+
vint64m2_t seed = XXH_RVOP(vmv_v_x_i64m2)((int64_t)seed64, VLMAX);
5725+
seed = XXH_RVOP(vneg_v_i64m2_mu)(mneg, seed, seed, VLMAX);
5726+
5727+
for (; count > 0; count -= vl, cSecret += vl, kSecret += vl) {
5728+
/* make sure vl=VLMAX until last iteration */
5729+
vl = XXH_RVOP(vsetvl_e64m2)(count < VLMAX ? count : VLMAX);
5730+
{
5731+
vint64m2_t src = XXH_RVOP(vle64_v_i64m2)(kSecret, vl);
5732+
vint64m2_t res = XXH_RVOP(vadd_vv_i64m2)(src, seed, vl);
5733+
XXH_RVOP(vse64_v_i64m2)(cSecret, res, vl);
5734+
}
5735+
}
5736+
}
5737+
}
5738+
#endif
5739+
56045740
/* scalar variants - universal */
56055741

56065742
#if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__))
@@ -5831,6 +5967,12 @@ typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
58315967
#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
58325968
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
58335969

5970+
#elif (XXH_VECTOR == XXH_RVV)
5971+
#define XXH3_accumulate_512 XXH3_accumulate_512_rvv
5972+
#define XXH3_accumulate XXH3_accumulate_rvv
5973+
#define XXH3_scrambleAcc XXH3_scrambleAcc_rvv
5974+
#define XXH3_initCustomSecret XXH3_initCustomSecret_rvv
5975+
58345976
#else /* scalar */
58355977

58365978
#define XXH3_accumulate_512 XXH3_accumulate_512_scalar

0 commit comments

Comments
 (0)