@@ -255,39 +255,26 @@ private void Reset()
255
255
}
256
256
257
257
#if ! NETSTANDARD2_0
258
+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
258
259
private unsafe int EstimateFrequencyAvx ( T value )
259
260
{
260
261
int blockHash = Spread ( comparer . GetHashCode ( value ) ) ;
261
262
int counterHash = Rehash ( blockHash ) ;
262
263
int block = ( blockHash & blockMask ) << 3 ;
263
264
264
- Vector128 < int > h = Vector128 . Create ( counterHash ) ;
265
- h = Avx2 . ShiftRightLogicalVariable ( h . AsUInt32 ( ) , Vector128 . Create ( 0U , 8U , 16U , 24U ) ) . AsInt32 ( ) ;
265
+ Vector128 < int > h = Avx2 . ShiftRightLogicalVariable ( Vector128 . Create ( counterHash ) . AsUInt32 ( ) , Vector128 . Create ( 0U , 8U , 16U , 24U ) ) . AsInt32 ( ) ;
266
+ Vector128 < int > index = Avx2 . ShiftLeftLogical ( Avx2 . And ( Avx2 . ShiftRightLogical ( h , 1 ) , Vector128 . Create ( 15 ) ) , 2 ) ;
267
+ Vector128 < int > blockOffset = Avx2 . Add ( Avx2 . Add ( Vector128 . Create ( block ) , Avx2 . And ( h , Vector128 . Create ( 1 ) ) ) , Vector128 . Create ( 0 , 2 , 4 , 6 ) ) ;
266
268
267
- var index = Avx2 . ShiftRightLogical ( h , 1 ) ;
268
- index = Avx2 . And ( index , Vector128 . Create ( 15 ) ) ; // j - counter index
269
- Vector128 < int > offset = Avx2 . And ( h , Vector128 . Create ( 1 ) ) ;
270
- Vector128 < int > blockOffset = Avx2 . Add ( Vector128 . Create ( block ) , offset ) ; // i - table index
271
- blockOffset = Avx2 . Add ( blockOffset , Vector128 . Create ( 0 , 2 , 4 , 6 ) ) ; // + (i << 1)
269
+ Vector256 < ulong > indexLong = Avx2 . PermuteVar8x32 ( Vector256 . Create ( index , Vector128 < int > . Zero ) , Vector256 . Create ( 0 , 4 , 1 , 5 , 2 , 5 , 3 , 7 ) ) . AsUInt64 ( ) ;
272
270
273
271
#if NET6_0_OR_GREATER
274
272
long * tablePtr = tableAddr ;
275
273
#else
276
274
fixed ( long * tablePtr = table )
277
275
#endif
278
276
{
279
- Vector256 < long > tableVector = Avx2 . GatherVector256 ( tablePtr , blockOffset , 8 ) ;
280
- index = Avx2 . ShiftLeftLogical ( index , 2 ) ;
281
-
282
- // convert index from int to long via permute
283
- Vector256 < long > indexLong = Vector256 . Create ( index , Vector128 < int > . Zero ) . AsInt64 ( ) ;
284
- Vector256 < int > permuteMask2 = Vector256 . Create ( 0 , 4 , 1 , 5 , 2 , 5 , 3 , 7 ) ;
285
- indexLong = Avx2 . PermuteVar8x32 ( indexLong . AsInt32 ( ) , permuteMask2 ) . AsInt64 ( ) ;
286
- tableVector = Avx2 . ShiftRightLogicalVariable ( tableVector , indexLong . AsUInt64 ( ) ) ;
287
- tableVector = Avx2 . And ( tableVector , Vector256 . Create ( 0xfL ) ) ;
288
-
289
- Vector256 < int > permuteMask = Vector256 . Create ( 0 , 2 , 4 , 6 , 1 , 3 , 5 , 7 ) ;
290
- Vector128 < ushort > count = Avx2 . PermuteVar8x32 ( tableVector . AsInt32 ( ) , permuteMask )
277
+ Vector128 < ushort > count = Avx2 . PermuteVar8x32 ( Avx2 . And ( Avx2 . ShiftRightLogicalVariable ( Avx2 . GatherVector256 ( tablePtr , blockOffset , 8 ) , indexLong ) , Vector256 . Create ( 0xfL ) ) . AsInt32 ( ) , Vector256 . Create ( 0 , 2 , 4 , 6 , 1 , 3 , 5 , 7 ) )
291
278
. GetLower ( )
292
279
. AsUInt16 ( ) ;
293
280
@@ -302,52 +289,33 @@ private unsafe int EstimateFrequencyAvx(T value)
302
289
}
303
290
}
304
291
292
+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
305
293
private unsafe void IncrementAvx ( T value )
306
294
{
307
295
int blockHash = Spread ( comparer . GetHashCode ( value ) ) ;
308
296
int counterHash = Rehash ( blockHash ) ;
309
297
int block = ( blockHash & blockMask ) << 3 ;
310
298
311
- Vector128 < int > h = Vector128 . Create ( counterHash ) ;
312
- h = Avx2 . ShiftRightLogicalVariable ( h . AsUInt32 ( ) , Vector128 . Create ( 0U , 8U , 16U , 24U ) ) . AsInt32 ( ) ;
299
+ Vector128 < int > h = Avx2 . ShiftRightLogicalVariable ( Vector128 . Create ( counterHash ) . AsUInt32 ( ) , Vector128 . Create ( 0U , 8U , 16U , 24U ) ) . AsInt32 ( ) ;
300
+ Vector128 < int > index = Avx2 . ShiftLeftLogical ( Avx2 . And ( Avx2 . ShiftRightLogical ( h , 1 ) , Vector128 . Create ( 15 ) ) , 2 ) ;
301
+ Vector128 < int > blockOffset = Avx2 . Add ( Avx2 . Add ( Vector128 . Create ( block ) , Avx2 . And ( h , Vector128 . Create ( 1 ) ) ) , Vector128 . Create ( 0 , 2 , 4 , 6 ) ) ;
313
302
314
- Vector128 < int > index = Avx2 . ShiftRightLogical ( h , 1 ) ;
315
- index = Avx2 . And ( index , Vector128 . Create ( 15 ) ) ; // j - counter index
316
- Vector128 < int > offset = Avx2 . And ( h , Vector128 . Create ( 1 ) ) ;
317
- Vector128 < int > blockOffset = Avx2 . Add ( Vector128 . Create ( block ) , offset ) ; // i - table index
318
- blockOffset = Avx2 . Add ( blockOffset , Vector128 . Create ( 0 , 2 , 4 , 6 ) ) ; // + (i << 1)
303
+ Vector256 < ulong > offsetLong = Avx2 . PermuteVar8x32 ( Vector256 . Create ( index , Vector128 < int > . Zero ) , Vector256 . Create ( 0 , 4 , 1 , 5 , 2 , 5 , 3 , 7 ) ) . AsUInt64 ( ) ;
304
+ Vector256 < long > mask = Avx2 . ShiftLeftLogicalVariable ( Vector256 . Create ( 0xfL ) , offsetLong ) ;
319
305
320
306
#if NET6_0_OR_GREATER
321
307
long * tablePtr = tableAddr ;
322
308
#else
323
309
fixed ( long * tablePtr = table )
324
310
#endif
325
311
{
326
- Vector256 < long > tableVector = Avx2 . GatherVector256 ( tablePtr , blockOffset , 8 ) ;
327
-
328
- // j == index
329
- index = Avx2 . ShiftLeftLogical ( index , 2 ) ;
330
- Vector256 < long > offsetLong = Vector256 . Create ( index , Vector128 < int > . Zero ) . AsInt64 ( ) ;
331
-
332
- Vector256 < int > permuteMask = Vector256 . Create ( 0 , 4 , 1 , 5 , 2 , 5 , 3 , 7 ) ;
333
- offsetLong = Avx2 . PermuteVar8x32 ( offsetLong . AsInt32 ( ) , permuteMask ) . AsInt64 ( ) ;
334
-
335
- // mask = (0xfL << offset)
336
- Vector256 < long > fifteen = Vector256 . Create ( 0xfL ) ;
337
- Vector256 < long > mask = Avx2 . ShiftLeftLogicalVariable ( fifteen , offsetLong . AsUInt64 ( ) ) ;
338
-
339
- // (table[i] & mask) != mask)
340
312
// Note masked is 'equal' - therefore use AndNot below
341
- Vector256 < long > masked = Avx2 . CompareEqual ( Avx2 . And ( tableVector , mask ) , mask ) ;
342
-
343
- // 1L << offset
344
- Vector256 < long > inc = Avx2 . ShiftLeftLogicalVariable ( Vector256 . Create ( 1L ) , offsetLong . AsUInt64 ( ) ) ;
313
+ Vector256 < long > masked = Avx2 . CompareEqual ( Avx2 . And ( Avx2 . GatherVector256 ( tablePtr , blockOffset , 8 ) , mask ) , mask ) ;
345
314
346
315
// Mask to zero out non matches (add zero below) - first operand is NOT then AND result (order matters)
347
- inc = Avx2 . AndNot ( masked , inc ) ;
316
+ Vector256 < long > inc = Avx2 . AndNot ( masked , Avx2 . ShiftLeftLogicalVariable ( Vector256 . Create ( 1L ) , offsetLong ) ) ;
348
317
349
- Vector256 < byte > result = Avx2 . CompareEqual ( masked . AsByte ( ) , Vector256 < byte > . Zero ) ;
350
- bool wasInc = Avx2 . MoveMask ( result . AsByte ( ) ) == unchecked ( ( int ) ( 0b1111_1111_1111_1111_1111_1111_1111_1111 ) ) ;
318
+ bool wasInc = Avx2 . MoveMask ( Avx2 . CompareEqual ( masked . AsByte ( ) , Vector256 < byte > . Zero ) . AsByte ( ) ) == unchecked ( ( int ) ( 0b1111_1111_1111_1111_1111_1111_1111_1111 ) ) ;
351
319
352
320
tablePtr [ blockOffset . GetElement ( 0 ) ] += inc . GetElement ( 0 ) ;
353
321
tablePtr [ blockOffset . GetElement ( 1 ) ] += inc . GetElement ( 1 ) ;
0 commit comments