1
1
using System ;
2
2
using System . Collections . Generic ;
3
3
using System . Diagnostics . CodeAnalysis ;
4
+ using System . Runtime . CompilerServices ;
5
+
4
6
5
7
#if NET6_0_OR_GREATER
6
8
using System . Runtime . Intrinsics ;
9
+ using System . Runtime . Intrinsics . Arm ;
7
10
using System . Runtime . Intrinsics . X86 ;
8
11
#endif
9
12
@@ -61,6 +64,12 @@ public int EstimateFrequency(T value)
61
64
{
62
65
return EstimateFrequencyAvx ( value ) ;
63
66
}
67
+ #if NET6_0_OR_GREATER
68
+ else if ( isa . IsArm64Supported )
69
+ {
70
+ return EstimateFrequencyArm ( value ) ;
71
+ }
72
+ #endif
64
73
else
65
74
{
66
75
return EstimateFrequencyStd ( value ) ;
@@ -84,6 +93,12 @@ public void Increment(T value)
84
93
{
85
94
IncrementAvx ( value ) ;
86
95
}
96
+ #if NET6_0_OR_GREATER
97
+ else if ( isa . IsArm64Supported )
98
+ {
99
+ IncrementArm ( value ) ;
100
+ }
101
+ #endif
87
102
else
88
103
{
89
104
IncrementStd ( value ) ;
@@ -314,5 +329,94 @@ private unsafe void IncrementAvx(T value)
314
329
}
315
330
}
316
331
#endif
332
+
333
+ #if NET6_0_OR_GREATER
334
/// <summary>
/// Increments the four counters selected by the hash of <paramref name="value"/>
/// using Arm AdvSimd intrinsics.
/// </summary>
/// <param name="value">The value whose counters are incremented.</param>
/// <remarks>
/// Each 64-bit table slot is treated as sixteen 4-bit counters. A counter whose
/// four bits are already all set is left unchanged. When at least one counter
/// was incremented, <c>size</c> is advanced and <c>Reset</c> is called once it
/// reaches <c>sampleSize</c>.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private unsafe void IncrementArm(T value)
{
    int blockHash = Spread(comparer.GetHashCode(value));
    int counterHash = Rehash(blockHash);
    int block = (blockHash & blockMask) << 3;

    // Lane i holds counterHash shifted by { 0, -8, -16, -24 } bits
    // (a negative count makes the arithmetic shift go right).
    Vector128<int> shiftCounts = Vector128.Create(0, -8, -16, -24);
    Vector128<int> laneHash = AdvSimd.ShiftArithmetic(Vector128.Create(counterHash), shiftCounts);

    // Bits 1..4 of each lane pick one of the sixteen counters inside a slot.
    Vector128<int> counterIndex = AdvSimd.And(AdvSimd.ShiftRightLogical(laneHash, 1), Vector128.Create(0xf));

    // Bit 0 of each lane picks the even/odd slot within the pairs starting at block + { 0, 2, 4, 6 }.
    Vector128<int> lowBit = AdvSimd.And(laneHash, Vector128.Create(1));
    Vector128<int> slot = AdvSimd.Add(AdvSimd.Add(Vector128.Create(block), lowBit), Vector128.Create(0, 2, 4, 6));

    int t0 = AdvSimd.Extract(slot, 0);
    int t1 = AdvSimd.Extract(slot, 1);
    int t2 = AdvSimd.Extract(slot, 2);
    int t3 = AdvSimd.Extract(slot, 3);

    // Counter index * 4 = bit offset of the counter inside its 64-bit slot.
    Vector128<int> bitOffset = AdvSimd.ShiftLeftLogicalSaturate(counterIndex, 2);

    // Spread the four 32-bit offsets into the low halves of 64-bit lanes (int lanes 0 and 2),
    // so they can be used as per-lane shift counts for Vector128<long>.
    Vector128<int> offsetsA = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, bitOffset, 0);
    offsetsA = AdvSimd.Arm64.InsertSelectedScalar(offsetsA, 2, bitOffset, 1);

    Vector128<int> offsetsB = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, bitOffset, 2);
    offsetsB = AdvSimd.Arm64.InsertSelectedScalar(offsetsB, 2, bitOffset, 3);

    Vector128<long> shiftA = offsetsA.AsInt64();
    Vector128<long> shiftB = offsetsB.AsInt64();

    fixed (long* tablePtr = table)
    {
        Vector128<long> slotsA = Vector128.Create(AdvSimd.LoadVector64(tablePtr + t0), AdvSimd.LoadVector64(tablePtr + t1));
        Vector128<long> slotsB = Vector128.Create(AdvSimd.LoadVector64(tablePtr + t2), AdvSimd.LoadVector64(tablePtr + t3));

        // 0xf moved to each counter's position.
        Vector128<long> nibble = Vector128.Create(0xfL);
        Vector128<long> counterMaskA = AdvSimd.ShiftArithmetic(nibble, shiftA);
        Vector128<long> counterMaskB = AdvSimd.ShiftArithmetic(nibble, shiftB);

        // All-ones for lanes whose counter is not yet at 15, zero for saturated counters.
        Vector128<long> canIncrementA = AdvSimd.Not(AdvSimd.Arm64.CompareEqual(AdvSimd.And(slotsA, counterMaskA), counterMaskA));
        Vector128<long> canIncrementB = AdvSimd.Not(AdvSimd.Arm64.CompareEqual(AdvSimd.And(slotsB, counterMaskB), counterMaskB));

        // A single 1 at each counter's position, cleared where the counter is saturated.
        Vector128<long> unit = Vector128.Create(1L);
        Vector128<long> deltaA = AdvSimd.And(canIncrementA, AdvSimd.ShiftArithmetic(unit, shiftA));
        Vector128<long> deltaB = AdvSimd.And(canIncrementB, AdvSimd.ShiftArithmetic(unit, shiftB));

        tablePtr[t0] += AdvSimd.Extract(deltaA, 0);
        tablePtr[t1] += AdvSimd.Extract(deltaA, 1);
        tablePtr[t2] += AdvSimd.Extract(deltaB, 0);
        tablePtr[t3] += AdvSimd.Extract(deltaB, 1);

        // Reduce both delta vectors to a single scalar that is non-zero when any counter changed.
        Vector64<int> maxA = AdvSimd.Arm64.MaxAcross(deltaA.AsInt32());
        Vector64<int> maxB = AdvSimd.Arm64.MaxAcross(deltaB.AsInt32());
        Vector64<int> combined = AdvSimd.Arm64.InsertSelectedScalar(maxA, 1, maxB, 0);
        Vector64<short> anyIncrement = AdvSimd.Arm64.MaxAcross(combined.AsInt16());

        if (anyIncrement.ToScalar() != 0)
        {
            if (++size == sampleSize)
            {
                Reset();
            }
        }
    }
}
384
+
385
/// <summary>
/// Estimates the frequency of <paramref name="value"/> using Arm AdvSimd intrinsics.
/// </summary>
/// <param name="value">The value to look up.</param>
/// <returns>
/// The smallest of the four 4-bit counters selected by the hash of <paramref name="value"/>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private unsafe int EstimateFrequencyArm(T value)
{
    int blockHash = Spread(comparer.GetHashCode(value));
    int counterHash = Rehash(blockHash);
    int block = (blockHash & blockMask) << 3;

    // Lane i holds counterHash shifted by { 0, -8, -16, -24 } bits
    // (a negative count makes the arithmetic shift go right).
    Vector128<int> shiftCounts = Vector128.Create(0, -8, -16, -24);
    Vector128<int> laneHash = AdvSimd.ShiftArithmetic(Vector128.Create(counterHash), shiftCounts);

    // Bits 1..4 of each lane pick one of the sixteen counters inside a slot.
    Vector128<int> counterIndex = AdvSimd.And(AdvSimd.ShiftRightLogical(laneHash, 1), Vector128.Create(0xf));

    // Bit 0 of each lane picks the even/odd slot within the pairs starting at block + { 0, 2, 4, 6 }.
    Vector128<int> lowBit = AdvSimd.And(laneHash, Vector128.Create(1));
    Vector128<int> slot = AdvSimd.Add(AdvSimd.Add(Vector128.Create(block), lowBit), Vector128.Create(0, 2, 4, 6));

    // Counter index * 4 = bit offset of the counter inside its 64-bit slot.
    Vector128<int> bitOffset = AdvSimd.ShiftLeftLogicalSaturate(counterIndex, 2);

    // Spread the offsets into the low halves of 64-bit lanes (int lanes 0 and 2), then negate
    // so that ShiftArithmetic moves each counter down to bit 0.
    Vector128<int> offsetsA = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, bitOffset, 0);
    offsetsA = AdvSimd.Arm64.InsertSelectedScalar(offsetsA, 2, bitOffset, 1);
    Vector128<int> rightShiftA = AdvSimd.Negate(offsetsA);

    Vector128<int> offsetsB = AdvSimd.Arm64.InsertSelectedScalar(Vector128<int>.Zero, 0, bitOffset, 2);
    offsetsB = AdvSimd.Arm64.InsertSelectedScalar(offsetsB, 2, bitOffset, 3);
    Vector128<int> rightShiftB = AdvSimd.Negate(offsetsB);

    fixed (long* tablePtr = table)
    {
        long* slot0 = tablePtr + AdvSimd.Extract(slot, 0);
        long* slot1 = tablePtr + AdvSimd.Extract(slot, 1);
        long* slot2 = tablePtr + AdvSimd.Extract(slot, 2);
        long* slot3 = tablePtr + AdvSimd.Extract(slot, 3);

        Vector128<long> slotsA = Vector128.Create(AdvSimd.LoadVector64(slot0), AdvSimd.LoadVector64(slot1));
        Vector128<long> slotsB = Vector128.Create(AdvSimd.LoadVector64(slot2), AdvSimd.LoadVector64(slot3));

        // Isolate each selected 4-bit counter in the low bits of its 64-bit lane.
        Vector128<long> nibble = Vector128.Create(0xfL);
        Vector128<long> countsA = AdvSimd.And(AdvSimd.ShiftArithmetic(slotsA, rightShiftA.AsInt64()), nibble);
        Vector128<long> countsB = AdvSimd.And(AdvSimd.ShiftArithmetic(slotsB, rightShiftB.AsInt64()), nibble);

        // Pack the low 32 bits of all four 64-bit lanes into one vector of four ints.
        // Byte order within a source vector:  < 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F >
        // Byte order picked by the lookup:    < 0, 1, 2, 3, 8, 9, A, B, ... >
        // countsA fills the lower eight bytes; the extension lookup then fills the upper
        // eight bytes from countsB.
        Vector128<byte> pickIntoLower = Vector128.Create(0x0B0A090803020100, 0xFFFFFFFFFFFFFFFF).AsByte();
        Vector128<byte> pickIntoUpper = Vector128.Create(0xFFFFFFFFFFFFFFFF, 0x0B0A090803020100).AsByte();

        Vector128<byte> packed = AdvSimd.Arm64.VectorTableLookup(countsA.AsByte(), pickIntoLower);
        packed = AdvSimd.Arm64.VectorTableLookupExtension(packed, countsB.AsByte(), pickIntoUpper);

        Vector64<int> smallest = AdvSimd.Arm64.MinAcross(packed.AsInt32());

        return smallest.ToScalar();
    }
}
420
+ #endif
317
421
}
318
422
}
0 commit comments