Skip to content

Commit da2b19d

Browse files
authored
hide isa detection (#301)
* core * format * class per file
1 parent a751f5c commit da2b19d

File tree

6 files changed

+345
-327
lines changed

6 files changed

+345
-327
lines changed

BitFaster.Caching.Benchmarks/Lfu/SketchFrequency.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ public class SketchFrequency
1414
const int sketchSize = 1_048_576;
1515
const int iterations = 1_048_576;
1616

17-
private static CmSketch<int, DisableHardwareIntrinsics> std = new CmSketch<int, DisableHardwareIntrinsics>(sketchSize, EqualityComparer<int>.Default);
18-
private static CmSketch<int, DetectIsa> avx = new CmSketch<int, DetectIsa>(sketchSize, EqualityComparer<int>.Default);
17+
private static CmSketchCore<int, DisableHardwareIntrinsics> std = new CmSketchCore<int, DisableHardwareIntrinsics>(sketchSize, EqualityComparer<int>.Default);
18+
private static CmSketchCore<int, DetectIsa> avx = new CmSketchCore<int, DetectIsa>(sketchSize, EqualityComparer<int>.Default);
1919

2020
[GlobalSetup]
2121
public void Setup()

BitFaster.Caching.Benchmarks/Lfu/SketchIncrement.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ public class SketchIncrement
1313
{
1414
const int sketchSize = 1_048_576;
1515
const int iterations = 1_048_576;
16-
private static CmSketch<int, DisableHardwareIntrinsics> std = new CmSketch<int, DisableHardwareIntrinsics>(sketchSize, EqualityComparer<int>.Default);
17-
private static CmSketch<int, DetectIsa> avx = new CmSketch<int, DetectIsa>(sketchSize, EqualityComparer<int>.Default);
16+
private static CmSketchCore<int, DisableHardwareIntrinsics> std = new CmSketchCore<int, DisableHardwareIntrinsics>(sketchSize, EqualityComparer<int>.Default);
17+
private static CmSketchCore<int, DetectIsa> avx = new CmSketchCore<int, DetectIsa>(sketchSize, EqualityComparer<int>.Default);
1818

1919
[Benchmark(Baseline = true, OperationsPerInvoke = iterations)]
2020
public void Inc()

BitFaster.Caching.UnitTests/Lfu/CmSketchTests.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ public class CmSketchTests : CmSketchTestBase<DisableHardwareIntrinsics>
1818

1919
public abstract class CmSketchTestBase<I> where I : struct, IsaProbe
2020
{
21-
private CmSketch<int, I> sketch = new CmSketch<int, I>(512, EqualityComparer<int>.Default);
21+
private CmSketchCore<int, I> sketch = new CmSketchCore<int, I>(512, EqualityComparer<int>.Default);
2222

2323
public CmSketchTestBase()
2424
{
@@ -28,7 +28,7 @@ public CmSketchTestBase()
2828
[SkippableFact]
2929
public void Repro()
3030
{
31-
sketch = new CmSketch<int, I>(1_048_576, EqualityComparer<int>.Default);
31+
sketch = new CmSketchCore<int, I>(1_048_576, EqualityComparer<int>.Default);
3232

3333
for (int i = 0; i < 1_048_576; i++)
3434
{
@@ -43,7 +43,7 @@ public void Repro()
4343
[SkippableFact]
4444
public void WhenCapacityIsZeroDefaultsSelected()
4545
{
46-
sketch = new CmSketch<int, I>(0, EqualityComparer<int>.Default);
46+
sketch = new CmSketchCore<int, I>(0, EqualityComparer<int>.Default);
4747

4848
sketch.ResetSampleSize.Should().Be(10);
4949
}
@@ -82,7 +82,7 @@ public void WhenSampleSizeExceededCountIsReset()
8282
{
8383
bool reset = false;
8484

85-
sketch = new CmSketch<int, I>(64, EqualityComparer<int>.Default);
85+
sketch = new CmSketchCore<int, I>(64, EqualityComparer<int>.Default);
8686

8787
for (int i = 1; i < 20 * 64; i++)
8888
{

BitFaster.Caching/Lfu/CmSketch.cs

Lines changed: 5 additions & 315 deletions
Original file line numberDiff line numberDiff line change
@@ -1,328 +1,18 @@
1-
using System;
2-
using System.Collections.Generic;
3-
4-
#if !NETSTANDARD2_0
5-
using System.Runtime.Intrinsics;
6-
using System.Runtime.Intrinsics.X86;
7-
#endif
1+
using System.Collections.Generic;
82

93
namespace BitFaster.Caching.Lfu
104
{
11-
/// <summary>
12-
/// A probabilistic data structure used to estimate the frequency of a given value. Periodic aging reduces the
13-
/// accumulated count across all values over time, such that a historic popular value will decay to zero frequency
14-
/// over time if it is not accessed.
15-
/// </summary>
16-
/// <remarks>
17-
/// The maximum frequency of an element is limited to 15 (4-bits). Each element is hashed to a 64 byte 'block'
18-
/// consisting of 4 segments of 32 4-bit counters. The 64 byte blocks are the same size as x64 L1 cache lines.
19-
/// While the blocks are not guaranteed to be aligned, this scheme minimizes L1 cache misses resulting in a
20-
/// significant speedup. When supported, a vectorized AVX2 code path provides a further speedup. Together, block
21-
/// and AVX2 are approximately 2x faster than the original implementation.
22-
/// </remarks>
23-
/// This is a direct C# translation of FrequencySketch in the Caffeine library by ben.manes@gmail.com (Ben Manes).
24-
/// https://github.com/ben-manes/caffeine
25-
public class CmSketch<T, I> where I : struct, IsaProbe
5+
/// <inheritdoc/>
6+
public sealed class CmSketch<T> : CmSketchCore<T, DetectIsa>
267
{
27-
private static readonly long ResetMask = 0x7777777777777777L;
28-
private static readonly long OneMask = 0x1111111111111111L;
29-
30-
private long[] table;
31-
private int sampleSize;
32-
private int blockMask;
33-
private int size;
34-
35-
private readonly IEqualityComparer<T> comparer;
36-
378
/// <summary>
389
/// Initializes a new instance of the CmSketch class with the specified maximum size and equality comparer.
3910
/// </summary>
4011
/// <param name="maximumSize">The maximum size.</param>
4112
/// <param name="comparer">The equality comparer.</param>
42-
public CmSketch(long maximumSize, IEqualityComparer<T> comparer)
43-
{
44-
EnsureCapacity(maximumSize);
45-
this.comparer = comparer;
46-
}
47-
48-
/// <summary>
49-
/// Gets the reset sample size.
50-
/// </summary>
51-
public int ResetSampleSize => this.sampleSize;
52-
53-
/// <summary>
54-
/// Gets the size.
55-
/// </summary>
56-
public int Size => this.size;
57-
58-
/// <summary>
59-
/// Estimate the frequency of the specified value, up to the maximum of 15.
60-
/// </summary>
61-
/// <param name="value">The value.</param>
62-
/// <returns>The estimated frequency of the value.</returns>
63-
public int EstimateFrequency(T value)
64-
{
65-
#if NETSTANDARD2_0
66-
return EstimateFrequencyStd(value);
67-
#else
68-
69-
I isa = default;
70-
71-
if (isa.IsAvx2Supported)
72-
{
73-
return EstimateFrequencyAvx(value);
74-
}
75-
else
76-
{
77-
return EstimateFrequencyStd(value);
78-
}
79-
#endif
80-
}
81-
82-
/// <summary>
83-
/// Increment the count of the specified value.
84-
/// </summary>
85-
/// <param name="value">The value.</param>
86-
public void Increment(T value)
13+
public CmSketch(long maximumSize, IEqualityComparer<T> comparer)
14+
: base(maximumSize, comparer)
8715
{
88-
#if NETSTANDARD2_0
89-
IncrementStd(value);
90-
#else
91-
92-
I isa = default;
93-
94-
if (isa.IsAvx2Supported)
95-
{
96-
IncrementAvx(value);
97-
}
98-
else
99-
{
100-
IncrementStd(value);
101-
}
102-
#endif
103-
}
104-
105-
/// <summary>
106-
/// Clears the count for all items.
107-
/// </summary>
108-
public void Clear()
109-
{
110-
table = new long[table.Length];
111-
size = 0;
112-
}
113-
114-
private void EnsureCapacity(long maximumSize)
115-
{
116-
int maximum = (int)Math.Min(maximumSize, int.MaxValue >> 1);
117-
118-
table = new long[Math.Max(BitOps.CeilingPowerOfTwo(maximum), 8)];
119-
blockMask = (int)((uint)table.Length >> 3) - 1;
120-
sampleSize = (maximumSize == 0) ? 10 : (10 * maximum);
121-
122-
size = 0;
123-
}
124-
125-
private unsafe int EstimateFrequencyStd(T value)
126-
{
127-
var count = stackalloc int[4];
128-
int blockHash = Spread(comparer.GetHashCode(value));
129-
int counterHash = Rehash(blockHash);
130-
int block = (blockHash & blockMask) << 3;
131-
132-
for (int i = 0; i < 4; i++)
133-
{
134-
int h = (int)((uint)counterHash >> (i << 3));
135-
int index = (h >> 1) & 15;
136-
int offset = h & 1;
137-
count[i] = (int)(((ulong)table[block + offset + (i << 1)] >> (index << 2)) & 0xfL);
138-
}
139-
return Math.Min(Math.Min(count[0], count[1]), Math.Min(count[2], count[3]));
140-
}
141-
142-
private unsafe void IncrementStd(T value)
143-
{
144-
var index = stackalloc int[8];
145-
int blockHash = Spread(comparer.GetHashCode(value));
146-
int counterHash = Rehash(blockHash);
147-
int block = (blockHash & blockMask) << 3;
148-
149-
for (int i = 0; i < 4; i++)
150-
{
151-
int h = (int)((uint)counterHash >> (i << 3));
152-
index[i] = (h >> 1) & 15;
153-
int offset = h & 1;
154-
index[i + 4] = block + offset + (i << 1);
155-
}
156-
157-
bool added =
158-
IncrementAt(index[4], index[0])
159-
| IncrementAt(index[5], index[1])
160-
| IncrementAt(index[6], index[2])
161-
| IncrementAt(index[7], index[3]);
162-
163-
if (added && (++size == sampleSize))
164-
{
165-
Reset();
166-
}
167-
}
168-
169-
// Applies another round of hashing for additional randomization
170-
private static int Rehash(int x)
171-
{
172-
x = (int)(x * 0x31848bab);
173-
x ^= (int)((uint)x >> 14);
174-
return x;
175-
}
176-
177-
// Applies a supplemental hash function to defend against poor quality hashes.
178-
private static int Spread(int x)
179-
{
180-
x ^= (int)((uint)x >> 17);
181-
x = (int)(x * 0xed5ad4bb);
182-
x ^= (int)((uint)x >> 11);
183-
x = (int)(x * 0xac4c1b51);
184-
x ^= (int)((uint)x >> 15);
185-
return x;
186-
}
187-
188-
private bool IncrementAt(int i, int j)
189-
{
190-
int offset = j << 2;
191-
long mask = (0xfL << offset);
192-
193-
if ((table[i] & mask) != mask)
194-
{
195-
table[i] += (1L << offset);
196-
return true;
197-
}
198-
199-
return false;
200-
}
201-
202-
private void Reset()
203-
{
204-
// unroll, almost 2x faster
205-
int count0 = 0;
206-
int count1 = 0;
207-
int count2 = 0;
208-
int count3 = 0;
209-
210-
for (int i = 0; i < table.Length; i += 4)
211-
{
212-
count0 += BitOps.BitCount(table[i] & OneMask);
213-
count1 += BitOps.BitCount(table[i + 1] & OneMask);
214-
count2 += BitOps.BitCount(table[i + 2] & OneMask);
215-
count3 += BitOps.BitCount(table[i + 3] & OneMask);
216-
217-
table[i] = (long)((ulong)table[i] >> 1) & ResetMask;
218-
table[i + 1] = (long)((ulong)table[i + 1] >> 1) & ResetMask;
219-
table[i + 2] = (long)((ulong)table[i + 2] >> 1) & ResetMask;
220-
table[i + 3] = (long)((ulong)table[i + 3] >> 1) & ResetMask;
221-
}
222-
223-
count0 = (count0 + count1) + (count2 + count3);
224-
225-
size = (size - (count0 >> 2)) >> 1;
226-
}
227-
228-
#if !NETSTANDARD2_0
229-
private unsafe int EstimateFrequencyAvx(T value)
230-
{
231-
int blockHash = Spread(comparer.GetHashCode(value));
232-
int counterHash = Rehash(blockHash);
233-
int block = (blockHash & blockMask) << 3;
234-
235-
Vector128<int> h = Vector128.Create(counterHash);
236-
h = Avx2.ShiftRightLogicalVariable(h.AsUInt32(), Vector128.Create(0U, 8U, 16U, 24U)).AsInt32();
237-
238-
var index = Avx2.ShiftRightLogical(h, 1);
239-
index = Avx2.And(index, Vector128.Create(15)); // j - counter index
240-
Vector128<int> offset = Avx2.And(h, Vector128.Create(1));
241-
Vector128<int> blockOffset = Avx2.Add(Vector128.Create(block), offset); // i - table index
242-
blockOffset = Avx2.Add(blockOffset, Vector128.Create(0, 2, 4, 6)); // + (i << 1)
243-
244-
fixed (long* tablePtr = table)
245-
{
246-
Vector256<long> tableVector = Avx2.GatherVector256(tablePtr, blockOffset, 8);
247-
index = Avx2.ShiftLeftLogical(index, 2);
248-
249-
// convert index from int to long via permute
250-
Vector256<long> indexLong = Vector256.Create(index, Vector128<int>.Zero).AsInt64();
251-
Vector256<int> permuteMask2 = Vector256.Create(0, 4, 1, 5, 2, 5, 3, 7);
252-
indexLong = Avx2.PermuteVar8x32(indexLong.AsInt32(), permuteMask2).AsInt64();
253-
tableVector = Avx2.ShiftRightLogicalVariable(tableVector, indexLong.AsUInt64());
254-
tableVector = Avx2.And(tableVector, Vector256.Create(0xfL));
255-
256-
Vector256<int> permuteMask = Vector256.Create(0, 2, 4, 6, 1, 3, 5, 7);
257-
Vector128<ushort> count = Avx2.PermuteVar8x32(tableVector.AsInt32(), permuteMask)
258-
.GetLower()
259-
.AsUInt16();
260-
261-
// set the zeroed high parts of the long value to ushort.Max
262-
#if NET6_0
263-
count = Avx2.Blend(count, Vector128<ushort>.AllBitsSet, 0b10101010);
264-
#else
265-
count = Avx2.Blend(count, Vector128.Create(ushort.MaxValue), 0b10101010);
266-
#endif
267-
268-
return Avx2.MinHorizontal(count).GetElement(0);
269-
}
270-
}
271-
272-
private unsafe void IncrementAvx(T value)
273-
{
274-
int blockHash = Spread(comparer.GetHashCode(value));
275-
int counterHash = Rehash(blockHash);
276-
int block = (blockHash & blockMask) << 3;
277-
278-
Vector128<int> h = Vector128.Create(counterHash);
279-
h = Avx2.ShiftRightLogicalVariable(h.AsUInt32(), Vector128.Create(0U, 8U, 16U, 24U)).AsInt32();
280-
281-
Vector128<int> index = Avx2.ShiftRightLogical(h, 1);
282-
index = Avx2.And(index, Vector128.Create(15)); // j - counter index
283-
Vector128<int> offset = Avx2.And(h, Vector128.Create(1));
284-
Vector128<int> blockOffset = Avx2.Add(Vector128.Create(block), offset); // i - table index
285-
blockOffset = Avx2.Add(blockOffset, Vector128.Create(0, 2, 4, 6)); // + (i << 1)
286-
287-
fixed (long* tablePtr = table)
288-
{
289-
Vector256<long> tableVector = Avx2.GatherVector256(tablePtr, blockOffset, 8);
290-
291-
// j == index
292-
index = Avx2.ShiftLeftLogical(index, 2);
293-
Vector256<long> offsetLong = Vector256.Create(index, Vector128<int>.Zero).AsInt64();
294-
295-
Vector256<int> permuteMask = Vector256.Create(0, 4, 1, 5, 2, 5, 3, 7);
296-
offsetLong = Avx2.PermuteVar8x32(offsetLong.AsInt32(), permuteMask).AsInt64();
297-
298-
// mask = (0xfL << offset)
299-
Vector256<long> fifteen = Vector256.Create(0xfL);
300-
Vector256<long> mask = Avx2.ShiftLeftLogicalVariable(fifteen, offsetLong.AsUInt64());
301-
302-
// (table[i] & mask) != mask)
303-
// Note masked is 'equal' - therefore use AndNot below
304-
Vector256<long> masked = Avx2.CompareEqual(Avx2.And(tableVector, mask), mask);
305-
306-
// 1L << offset
307-
Vector256<long> inc = Avx2.ShiftLeftLogicalVariable(Vector256.Create(1L), offsetLong.AsUInt64());
308-
309-
// Mask to zero out non-matches (add zero below) - AndNot negates its first operand and then ANDs it with the second (order matters)
310-
inc = Avx2.AndNot(masked, inc);
311-
312-
Vector256<byte> result = Avx2.CompareEqual(masked.AsByte(), Vector256<byte>.Zero);
313-
bool wasInc = Avx2.MoveMask(result.AsByte()) == unchecked((int)(0b1111_1111_1111_1111_1111_1111_1111_1111));
314-
315-
tablePtr[blockOffset.GetElement(0)] += inc.GetElement(0);
316-
tablePtr[blockOffset.GetElement(1)] += inc.GetElement(1);
317-
tablePtr[blockOffset.GetElement(2)] += inc.GetElement(2);
318-
tablePtr[blockOffset.GetElement(3)] += inc.GetElement(3);
319-
320-
if (wasInc && (++size == sampleSize))
321-
{
322-
Reset();
323-
}
324-
}
32516
}
326-
#endif
32717
}
32818
}

0 commit comments

Comments
 (0)