Commit 6fd5018

throughput test (#51)
* throughput test
* add results
* chart
1 parent 4b2f3a0 commit 6fd5018

File tree

5 files changed: +195 -1 lines changed
BitFaster.Caching.ThroughputAnalysis\BitFaster.Caching.ThroughputAnalysis.csproj

Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>netcoreapp3.1</TargetFramework>
  </PropertyGroup>

  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
    <NoWarn>1701;1702;CS8002</NoWarn>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="CsvHelper" Version="15.0.5" />
    <PackageReference Include="MathNet.Numerics" Version="4.11.0" />
  </ItemGroup>

  <ItemGroup>
    <ProjectReference Include="..\BitFaster.Caching\BitFaster.Caching.csproj" />
  </ItemGroup>

</Project>
BitFaster.Caching.ThroughputAnalysis\Program.cs

Lines changed: 155 additions & 0 deletions
@@ -0,0 +1,155 @@
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection.Metadata.Ecma335;
using System.Threading;
using System.Threading.Tasks;
using BitFaster.Caching.Lru;
using CsvHelper;
using MathNet.Numerics.Distributions;

namespace BitFaster.Caching.ThroughputAnalysis
{
    class Program
    {
        const double s = 0.86;
        const int n = 500;
        const int capacity = 50;
        const int maxThreads = 52;
        const int sampleCount = 2000;
        const int repeatCount = 200;

        private static int[] samples = new int[sampleCount];

        static void Main(string[] args)
        {
            ThreadPool.SetMaxThreads(maxThreads, maxThreads);

            Console.WriteLine("Generating input distribution...");
            samples = new int[sampleCount];
            Zipf.Samples(samples, s, n);

            int[] threadCount = Enumerable.Range(1, maxThreads).ToArray();

            // Desired output:
            // Class      1 2 3 4 5
            // Classic    5 6 7 7 8
            // Concurrent 5 6 7 7 8
            DataTable resultTable = new DataTable();
            resultTable.Clear();
            resultTable.Columns.Add("Class");
            foreach (var tc in threadCount)
            {
                resultTable.Columns.Add(tc.ToString());
            }

            DataRow concurrentLru = resultTable.NewRow();
            DataRow classicLru = resultTable.NewRow();
            concurrentLru["Class"] = "concurrentLru";
            classicLru["Class"] = "classicLru";

            foreach (int tc in threadCount)
            {
                const int warmup = 3;
                const int runs = 6;
                double[] results = new double[warmup + runs];

                for (int i = 0; i < warmup + runs; i++)
                {
                    results[i] = MeasureThroughput(new ConcurrentLru<int, int>(tc, capacity, EqualityComparer<int>.Default), tc);
                }
                double avg = AverageLast(results, runs) / 1000000;
                Console.WriteLine($"ConcurrLru ({tc}) {avg} million ops/sec");
                concurrentLru[tc.ToString()] = avg.ToString();

                for (int i = 0; i < warmup + runs; i++)
                {
                    results[i] = MeasureThroughput(new ClassicLru<int, int>(tc, capacity, EqualityComparer<int>.Default), tc);
                }
                avg = AverageLast(results, runs) / 1000000;
                Console.WriteLine($"ClassicLru ({tc}) {avg} million ops/sec");
                classicLru[tc.ToString()] = avg.ToString();
            }

            resultTable.Rows.Add(concurrentLru);
            resultTable.Rows.Add(classicLru);

            ExportCsv(resultTable);

            Console.WriteLine("Done.");
        }

        // Average of the final `count` runs; the preceding warmup runs are discarded.
        private static double AverageLast(double[] results, int count)
        {
            double result = 0;
            for (int i = results.Length - count; i < results.Length; i++)
            {
                result += results[i];
            }

            return result / count;
        }

        private static double MeasureThroughput(ICache<int, int> cache, int threadCount)
        {
            var tasks = new Task[threadCount];
            var sw = Stopwatch.StartNew();

            for (int i = 0; i < threadCount; i++)
            {
                tasks[i] = Task.Run(() => Test(cache));
            }

            Task.WaitAll(tasks);

            sw.Stop();

            // throughput = ops/sec
            return (threadCount * sampleCount * repeatCount) / sw.Elapsed.TotalSeconds;
        }

        private static void Test(ICache<int, int> cache)
        {
            // cache has capacity 50
            // the zipf distribution draws 2000 samples from 500 distinct keys
            // each thread looks up all 2000 samples repeatCount (200) times, for a total of 400k GetOrAdds per thread
            Func<int, int> func = x => x;

            for (int j = 0; j < repeatCount; j++)
            {
                for (int i = 0; i < sampleCount; i++)
                {
                    cache.GetOrAdd(samples[i], func);
                }
            }
        }

        public static void ExportCsv(DataTable results)
        {
            using (var textWriter = File.CreateText(@"Results.csv"))
            using (var csv = new CsvWriter(textWriter, CultureInfo.InvariantCulture))
            {
                foreach (DataColumn column in results.Columns)
                {
                    csv.WriteField(column.ColumnName);
                }
                csv.NextRecord();

                foreach (DataRow row in results.Rows)
                {
                    for (var i = 0; i < results.Columns.Count; i++)
                    {
                        csv.WriteField(row[i]);
                    }
                    csv.NextRecord();
                }
            }
        }
    }
}
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
Class,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52
concurrentLru,0.8390250696495687,0.376202024249042,0.5288059086657011,0.7093868281940474,0.7394893604119547,0.7692742628140901,0.8868098687242683,1.1103519079940203,1.1146275546334696,1.1896956651415957,1.218291000213035,1.428007110761408,1.5604813148567545,1.7652261790607608,1.7392266215766368,1.7262235364537413,1.5551914577907315,1.2065476930204835,1.6772190225591257,1.4505442732221985,1.6554578375581996,1.6428097093936829,1.383943422226727,1.541787500246927,1.4977153849517946,1.5929945003396602,1.464912894651603,1.6771227784749316,1.693182784861802,1.5899175063352249,1.708473989654639,1.676827059628992,1.5859594719591195,1.4853919456762457,1.5125032594445238,1.5540944333826734,1.5259467447885415,1.4007292749524853,1.722761892109386,1.3916878485906248,1.5881997225453819,1.7367430064460458,1.623407206289487,1.8656657878532774,1.633866434904528,1.819263632874497,1.7351197239993046,1.7420114756094713,1.603307315418468,1.6224748329005056,1.8875801666400183,1.5992687897052116
classicLru,0.9098320086179288,0.3548161187640513,0.28308098550681976,0.2685043912718486,0.2878894384794232,0.2892307322982296,0.29284371537515264,0.28980868732332427,0.30849250561160724,0.26892877721191133,0.29921794493796816,0.299734102131323,0.29605330990959544,0.3017998708361224,0.30384554825860344,0.2789464544585001,0.30199199158096063,0.315005855171333,0.3000289725345978,0.30821724908553866,0.3074555196413347,0.31380872502367835,0.3032383329166773,0.3088044252600553,0.27653983338663085,0.2719218492421865,0.3121618896532693,0.30581076829396336,0.2688720853107021,0.3063708500881169,0.3116421735018943,0.31048221579941243,0.3056369823416011,0.31830101999889526,0.2925308136601242,0.30431060537168064,0.31187804957991944,0.3194568990601275,0.32811896351813086,0.31280839486391765,0.2798944769149913,0.31559749074747057,0.3189235194959264,0.3111095381224312,0.33029303730386816,0.28871616360715247,0.3248120583007375,0.3199919522024021,0.2834168353076367,0.3140325356925769,0.3263779096986623,0.26610456988977205

BitFaster.sln

Lines changed: 6 additions & 0 deletions
@@ -16,6 +16,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BitFaster.Caching.Benchmark
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BitFaster.Caching.HitRateAnalysis", "BitFaster.Caching.HitRateAnalysis\BitFaster.Caching.HitRateAnalysis.csproj", "{12AAE7FB-09F5-4A87-838E-891ACEF5722B}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BitFaster.Caching.ThroughputAnalysis", "BitFaster.Caching.ThroughputAnalysis\BitFaster.Caching.ThroughputAnalysis.csproj", "{EF9968AF-10B2-4205-9C42-19A594BC98C1}"
+EndProject
 Global
     GlobalSection(SolutionConfigurationPlatforms) = preSolution
         Debug|Any CPU = Debug|Any CPU
@@ -38,6 +40,10 @@ Global
         {12AAE7FB-09F5-4A87-838E-891ACEF5722B}.Debug|Any CPU.Build.0 = Debug|Any CPU
         {12AAE7FB-09F5-4A87-838E-891ACEF5722B}.Release|Any CPU.ActiveCfg = Release|Any CPU
         {12AAE7FB-09F5-4A87-838E-891ACEF5722B}.Release|Any CPU.Build.0 = Release|Any CPU
+        {EF9968AF-10B2-4205-9C42-19A594BC98C1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+        {EF9968AF-10B2-4205-9C42-19A594BC98C1}.Debug|Any CPU.Build.0 = Debug|Any CPU
+        {EF9968AF-10B2-4205-9C42-19A594BC98C1}.Release|Any CPU.ActiveCfg = Release|Any CPU
+        {EF9968AF-10B2-4205-9C42-19A594BC98C1}.Release|Any CPU.Build.0 = Release|Any CPU
     EndGlobalSection
     GlobalSection(SolutionProperties) = preSolution
         HideSolutionNode = FALSE

README.md

Lines changed: 10 additions & 1 deletion
@@ -139,7 +139,7 @@ These charts summarize the percentage increase in hit rate ConcurrentLru vs LRU.
 </tr>
 </table>

-## ConcurrentLru Benchmarks
+## ConcurrentLru Latency

 In these benchmarks, a cache miss is essentially free. These tests exist purely to compare the raw execution speed of the cache bookkeeping code. In a real setting, where a cache miss is presumably quite expensive, the relative overhead of the cache will be very small.
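For illustration, here is what an "essentially free" miss looks like in code (a minimal sketch; the names and constructor arguments are arbitrary, not the benchmarks' exact settings). The value factory just echoes the key, so the measured cost of a miss is dominated by the cache's own bookkeeping:

using System.Collections.Generic;
using BitFaster.Caching.Lru;

class FreeMissExample
{
    static void Main()
    {
        // The factory does no real work, so the cost of a miss is dominated by
        // the cache's bookkeeping rather than by building the value.
        var cache = new ConcurrentLru<int, int>(1, 50, EqualityComparer<int>.Default);
        int value = cache.GetOrAdd(42, k => k);   // miss: factory just returns the key
        int again = cache.GetOrAdd(42, k => k);   // hit: factory is not invoked
    }
}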

@@ -199,6 +199,15 @@ FastConcurrentLru does not allocate and is approximately 10x faster than System.
 | RuntimeMemoryCache | 280.16 ns | 5.607 ns | 7.486 ns | 16.59 | 0.0153 | 32 B |
 | ExtensionsMemoryCache | 342.72 ns | 3.729 ns | 3.114 ns | 20.29 | 0.0114 | 24 B |

+
+## ConcurrentLru Throughput
+
+In this test, we generate 2000 samples of 500 keys with a Zipfian distribution (s = 0.86), and the caches have capacity 50. N concurrent threads fetch the sample keys in sequence, with every thread using the same input keys. The principal scalability limit in concurrent applications is the exclusive resource lock. As the number of threads increases, ConcurrentLru significantly outperforms an LRU implemented with a short-lived exclusive lock used to synchronize the linked list data structure.
+
+This test was run on a Standard D16s v3 Azure VM (16 CPUs), with .NET Core 3.1.
+
+![image](https://user-images.githubusercontent.com/12851828/86203563-2f941880-bb1a-11ea-8d6a-70ece91b4362.png)
+
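To illustrate the lock-bound baseline described above, here is a minimal sketch of an LRU synchronized by a single short-lived exclusive lock (names invented for illustration; this is not ClassicLru's actual implementation). Every hit and every miss must take the same lock to splice the linked list, so all threads serialize on it and throughput stops scaling as the thread count grows:

using System;
using System.Collections.Generic;

// Minimal sketch: an LRU guarded by one short-lived exclusive lock.
public class LockLru<K, V>
{
    private readonly int capacity;
    private readonly object sync = new object();
    private readonly Dictionary<K, LinkedListNode<KeyValuePair<K, V>>> map
        = new Dictionary<K, LinkedListNode<KeyValuePair<K, V>>>();
    private readonly LinkedList<KeyValuePair<K, V>> list
        = new LinkedList<KeyValuePair<K, V>>();

    public LockLru(int capacity)
    {
        this.capacity = capacity;
    }

    public V GetOrAdd(K key, Func<K, V> valueFactory)
    {
        lock (sync)
        {
            if (map.TryGetValue(key, out var node))
            {
                list.Remove(node);       // unlink from current position
                list.AddFirst(node);     // move to most-recently-used position
                return node.Value.Value;
            }

            var newNode = list.AddFirst(new KeyValuePair<K, V>(key, valueFactory(key)));
            map[key] = newNode;

            if (map.Count > capacity)
            {
                map.Remove(list.Last.Value.Key);   // evict the least-recently-used entry
                list.RemoveLast();
            }

            return newNode.Value.Value;
        }
    }
}

The throughput chart above compares ConcurrentLru against exactly this style of lock-synchronized design, measured via ClassicLru in the new Program.cs.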
 ## Meta-programming using structs and JIT value type optimization

 TemplateConcurrentLru features injectable behaviors defined as structs. Structs are subject to special JIT optimizations, and the .NET JIT compiler can inline, eliminate dead code and propagate JIT time constants based on structs. Using this technique, the TemplateConcurrentLru can be customized to support LRU and TLRU policies without compromising execution speed.
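A minimal sketch of the general struct-policy technique follows (the interface and policy names are invented for illustration and are not TemplateConcurrentLru's actual types). Because the policy is a struct generic argument, the JIT compiles a separate specialization per policy, so the policy call is devirtualized and inlined, and a policy that always returns false lets the JIT remove the expiry branch as dead code:

using System;

public interface IDiscardPolicy
{
    bool ShouldDiscard(DateTime created);
}

// LRU-style policy: never expires by time. After inlining, the check vanishes as dead code.
public struct NoExpiry : IDiscardPolicy
{
    public bool ShouldDiscard(DateTime created) => false;
}

// TLRU-style policy: expires entries after a fixed time to live.
public struct TimeExpiry : IDiscardPolicy
{
    public TimeSpan TimeToLive;
    public bool ShouldDiscard(DateTime created) => DateTime.UtcNow - created > TimeToLive;
}

// The policy is injected as a struct type parameter, so each instantiation of
// PolicyCache<TPolicy> gets its own JIT-specialized, non-virtual copy of the check.
public class PolicyCache<TPolicy> where TPolicy : struct, IDiscardPolicy
{
    private TPolicy policy;

    public PolicyCache(TPolicy policy)
    {
        this.policy = policy;
    }

    public bool IsExpired(DateTime created)
    {
        return policy.ShouldDiscard(created);   // direct call, no boxing, inlinable
    }
}

For example, new PolicyCache<NoExpiry>(default) behaves like a plain LRU check, while new PolicyCache<TimeExpiry>(...) gives TLRU-style expiry, with no virtual dispatch in either case.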
