Skip to content

Add SharpLearning.Benchmarks #160

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 42 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
d5115b5
Add SharpLearning.Benchmarks
mdabros Feb 16, 2025
f01219c
Add SharpLearning.Benchmarks with RegressionDecisionTreeLearnerBenchmark
mdabros Feb 16, 2025
7875622
Add more benchmarks
mdabros Feb 16, 2025
93e28a4
refactor
mdabros Feb 17, 2025
587ad64
Add RegressionLearnerBenchmarks
mdabros Feb 17, 2025
c126689
Refactor
mdabros Feb 17, 2025
75cdb6e
revert
mdabros Feb 17, 2025
653d5fe
rename
mdabros Feb 17, 2025
baf3d30
rename
mdabros Feb 17, 2025
947b7fa
Add ClassificationLearners
mdabros Feb 17, 2025
c5cace2
Refactor
mdabros Feb 17, 2025
2c34a9c
enable
mdabros Feb 17, 2025
f9e7085
Add ClassificationBinomialGradientBoostLearner
mdabros Feb 17, 2025
71b08c1
add comment
mdabros Feb 17, 2025
cabd0c8
increase data size
mdabros Feb 17, 2025
e2d15cd
Add ClassificationModels
mdabros Feb 17, 2025
47d577d
Decrease
mdabros Feb 17, 2025
982b5c1
Remove
mdabros Feb 21, 2025
01f68a4
Add comment
mdabros Feb 21, 2025
b1576e1
Add remaining gradient boost learners
mdabros Feb 21, 2025
65f7342
Refactor
Mar 16, 2025
1f98d65
Refactor
Mar 16, 2025
580ef00
refactor
Mar 16, 2025
f5529f7
Refactor and add Benchmarks.ClassificationModels
Mar 16, 2025
c683c14
refactor
Mar 16, 2025
623e923
Fix usings and add Benchmarks.RegressionModels
Mar 16, 2025
0458c49
refactor
Mar 16, 2025
5146105
Rename
Mar 16, 2025
5dc0561
add
Mar 16, 2025
b06e4fc
Refactor
Mar 16, 2025
c25431f
capital
Mar 16, 2025
4f3e6c3
use for
Mar 16, 2025
797707e
Refactor
Mar 16, 2025
023420c
Refactor to get model name
Mar 16, 2025
7d2d78e
use model names in model benchmarks
Mar 16, 2025
5ce5306
nits
Mar 16, 2025
af4c161
Update comment
Mar 16, 2025
4e3eb36
rename
Mar 16, 2025
7efd397
Use learner name to model name mapping
Mar 16, 2025
af7ddee
Readd and fix
Mar 16, 2025
e56bc0e
Use single row for prediction benchmark
Mar 16, 2025
124ccb7
enable
Mar 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions SharpLearning.sln
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{2204FA16-973
src\SourceLink.GitHub.props = src\SourceLink.GitHub.props
EndProjectSection
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SharpLearning.Benchmarks", "src\SharpLearning.Benchmarks\SharpLearning.Benchmarks.csproj", "{81B3AA72-5F95-956D-C168-AE856C226E15}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -198,6 +200,10 @@ Global
{FFD79827-ED40-47EB-9CB8-2E683DEA3606}.Debug|Any CPU.Build.0 = Debug|Any CPU
{FFD79827-ED40-47EB-9CB8-2E683DEA3606}.Release|Any CPU.ActiveCfg = Release|Any CPU
{FFD79827-ED40-47EB-9CB8-2E683DEA3606}.Release|Any CPU.Build.0 = Release|Any CPU
{81B3AA72-5F95-956D-C168-AE856C226E15}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{81B3AA72-5F95-956D-C168-AE856C226E15}.Debug|Any CPU.Build.0 = Debug|Any CPU
{81B3AA72-5F95-956D-C168-AE856C226E15}.Release|Any CPU.ActiveCfg = Release|Any CPU
{81B3AA72-5F95-956D-C168-AE856C226E15}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
1 change: 1 addition & 0 deletions src/Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

<LibraryTargetFramework>netstandard2.0</LibraryTargetFramework>
<TestTargetFramework>net8</TestTargetFramework>
<ExecutableTargetFramework>$(TestTargetFramework)</ExecutableTargetFramework>

<LangVersion>12.0</LangVersion>
<Deterministic>true</Deterministic>
Expand Down
37 changes: 37 additions & 0 deletions src/SharpLearning.Benchmarks/Benchmarks.ClassificationLearners.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
using System.Collections.Generic;
using System.Linq;
using BenchmarkDotNet.Attributes;
using SharpLearning.Common.Interfaces;
using SharpLearning.Containers.Matrices;

namespace SharpLearning.Benchmarks;

public static partial class Benchmarks
{
    /// <summary>
    /// Benchmarks <c>Learn</c> for each learner registered in
    /// <see cref="DefaultLearners.LearnerNameToLearnerClassification"/>.
    /// One benchmark case is produced per learner name.
    /// </summary>
    [MemoryDiagnoser]
    public class ClassificationLearners
    {
        readonly IReadOnlyDictionary<string, ILearner<double>> m_learners =
            DefaultLearners.LearnerNameToLearnerClassification;

        F64Matrix m_features;
        double[] m_targets;

        /// <summary>
        /// Generates the classification data set (default size and seed) that the
        /// <see cref="Learn"/> benchmark trains on.
        /// </summary>
        [GlobalSetup]
        public void GlobalSetup()
        {
            var data = DataGenerator.GenerateClassificationData();
            m_features = data.Features;
            m_targets = data.Targets;
        }

        /// <summary>
        /// Trains the learner registered under <paramref name="learnerName"/> on the generated data.
        /// </summary>
        /// <param name="learnerName">Key into the default classification learner dictionary.</param>
        [Benchmark]
        [ArgumentsSource(nameof(GetLearners))]
        public void Learn(string learnerName) =>
            m_learners[learnerName].Learn(m_features, m_targets);

        /// <summary>
        /// Supplies the learner names used as arguments for <see cref="Learn"/>.
        /// </summary>
        /// <returns>The keys of the default classification learner dictionary.</returns>
        public IReadOnlyList<string> GetLearners() =>
            m_learners.Select(entry => entry.Key).ToArray();
    }
}
56 changes: 56 additions & 0 deletions src/SharpLearning.Benchmarks/Benchmarks.ClassificationModels.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
using System.Collections.Generic;
using System.Linq;
using BenchmarkDotNet.Attributes;
using SharpLearning.Common.Interfaces;
using SharpLearning.Containers.Matrices;

namespace SharpLearning.Benchmarks;

public static partial class Benchmarks
{
    /// <summary>
    /// Benchmarks <c>Predict</c> on a single feature row for one model per learner registered in
    /// <see cref="DefaultLearners.LearnerNameToLearnerClassification"/>.
    /// </summary>
    [MemoryDiagnoser]
    public class ClassificationModels
    {
        readonly IReadOnlyDictionary<string, ILearner<double>> m_learners =
            DefaultLearners.LearnerNameToLearnerClassification;
        readonly Dictionary<string, IPredictorModel<double>> m_models = new();

        // For creating models.
        F64Matrix m_features;
        double[] m_targets;
        // For prediction.
        double[] m_featureRow;

        /// <summary>
        /// Generates the classification data set, keeps its first row for prediction, and
        /// trains one model per default learner. Each model is stored under the model name
        /// that <see cref="DefaultLearners.LearnerNameToModelNameClassification"/> maps its
        /// learner name to.
        /// </summary>
        [GlobalSetup]
        public void GlobalSetup()
        {
            (m_features, m_targets) = DataGenerator.GenerateClassificationData();
            m_featureRow = m_features.Row(0);
            foreach (var (learnerName, learner) in m_learners)
            {
                var model = learner.Learn(m_features, m_targets);
                var modelName = DefaultLearners.LearnerNameToModelNameClassification[learnerName];
                m_models[modelName] = model;
            }
        }

        /// <summary>
        /// Predicts a single observation with the model stored under <paramref name="modelName"/>.
        /// </summary>
        /// <param name="modelName">One of the names returned by <see cref="GetModels"/>.</param>
        [Benchmark]
        [ArgumentsSource(nameof(GetModels))]
        public void Predict(string modelName)
        {
            var model = m_models[modelName];
            model.Predict(m_featureRow);
        }

        /// <summary>
        /// Supplies the model names used as arguments for <see cref="Predict"/>.
        /// </summary>
        /// <remarks>
        /// The names are derived from the learner-to-model name mapping instead of from
        /// <c>m_models</c>. This method can be called before <see cref="GlobalSetup"/> has
        /// populated <c>m_models</c>, and training every learner only to list names is
        /// unnecessary work. The result has the same names in the same order as the keys
        /// <see cref="GlobalSetup"/> inserts.
        /// </remarks>
        /// <returns>One model name per default classification learner.</returns>
        public IReadOnlyList<string> GetModels() =>
            m_learners.Keys
                .Select(learnerName => DefaultLearners.LearnerNameToModelNameClassification[learnerName])
                .ToArray();
    }
}
37 changes: 37 additions & 0 deletions src/SharpLearning.Benchmarks/Benchmarks.RegressionLearners.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
using System.Collections.Generic;
using System.Linq;
using BenchmarkDotNet.Attributes;
using SharpLearning.Common.Interfaces;
using SharpLearning.Containers.Matrices;

namespace SharpLearning.Benchmarks;

public static partial class Benchmarks
{
    /// <summary>
    /// Benchmarks <c>Learn</c> for each learner registered in
    /// <see cref="DefaultLearners.LearnerNameToLearnerRegression"/>.
    /// One benchmark case is produced per learner name.
    /// </summary>
    [MemoryDiagnoser]
    public class RegressionLearners
    {
        readonly IReadOnlyDictionary<string, ILearner<double>> m_learners =
            DefaultLearners.LearnerNameToLearnerRegression;

        F64Matrix m_features;
        double[] m_targets;

        /// <summary>
        /// Generates the regression data set (default size and seed) that the
        /// <see cref="Learn"/> benchmark trains on.
        /// </summary>
        [GlobalSetup]
        public void GlobalSetup()
        {
            var data = DataGenerator.GenerateRegressionData();
            m_features = data.Features;
            m_targets = data.Targets;
        }

        /// <summary>
        /// Trains the learner registered under <paramref name="learnerName"/> on the generated data.
        /// </summary>
        /// <param name="learnerName">Key into the default regression learner dictionary.</param>
        [Benchmark]
        [ArgumentsSource(nameof(GetLearners))]
        public void Learn(string learnerName) =>
            m_learners[learnerName].Learn(m_features, m_targets);

        /// <summary>
        /// Supplies the learner names used as arguments for <see cref="Learn"/>.
        /// </summary>
        /// <returns>The keys of the default regression learner dictionary.</returns>
        public IReadOnlyList<string> GetLearners() =>
            m_learners.Select(entry => entry.Key).ToArray();
    }
}
56 changes: 56 additions & 0 deletions src/SharpLearning.Benchmarks/Benchmarks.RegressionModels.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
using System.Collections.Generic;
using System.Linq;
using BenchmarkDotNet.Attributes;
using SharpLearning.Common.Interfaces;
using SharpLearning.Containers.Matrices;

namespace SharpLearning.Benchmarks;

public static partial class Benchmarks
{
    /// <summary>
    /// Benchmarks <c>Predict</c> on a single feature row for one model per learner registered in
    /// <see cref="DefaultLearners.LearnerNameToLearnerRegression"/>.
    /// </summary>
    [MemoryDiagnoser]
    public class RegressionModels
    {
        readonly IReadOnlyDictionary<string, ILearner<double>> m_learners =
            DefaultLearners.LearnerNameToLearnerRegression;
        readonly Dictionary<string, IPredictorModel<double>> m_models = [];

        // For creating models.
        F64Matrix m_features;
        double[] m_targets;
        // For prediction.
        double[] m_featureRow;

        /// <summary>
        /// Generates the regression data set, keeps its first row for prediction, and
        /// trains one model per default learner. Each model is stored under the model name
        /// that <see cref="DefaultLearners.LearnerNameToModelNameRegression"/> maps its
        /// learner name to.
        /// </summary>
        [GlobalSetup]
        public void GlobalSetup()
        {
            (m_features, m_targets) = DataGenerator.GenerateRegressionData();
            m_featureRow = m_features.Row(0);
            foreach (var (learnerName, learner) in m_learners)
            {
                var model = learner.Learn(m_features, m_targets);
                var modelName = DefaultLearners.LearnerNameToModelNameRegression[learnerName];
                m_models[modelName] = model;
            }
        }

        /// <summary>
        /// Predicts a single observation with the model stored under <paramref name="modelName"/>.
        /// </summary>
        /// <param name="modelName">One of the names returned by <see cref="GetModels"/>.</param>
        [Benchmark]
        [ArgumentsSource(nameof(GetModels))]
        public void Predict(string modelName)
        {
            var model = m_models[modelName];
            model.Predict(m_featureRow);
        }

        /// <summary>
        /// Supplies the model names used as arguments for <see cref="Predict"/>.
        /// </summary>
        /// <remarks>
        /// The names are derived from the learner-to-model name mapping instead of from
        /// <c>m_models</c>. This method can be called before <see cref="GlobalSetup"/> has
        /// populated <c>m_models</c>, and training every learner only to list names is
        /// unnecessary work. The result has the same names in the same order as the keys
        /// <see cref="GlobalSetup"/> inserts.
        /// </remarks>
        /// <returns>One model name per default regression learner.</returns>
        public IReadOnlyList<string> GetModels() =>
            m_learners.Keys
                .Select(learnerName => DefaultLearners.LearnerNameToModelNameRegression[learnerName])
                .ToArray();
    }
}
56 changes: 56 additions & 0 deletions src/SharpLearning.Benchmarks/DataGenerator.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
using System;
using SharpLearning.Containers.Matrices;

namespace SharpLearning.Benchmarks;

/// <summary>
/// Creates seeded pseudo-random feature matrices and target vectors for the benchmarks.
/// </summary>
public static class DataGenerator
{
    // Default data size for benchmarks.
    public const int DefaultRows = 1000;
    public const int DefaultCols = 10;
    public const int DefaultMinTargetValue = 0;
    public const int DefaultMaxTargetValue = 10;
    public const int DefaultSeed = 42;

    /// <summary>
    /// Generates a regression data set where both features and targets are values from
    /// <see cref="Random.NextDouble"/>.
    /// </summary>
    /// <param name="rows">Number of observations.</param>
    /// <param name="cols">Number of features per observation.</param>
    /// <param name="seed">Seed for the random generator.</param>
    /// <returns>A <c>rows</c> x <c>cols</c> feature matrix and <c>rows</c> targets.</returns>
    public static (F64Matrix Features, double[] Targets) GenerateRegressionData(
        int rows = DefaultRows, int cols = DefaultCols,
        int seed = DefaultSeed)
    {
        var rng = new Random(seed);
        // Features are drawn before targets from the same generator.
        var featureValues = Fill(rows * cols, rng.NextDouble);
        var targetValues = Fill(rows, rng.NextDouble);
        return (new F64Matrix(featureValues, rows, cols), targetValues);
    }

    /// <summary>
    /// Generates a classification data set where features are values from
    /// <see cref="Random.NextDouble"/> and targets are integers from
    /// <see cref="Random.Next(int, int)"/> stored as doubles.
    /// </summary>
    /// <param name="rows">Number of observations.</param>
    /// <param name="cols">Number of features per observation.</param>
    /// <param name="minTargetValue">Lower bound passed to <see cref="Random.Next(int, int)"/>.</param>
    /// <param name="maxTargetValue">Upper bound passed to <see cref="Random.Next(int, int)"/>.</param>
    /// <param name="seed">Seed for the random generator.</param>
    /// <returns>A <c>rows</c> x <c>cols</c> feature matrix and <c>rows</c> targets.</returns>
    public static (F64Matrix Features, double[] Targets) GenerateClassificationData(
        int rows = DefaultRows, int cols = DefaultCols,
        int minTargetValue = DefaultMinTargetValue,
        int maxTargetValue = DefaultMaxTargetValue,
        int seed = DefaultSeed)
    {
        var rng = new Random(seed);
        // Features are drawn before targets from the same generator.
        var featureValues = Fill(rows * cols, rng.NextDouble);
        var targetValues = Fill(rows, () => rng.Next(minTargetValue, maxTargetValue));
        return (new F64Matrix(featureValues, rows, cols), targetValues);
    }

    // Allocates an array of the given length and fills it front to back from the value source.
    static double[] Fill(int length, Func<double> nextValue)
    {
        var values = new double[length];
        var index = 0;
        while (index < values.Length)
        {
            values[index] = nextValue();
            index++;
        }
        return values;
    }
}
69 changes: 69 additions & 0 deletions src/SharpLearning.Benchmarks/DefaultLearners.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
using System.Collections.Generic;
using SharpLearning.AdaBoost.Learners;
using SharpLearning.AdaBoost.Models;
using SharpLearning.Common.Interfaces;
using SharpLearning.DecisionTrees.Learners;
using SharpLearning.DecisionTrees.Models;
using SharpLearning.GradientBoost.Learners;
using SharpLearning.GradientBoost.Models;
using SharpLearning.RandomForest.Learners;
using SharpLearning.RandomForest.Models;

namespace SharpLearning.Benchmarks;

/// <summary>
/// Central registry of the learners exercised by the benchmarks, and of the names under
/// which the models they produce are reported.
/// </summary>
public static class DefaultLearners
{
    const string NameSeparator = "_";

    // Define classification learners here. Use default parameters for benchmarks.
    public static readonly IReadOnlyDictionary<string, ILearner<double>> LearnerNameToLearnerClassification =
        new Dictionary<string, ILearner<double>>()
        {
            [nameof(ClassificationDecisionTreeLearner)] = new ClassificationDecisionTreeLearner(),
            [nameof(ClassificationAdaBoostLearner)] = new ClassificationAdaBoostLearner(),
            [nameof(ClassificationRandomForestLearner)] = new ClassificationRandomForestLearner(),
            [nameof(ClassificationExtremelyRandomizedTreesLearner)] = new ClassificationExtremelyRandomizedTreesLearner(),
            [nameof(ClassificationBinomialGradientBoostLearner)] = new ClassificationBinomialGradientBoostLearner(),
        };

    // Define regression learners here. Use default parameters for benchmarks.
    public static readonly IReadOnlyDictionary<string, ILearner<double>> LearnerNameToLearnerRegression =
        new Dictionary<string, ILearner<double>>()
        {
            [nameof(RegressionDecisionTreeLearner)] = new RegressionDecisionTreeLearner(),
            [nameof(RegressionAdaBoostLearner)] = new RegressionAdaBoostLearner(),
            [nameof(RegressionRandomForestLearner)] = new RegressionRandomForestLearner(),
            [nameof(RegressionExtremelyRandomizedTreesLearner)] = new RegressionExtremelyRandomizedTreesLearner(),
            [nameof(RegressionAbsoluteLossGradientBoostLearner)] = new RegressionAbsoluteLossGradientBoostLearner(),
            [nameof(RegressionHuberLossGradientBoostLearner)] = new RegressionHuberLossGradientBoostLearner(),
            [nameof(RegressionQuantileLossGradientBoostLearner)] = new RegressionQuantileLossGradientBoostLearner(),
            [nameof(RegressionSquareLossGradientBoostLearner)] = new RegressionSquareLossGradientBoostLearner(),
        };

    // Classification learner name -> model name. Several learners share a model type;
    // those entries get the learner name appended so every model name stays unique.
    public static readonly IReadOnlyDictionary<string, string> LearnerNameToModelNameClassification =
        new Dictionary<string, string>()
        {
            [nameof(ClassificationDecisionTreeLearner)] = nameof(ClassificationDecisionTreeModel),
            [nameof(ClassificationAdaBoostLearner)] = nameof(ClassificationAdaBoostModel),
            [nameof(ClassificationRandomForestLearner)] =
                Qualify(nameof(ClassificationForestModel), nameof(ClassificationRandomForestLearner)),
            [nameof(ClassificationExtremelyRandomizedTreesLearner)] =
                Qualify(nameof(ClassificationForestModel), nameof(ClassificationExtremelyRandomizedTreesLearner)),
            [nameof(ClassificationBinomialGradientBoostLearner)] = nameof(ClassificationGradientBoostModel),
        };

    // Regression learner name -> model name. Several learners share a model type;
    // those entries get the learner name appended so every model name stays unique.
    public static readonly IReadOnlyDictionary<string, string> LearnerNameToModelNameRegression =
        new Dictionary<string, string>()
        {
            [nameof(RegressionDecisionTreeLearner)] = nameof(RegressionDecisionTreeModel),
            [nameof(RegressionAdaBoostLearner)] = nameof(RegressionAdaBoostModel),
            [nameof(RegressionRandomForestLearner)] =
                Qualify(nameof(RegressionForestModel), nameof(RegressionRandomForestLearner)),
            [nameof(RegressionExtremelyRandomizedTreesLearner)] =
                Qualify(nameof(RegressionForestModel), nameof(RegressionExtremelyRandomizedTreesLearner)),
            [nameof(RegressionAbsoluteLossGradientBoostLearner)] =
                Qualify(nameof(RegressionGradientBoostModel), nameof(RegressionAbsoluteLossGradientBoostLearner)),
            [nameof(RegressionHuberLossGradientBoostLearner)] =
                Qualify(nameof(RegressionGradientBoostModel), nameof(RegressionHuberLossGradientBoostLearner)),
            [nameof(RegressionQuantileLossGradientBoostLearner)] =
                Qualify(nameof(RegressionGradientBoostModel), nameof(RegressionQuantileLossGradientBoostLearner)),
            [nameof(RegressionSquareLossGradientBoostLearner)] =
                Qualify(nameof(RegressionGradientBoostModel), nameof(RegressionSquareLossGradientBoostLearner)),
        };

    // Builds "<modelName>_<learnerName>" for model types produced by more than one learner.
    static string Qualify(string modelName, string learnerName) =>
        modelName + NameSeparator + learnerName;
}
28 changes: 28 additions & 0 deletions src/SharpLearning.Benchmarks/SharpLearning.Benchmarks.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<!-- Console host for the SharpLearning benchmarks. -->
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<!-- ExecutableTargetFramework is defined in src/Directory.Build.props. -->
<!-- NOTE(review): the plural TargetFrameworks is used with a single value; confirm this is intended. -->
<TargetFrameworks>$(ExecutableTargetFramework)</TargetFrameworks>
<!-- Benchmark host is not packed. -->
<IsPackable>false</IsPackable>
<!-- NOTE(review): presumably symbols are emitted so benchmark tooling can resolve them; confirm. -->
<DebugType>pdbonly</DebugType>
<DebugSymbols>true</DebugSymbols>
<!-- Built as an executable; program.cs holds the top-level entry point. -->
<OutputType>Exe</OutputType>
</PropertyGroup>

<!-- Libraries providing the learners, models and matrix type used by the benchmarks. -->
<ItemGroup>
<ProjectReference Include="..\SharpLearning.AdaBoost\SharpLearning.AdaBoost.csproj" />
<ProjectReference Include="..\SharpLearning.Containers\SharpLearning.Containers.csproj" />
<ProjectReference Include="..\SharpLearning.DecisionTrees\SharpLearning.DecisionTrees.csproj" />
<ProjectReference Include="..\SharpLearning.GradientBoost\SharpLearning.GradientBoost.csproj" />
<ProjectReference Include="..\SharpLearning.RandomForest\SharpLearning.RandomForest.csproj" />
</ItemGroup>

<ItemGroup>
<!-- Benchmark harness used by the Benchmarks classes and program.cs. -->
<PackageReference Include="BenchmarkDotNet" Version="0.14.0" />
<!-- NOTE(review): the benchmark sources in this change do not appear to use a Windows-specific diagnoser; confirm this package is needed. -->
<PackageReference Include="BenchmarkDotNet.Diagnostics.Windows" Version="0.14.0" />
<!-- Referenced with PrivateAssets=all, so it does not flow to projects referencing this one. -->
<PackageReference Include="Nerdbank.GitVersioning" Version="3.6.139">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
</ItemGroup>

</Project>
22 changes: 22 additions & 0 deletions src/SharpLearning.Benchmarks/program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Type 'Program' can be sealed because it has no subtypes in its containing assembly and is not externally visible
#pragma warning disable CA1852
using System;
using System.Diagnostics;
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Reports;
using BenchmarkDotNet.Running;
using SharpLearning.Benchmarks;

[assembly: System.Runtime.InteropServices.ComVisible(false)]

// Writes a message to both the console and the trace listeners.
static void Log(string message)
{
    Console.WriteLine(message);
    Trace.WriteLine(message);
}

Log($"{Environment.Version} args: {args.Length}");

// Use the in-process debug configuration when a debugger is attached, otherwise the default one.
IConfig baseConfig = Debugger.IsAttached
    ? new DebugInProcessConfig()
    : DefaultConfig.Instance;

// Widen the parameter column so long learner and model names are shown in the summary.
var summaryStyle = SummaryStyle.Default.WithMaxParameterColumnWidth(200);
var config = baseConfig.WithSummaryStyle(summaryStyle);

// Benchmark classes are run one after another, in this order.
Type[] benchmarkTypes =
[
    typeof(Benchmarks.ClassificationLearners),
    typeof(Benchmarks.RegressionLearners),
    typeof(Benchmarks.ClassificationModels),
    typeof(Benchmarks.RegressionModels),
];

foreach (var benchmarkType in benchmarkTypes)
{
    BenchmarkRunner.Run(benchmarkType, config, args);
}