diff --git a/SharpLearning.sln b/SharpLearning.sln index 5f580ccb..ba8e45cb 100644 --- a/SharpLearning.sln +++ b/SharpLearning.sln @@ -84,6 +84,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{2204FA16-973 src\SourceLink.GitHub.props = src\SourceLink.GitHub.props EndProjectSection EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SharpLearning.Benchmarks", "src\SharpLearning.Benchmarks\SharpLearning.Benchmarks.csproj", "{81B3AA72-5F95-956D-C168-AE856C226E15}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -198,6 +200,10 @@ Global {FFD79827-ED40-47EB-9CB8-2E683DEA3606}.Debug|Any CPU.Build.0 = Debug|Any CPU {FFD79827-ED40-47EB-9CB8-2E683DEA3606}.Release|Any CPU.ActiveCfg = Release|Any CPU {FFD79827-ED40-47EB-9CB8-2E683DEA3606}.Release|Any CPU.Build.0 = Release|Any CPU + {81B3AA72-5F95-956D-C168-AE856C226E15}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {81B3AA72-5F95-956D-C168-AE856C226E15}.Debug|Any CPU.Build.0 = Debug|Any CPU + {81B3AA72-5F95-956D-C168-AE856C226E15}.Release|Any CPU.ActiveCfg = Release|Any CPU + {81B3AA72-5F95-956D-C168-AE856C226E15}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/src/Directory.Build.props b/src/Directory.Build.props index 4a5cd026..b3baaa71 100644 --- a/src/Directory.Build.props +++ b/src/Directory.Build.props @@ -15,6 +15,7 @@ netstandard2.0 net8 + $(TestTargetFramework) 12.0 true diff --git a/src/SharpLearning.Benchmarks/Benchmarks.ClassificationLearners.cs b/src/SharpLearning.Benchmarks/Benchmarks.ClassificationLearners.cs new file mode 100644 index 00000000..614e4cf0 --- /dev/null +++ b/src/SharpLearning.Benchmarks/Benchmarks.ClassificationLearners.cs @@ -0,0 +1,37 @@ +using System.Collections.Generic; +using System.Linq; +using BenchmarkDotNet.Attributes; +using SharpLearning.Common.Interfaces; +using 
SharpLearning.Containers.Matrices;
+
+namespace SharpLearning.Benchmarks;
+
+public static partial class Benchmarks
+{
+    /// <summary>
+    /// Benchmarks <c>Learn</c> for every classification learner registered in
+    /// <see cref="DefaultLearners.LearnerNameToLearnerClassification"/>.
+    /// </summary>
+    [MemoryDiagnoser]
+    public class ClassificationLearners
+    {
+        // Learner name -> learner instance; the names double as benchmark arguments.
+        // NOTE(review): generic type arguments were missing in this copy of the patch and
+        // are restored from usage (string keys, Learn(F64Matrix, double[])) - confirm.
+        readonly IReadOnlyDictionary<string, ILearner<double>> m_learners =
+            DefaultLearners.LearnerNameToLearnerClassification;
+
+        F64Matrix m_features;
+        double[] m_targets;
+
+        /// <summary>Generates the classification data set used by <see cref="Learn"/>.</summary>
+        [GlobalSetup]
+        public void GlobalSetup()
+        {
+            (m_features, m_targets) = DataGenerator.GenerateClassificationData();
+        }
+
+        /// <summary>Trains the learner registered under <paramref name="learnerName"/>.</summary>
+        /// <param name="learnerName">Key into the learner dictionary, supplied by <see cref="GetLearners"/>.</param>
+        [Benchmark]
+        [ArgumentsSource(nameof(GetLearners))]
+        public void Learn(string learnerName)
+        {
+            var learner = m_learners[learnerName];
+            learner.Learn(m_features, m_targets);
+        }
+
+        /// <summary>Returns the learner names used as arguments for <see cref="Learn"/>.</summary>
+        public IReadOnlyList<string> GetLearners() =>
+            m_learners.Keys.ToArray();
+    }
+}
diff --git a/src/SharpLearning.Benchmarks/Benchmarks.ClassificationModels.cs b/src/SharpLearning.Benchmarks/Benchmarks.ClassificationModels.cs
new file mode 100644
index 00000000..f2d11248
--- /dev/null
+++ b/src/SharpLearning.Benchmarks/Benchmarks.ClassificationModels.cs
@@ -0,0 +1,56 @@
+using System.Collections.Generic;
+using System.Linq;
+using BenchmarkDotNet.Attributes;
+using SharpLearning.Common.Interfaces;
+using SharpLearning.Containers.Matrices;
+
+namespace SharpLearning.Benchmarks;
+
+public static partial class Benchmarks
+{
+    [MemoryDiagnoser]
+    public class ClassificationModels
+    {
+        // NOTE(review): generic type arguments restored from usage - confirm.
+        readonly IReadOnlyDictionary<string, ILearner<double>> m_learners =
+            DefaultLearners.LearnerNameToLearnerClassification;
+        readonly Dictionary<string, IPredictorModel<double>> m_models = [];
+
+        // For creating models.
+        F64Matrix m_features;
+        double[] m_targets;
+        // For prediction.
+        double[] m_featureRow;
+
+        /// <summary>
+        /// Generates the classification data, keeps row 0 as the prediction input and trains
+        /// one model per registered learner, stored under its model name.
+        /// </summary>
+        [GlobalSetup]
+        public void GlobalSetup()
+        {
+            (m_features, m_targets) = DataGenerator.GenerateClassificationData();
+            m_featureRow = m_features.Row(0);
+            foreach (var (learnerName, learner) in m_learners)
+            {
+                var model = learner.Learn(m_features, m_targets);
+                var modelName = DefaultLearners.LearnerNameToModelNameClassification[learnerName];
+                m_models[modelName] = model;
+            }
+        }
+
+        /// <summary>Predicts a single observation with the model named <paramref name="modelName"/>.</summary>
+        /// <param name="modelName">Key into the trained models, supplied by <see cref="GetModels"/>.</param>
+        [Benchmark]
+        [ArgumentsSource(nameof(GetModels))]
+        public void Predict(string modelName)
+        {
+            var model = m_models[modelName];
+            model.Predict(m_featureRow);
+        }
+
+        /// <summary>
+        /// Returns the model names used as arguments for <see cref="Predict"/>.
+        /// </summary>
+        /// <remarks>
+        /// The names are derived from the learner-name-to-model-name map, in learner order,
+        /// which is exactly the key set <see cref="GlobalSetup"/> produces. No model has to be
+        /// trained just to enumerate the benchmark arguments.
+        /// </remarks>
+        public IReadOnlyList<string> GetModels() =>
+            m_learners.Keys
+                .Select(learnerName => DefaultLearners.LearnerNameToModelNameClassification[learnerName])
+                .ToArray();
+    }
+}
diff --git a/src/SharpLearning.Benchmarks/Benchmarks.RegressionLearners.cs b/src/SharpLearning.Benchmarks/Benchmarks.RegressionLearners.cs
new file mode 100644
index 00000000..b82b0797
--- /dev/null
+++ b/src/SharpLearning.Benchmarks/Benchmarks.RegressionLearners.cs
@@ -0,0 +1,37 @@
+using System.Collections.Generic;
+using System.Linq;
+using BenchmarkDotNet.Attributes;
+using SharpLearning.Common.Interfaces;
+using SharpLearning.Containers.Matrices;
+
+namespace SharpLearning.Benchmarks;
+
+public static partial class Benchmarks
+{
+    /// <summary>
+    /// Benchmarks <c>Learn</c> for every regression learner registered in
+    /// <see cref="DefaultLearners.LearnerNameToLearnerRegression"/>.
+    /// </summary>
+    [MemoryDiagnoser]
+    public class RegressionLearners
+    {
+        // NOTE(review): generic type arguments were missing in this copy of the patch and
+        // are restored from usage - confirm.
+        readonly IReadOnlyDictionary<string, ILearner<double>> m_learners =
+            DefaultLearners.LearnerNameToLearnerRegression;
+
+        F64Matrix m_features;
+        double[] m_targets;
+
+        /// <summary>Generates the regression data set used by <see cref="Learn"/>.</summary>
+        [GlobalSetup]
+        public void GlobalSetup()
+        {
+            (m_features, m_targets) = DataGenerator.GenerateRegressionData();
+        }
+
+        /// <summary>Trains the learner registered under <paramref name="learnerName"/>.</summary>
+        /// <param name="learnerName">Key into the learner dictionary, supplied by <see cref="GetLearners"/>.</param>
+        [Benchmark]
+        [ArgumentsSource(nameof(GetLearners))]
+        public void Learn(string learnerName)
+        {
+            var learner = m_learners[learnerName];
+            learner.Learn(m_features, m_targets);
+        }
+
+        /// <summary>Returns the learner names used as arguments for <see cref="Learn"/>.</summary>
+        public IReadOnlyList<string> GetLearners() =>
+            m_learners.Keys.ToArray();
+    }
+}
diff --git 
a/src/SharpLearning.Benchmarks/Benchmarks.RegressionModels.cs b/src/SharpLearning.Benchmarks/Benchmarks.RegressionModels.cs
new file mode 100644
index 00000000..e7cbd6d6
--- /dev/null
+++ b/src/SharpLearning.Benchmarks/Benchmarks.RegressionModels.cs
@@ -0,0 +1,56 @@
+using System.Collections.Generic;
+using System.Linq;
+using BenchmarkDotNet.Attributes;
+using SharpLearning.Common.Interfaces;
+using SharpLearning.Containers.Matrices;
+
+namespace SharpLearning.Benchmarks;
+
+public static partial class Benchmarks
+{
+    /// <summary>
+    /// Benchmarks single-observation <c>Predict</c> for the models trained by every learner in
+    /// <see cref="DefaultLearners.LearnerNameToLearnerRegression"/>.
+    /// </summary>
+    [MemoryDiagnoser]
+    public class RegressionModels
+    {
+        // NOTE(review): generic type arguments were missing in this copy of the patch and
+        // are restored from usage - confirm.
+        readonly IReadOnlyDictionary<string, ILearner<double>> m_learners =
+            DefaultLearners.LearnerNameToLearnerRegression;
+        readonly Dictionary<string, IPredictorModel<double>> m_models = [];
+
+        // For creating models.
+        F64Matrix m_features;
+        double[] m_targets;
+        // For prediction.
+        double[] m_featureRow;
+
+        /// <summary>
+        /// Generates the regression data, keeps row 0 as the prediction input and trains
+        /// one model per registered learner, stored under its model name.
+        /// </summary>
+        [GlobalSetup]
+        public void GlobalSetup()
+        {
+            (m_features, m_targets) = DataGenerator.GenerateRegressionData();
+            m_featureRow = m_features.Row(0);
+            foreach (var (learnerName, learner) in m_learners)
+            {
+                var model = learner.Learn(m_features, m_targets);
+                var modelName = DefaultLearners.LearnerNameToModelNameRegression[learnerName];
+                m_models[modelName] = model;
+            }
+        }
+
+        /// <summary>Predicts a single observation with the model named <paramref name="modelName"/>.</summary>
+        /// <param name="modelName">Key into the trained models, supplied by <see cref="GetModels"/>.</param>
+        [Benchmark]
+        [ArgumentsSource(nameof(GetModels))]
+        public void Predict(string modelName)
+        {
+            var model = m_models[modelName];
+            model.Predict(m_featureRow);
+        }
+
+        /// <summary>
+        /// Returns the model names used as arguments for <see cref="Predict"/>.
+        /// </summary>
+        /// <remarks>
+        /// The names are derived from the learner-name-to-model-name map, in learner order,
+        /// which is exactly the key set <see cref="GlobalSetup"/> produces. No model has to be
+        /// trained just to enumerate the benchmark arguments.
+        /// </remarks>
+        public IReadOnlyList<string> GetModels() =>
+            m_learners.Keys
+                .Select(learnerName => DefaultLearners.LearnerNameToModelNameRegression[learnerName])
+                .ToArray();
+    }
+}
diff --git a/src/SharpLearning.Benchmarks/DataGenerator.cs b/src/SharpLearning.Benchmarks/DataGenerator.cs
new file mode 100644
index 00000000..fea5210b
--- /dev/null
+++ b/src/SharpLearning.Benchmarks/DataGenerator.cs
@@ -0,0 +1,56 @@
+using System;
+using SharpLearning.Containers.Matrices;
+
+namespace SharpLearning.Benchmarks;
+
+/// <summary>
+/// Creates seeded pseudo-random feature matrices and target vectors for the benchmarks.
+/// The same seed always yields the same data.
+/// </summary>
+public static class DataGenerator
+{
+    // Default data size for benchmarks.
+    public const int DefaultRows = 1000;
+    public const int DefaultCols = 10;
+    public const int DefaultMinTargetValue = 0;
+    public const int DefaultMaxTargetValue = 10;
+    public const int DefaultSeed = 42;
+
+    /// <summary>
+    /// Generates a <paramref name="rows"/> x <paramref name="cols"/> feature matrix and
+    /// <paramref name="rows"/> targets, all drawn with <see cref="Random.NextDouble"/>.
+    /// </summary>
+    /// <param name="rows">Number of observations.</param>
+    /// <param name="cols">Number of features per observation.</param>
+    /// <param name="seed">Seed for the random number generator.</param>
+    /// <returns>The feature matrix and the target vector.</returns>
+    public static (F64Matrix Features, double[] Targets) GenerateRegressionData(
+        int rows = DefaultRows, int cols = DefaultCols,
+        int seed = DefaultSeed)
+    {
+        var random = new Random(seed);
+        // Features are drawn before targets; the order fixes the generated values.
+        var features = GenerateRandomDoubles(rows, cols, random);
+        var targets = GenerateRandomDoubles(rows, 1, random);
+        return (new F64Matrix(features, rows, cols), targets);
+    }
+
+    /// <summary>
+    /// Generates a <paramref name="rows"/> x <paramref name="cols"/> feature matrix drawn with
+    /// <see cref="Random.NextDouble"/> and <paramref name="rows"/> integer-valued class labels.
+    /// </summary>
+    /// <param name="rows">Number of observations.</param>
+    /// <param name="cols">Number of features per observation.</param>
+    /// <param name="minTargetValue">Smallest class label (inclusive).</param>
+    /// <param name="maxTargetValue">
+    /// Upper bound of the class labels. It is passed to <see cref="Random.Next(int, int)"/>,
+    /// whose upper bound is exclusive, so the defaults yield the labels 0..9.
+    /// </param>
+    /// <param name="seed">Seed for the random number generator.</param>
+    /// <returns>The feature matrix and the target vector.</returns>
+    public static (F64Matrix Features, double[] Targets) GenerateClassificationData(
+        int rows = DefaultRows, int cols = DefaultCols,
+        int minTargetValue = DefaultMinTargetValue,
+        int maxTargetValue = DefaultMaxTargetValue,
+        int seed = DefaultSeed)
+    {
+        var random = new Random(seed);
+        // Features are drawn before targets; the order fixes the generated values.
+        var features = GenerateRandomDoubles(rows, cols, random);
+        var targets = GenerateRandomIntegers(rows, 1, minTargetValue, maxTargetValue, random);
+        return (new F64Matrix(features, rows, cols), targets);
+    }
+
+    // Fills a flat rows * cols buffer with values from random.NextDouble().
+    static double[] GenerateRandomDoubles(int rows, int cols, Random random)
+    {
+        var data = new double[rows * cols];
+        for (var i = 0; i < data.Length; i++)
+        {
+            data[i] = random.NextDouble();
+        }
+        return data;
+    }
+
+    // Fills a flat rows * cols buffer with integers from random.Next(min, max), stored as doubles.
+    static double[] GenerateRandomIntegers(int rows, int cols, int min, int max, Random random)
+    {
+        var data = new double[rows * cols];
+        for (var i = 0; i < data.Length; i++)
+        {
+            data[i] = random.Next(min, max);
+        }
+        return data;
+    }
+}
diff --git a/src/SharpLearning.Benchmarks/DefaultLearners.cs b/src/SharpLearning.Benchmarks/DefaultLearners.cs
new file mode 100644
index 00000000..3d52df76
--- /dev/null
+++ b/src/SharpLearning.Benchmarks/DefaultLearners.cs
@@ -0,0 +1,69 @@
+using System.Collections.Generic;
+using SharpLearning.AdaBoost.Learners;
+using SharpLearning.AdaBoost.Models;
+using SharpLearning.Common.Interfaces;
+using SharpLearning.DecisionTrees.Learners;
+using SharpLearning.DecisionTrees.Models;
+using SharpLearning.GradientBoost.Learners;
+using SharpLearning.GradientBoost.Models;
+using SharpLearning.RandomForest.Learners;
+using SharpLearning.RandomForest.Models;
+
+namespace SharpLearning.Benchmarks;
+
+/// <summary>
+/// Registry of the learners exercised by the benchmarks and of the display names
+/// used for the models they produce.
+/// </summary>
+public static class DefaultLearners
+{
+    const string NameSeparator = "_";
+
+    // Define classification learners here. Use default parameters for benchmarks.
+    // NOTE(review): generic type arguments were missing in this copy of the patch and
+    // are restored from usage - confirm.
+    public static readonly IReadOnlyDictionary<string, ILearner<double>> LearnerNameToLearnerClassification =
+        new Dictionary<string, ILearner<double>>()
+        {
+            { nameof(ClassificationDecisionTreeLearner), new ClassificationDecisionTreeLearner() },
+            { nameof(ClassificationAdaBoostLearner), new ClassificationAdaBoostLearner() },
+            { nameof(ClassificationRandomForestLearner), new ClassificationRandomForestLearner() },
+            { nameof(ClassificationExtremelyRandomizedTreesLearner), new ClassificationExtremelyRandomizedTreesLearner() },
+            { nameof(ClassificationBinomialGradientBoostLearner), new ClassificationBinomialGradientBoostLearner() },
+        };
+
+    // Define regression learners here. Use default parameters for benchmarks.
+    // Keys are the learner type names; they are used as benchmark arguments.
+    // NOTE(review): generic type arguments were missing in this copy of the patch and
+    // are restored from usage - confirm.
+    public static readonly IReadOnlyDictionary<string, ILearner<double>> LearnerNameToLearnerRegression =
+        new Dictionary<string, ILearner<double>>()
+        {
+            { nameof(RegressionDecisionTreeLearner), new RegressionDecisionTreeLearner() },
+            { nameof(RegressionAdaBoostLearner), new RegressionAdaBoostLearner() },
+            { nameof(RegressionRandomForestLearner), new RegressionRandomForestLearner() },
+            { nameof(RegressionExtremelyRandomizedTreesLearner), new RegressionExtremelyRandomizedTreesLearner() },
+            { nameof(RegressionAbsoluteLossGradientBoostLearner), new RegressionAbsoluteLossGradientBoostLearner() },
+            { nameof(RegressionHuberLossGradientBoostLearner), new RegressionHuberLossGradientBoostLearner() },
+            { nameof(RegressionQuantileLossGradientBoostLearner), new RegressionQuantileLossGradientBoostLearner() },
+            { nameof(RegressionSquareLossGradientBoostLearner), new RegressionSquareLossGradientBoostLearner() },
+        };
+
+    // Map learner names to model names for classification. Some learners return the same model type,
+    // so suffixing with the learner name.
+    public static readonly IReadOnlyDictionary<string, string> LearnerNameToModelNameClassification =
+        new Dictionary<string, string>()
+        {
+            { nameof(ClassificationDecisionTreeLearner), nameof(ClassificationDecisionTreeModel) },
+            { nameof(ClassificationAdaBoostLearner), nameof(ClassificationAdaBoostModel) },
+            { nameof(ClassificationRandomForestLearner), nameof(ClassificationForestModel) + NameSeparator + nameof(ClassificationRandomForestLearner) },
+            { nameof(ClassificationExtremelyRandomizedTreesLearner), nameof(ClassificationForestModel) + NameSeparator + nameof(ClassificationExtremelyRandomizedTreesLearner) },
+            { nameof(ClassificationBinomialGradientBoostLearner), nameof(ClassificationGradientBoostModel) },
+        };
+
+    // Map learner names to model names for regression. Some learners return the same model type,
+    // so suffixing with the learner name.
+    // Every value must be unique: the model names are used as dictionary keys by the
+    // model benchmarks.
+    // NOTE(review): generic type arguments were missing in this copy of the patch and
+    // are restored from usage - confirm.
+    public static readonly IReadOnlyDictionary<string, string> LearnerNameToModelNameRegression =
+        new Dictionary<string, string>()
+        {
+            { nameof(RegressionDecisionTreeLearner), nameof(RegressionDecisionTreeModel) },
+            { nameof(RegressionAdaBoostLearner), nameof(RegressionAdaBoostModel) },
+            { nameof(RegressionRandomForestLearner), nameof(RegressionForestModel) + NameSeparator + nameof(RegressionRandomForestLearner) },
+            { nameof(RegressionExtremelyRandomizedTreesLearner), nameof(RegressionForestModel) + NameSeparator + nameof(RegressionExtremelyRandomizedTreesLearner) },
+            { nameof(RegressionAbsoluteLossGradientBoostLearner), nameof(RegressionGradientBoostModel) + NameSeparator + nameof(RegressionAbsoluteLossGradientBoostLearner) },
+            { nameof(RegressionHuberLossGradientBoostLearner), nameof(RegressionGradientBoostModel) + NameSeparator + nameof(RegressionHuberLossGradientBoostLearner) },
+            { nameof(RegressionQuantileLossGradientBoostLearner), nameof(RegressionGradientBoostModel) + NameSeparator + nameof(RegressionQuantileLossGradientBoostLearner) },
+            { nameof(RegressionSquareLossGradientBoostLearner), nameof(RegressionGradientBoostModel) + NameSeparator + nameof(RegressionSquareLossGradientBoostLearner) },
+        };
+}
diff --git a/src/SharpLearning.Benchmarks/SharpLearning.Benchmarks.csproj b/src/SharpLearning.Benchmarks/SharpLearning.Benchmarks.csproj
new file mode 100644
index 00000000..ccb83163
--- /dev/null
+++ b/src/SharpLearning.Benchmarks/SharpLearning.Benchmarks.csproj
@@ -0,0 +1,28 @@
+ + + $(ExecutableTargetFramework) + false + pdbonly + true + Exe + + + + + + + + + + + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + +
diff --git a/src/SharpLearning.Benchmarks/program.cs b/src/SharpLearning.Benchmarks/program.cs
new file mode 100644
index 00000000..fb7976da
--- /dev/null
+++ b/src/SharpLearning.Benchmarks/program.cs
@@ -0,0 +1,22 @@
+// Type 'Program' can be sealed because it has no subtypes in its containing assembly and is not externally visible
+#pragma warning disable CA1852
+using System;
+using System.Diagnostics;
+using BenchmarkDotNet.Configs;
+using BenchmarkDotNet.Reports;
+using BenchmarkDotNet.Running;
+using SharpLearning.Benchmarks;
+
+[assembly: System.Runtime.InteropServices.ComVisible(false)]
+
+// Writes each message to both the console and the trace listeners.
+// NOTE(review): the delegate's type argument was missing in this copy of the patch;
+// restored as Action<string> from the call below - confirm.
+Action<string> log = t => { Console.WriteLine(t); Trace.WriteLine(t); };
+
+log($"{Environment.Version} args: {args.Length}");
+
+// Use the in-process debug configuration when a debugger is attached; otherwise the default one.
+// The parameter column is widened to 200 characters for the learner/model name arguments.
+var config = (Debugger.IsAttached ? new DebugInProcessConfig() : DefaultConfig.Instance)
+    .WithSummaryStyle(SummaryStyle.Default.WithMaxParameterColumnWidth(200));
+
+// Each benchmark class is run separately, in this order, with the same config and arguments.
+BenchmarkRunner.Run(typeof(Benchmarks.ClassificationLearners), config, args);
+BenchmarkRunner.Run(typeof(Benchmarks.RegressionLearners), config, args);
+BenchmarkRunner.Run(typeof(Benchmarks.ClassificationModels), config, args);
+BenchmarkRunner.Run(typeof(Benchmarks.RegressionModels), config, args);