Skip to content

Commit 3c88511

Browse files
authored
[ML] Remove computed statistics instrumentation (#1617)
Since we finished benchmarking #1313, the instrumentation of computed and avoided statistics has become unnecessary. Since this instrumentation is not "for free" I am removing the code from the codebase.
1 parent f3533db commit 3c88511

File tree

5 files changed

+5
-76
lines changed

5 files changed

+5
-76
lines changed

include/api/CDataFrameAnalysisInstrumentation.h

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -198,15 +198,6 @@ class API_EXPORT CDataFrameTrainBoostedTreeInstrumentation final
198198
//! \return Structure contains hyperparameters.
199199
SHyperparameters& hyperparameters() override { return m_Hyperparameters; }
200200

201-
std::size_t& statisticsComputed() override { return m_StatsComputed; }
202-
std::size_t& statisticsNotComputed() override { return m_StatsNotComputed; }
203-
virtual void rowsSkipped(std::uint32_t numberRows) override {
204-
m_RowsAccumulator.add(numberRows);
205-
}
206-
virtual std::uint32_t rowsSkipped() override {
207-
return maths::CBasicStatistics::mean(m_RowsAccumulator);
208-
}
209-
210201
protected:
211202
counter_t::ECounterTypes memoryCounterType() override;
212203

@@ -230,10 +221,6 @@ class API_EXPORT CDataFrameTrainBoostedTreeInstrumentation final
230221
std::string m_LossType;
231222
TLossVec m_LossValues;
232223
SHyperparameters m_Hyperparameters;
233-
234-
std::size_t m_StatsComputed = 0;
235-
std::size_t m_StatsNotComputed = 0;
236-
TRowsAccumulator m_RowsAccumulator;
237224
};
238225
}
239226
}

include/maths/CBoostedTreeLeafNodeStatistics.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#include <maths/CBoostedTreeHyperparameters.h>
1717
#include <maths/CBoostedTreeUtils.h>
1818
#include <maths/CChecksum.h>
19-
#include <maths/CDataFrameAnalysisInstrumentationInterface.h>
2019
#include <maths/CLinearAlgebraEigen.h>
2120
#include <maths/CLinearAlgebraShims.h>
2221
#include <maths/CMathsFuncs.h>
@@ -62,7 +61,6 @@ class MATHS_EXPORT CBoostedTreeLeafNodeStatistics final {
6261
using TMemoryMappedFloatVector = CMemoryMappedDenseVector<CFloatStorage, Eigen::Aligned16>;
6362
using TMemoryMappedDoubleVector = CMemoryMappedDenseVector<double, Eigen::Aligned16>;
6463
using TMemoryMappedDoubleMatrix = CMemoryMappedDenseMatrix<double, Eigen::Aligned16>;
65-
using TAnalysisInstrumentationPtr = CDataFrameTrainBoostedTreeInstrumentationInterface*;
6664

6765
//! \brief Accumulates aggregate derivatives.
6866
class MATHS_EXPORT CDerivatives {
@@ -648,8 +646,7 @@ class MATHS_EXPORT CBoostedTreeLeafNodeStatistics final {
648646
const TRegularization& regularization,
649647
const TSizeVec& featureBag,
650648
const CBoostedTreeNode& split,
651-
CWorkspace& workspace,
652-
TAnalysisInstrumentationPtr instrumentation = nullptr);
649+
CWorkspace& workspace);
653650

654651
//! Order two leaves by decreasing gain in splitting them.
655652
bool operator<(const CBoostedTreeLeafNodeStatistics& rhs) const;

include/maths/CDataFrameAnalysisInstrumentationInterface.h

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -139,11 +139,6 @@ class MATHS_EXPORT CDataFrameTrainBoostedTreeInstrumentationInterface
139139
virtual void lossValues(std::size_t fold, TDoubleVec&& lossValues) = 0;
140140
//! \return Structure contains hyperparameters.
141141
virtual SHyperparameters& hyperparameters() = 0;
142-
143-
virtual std::size_t& statisticsComputed() = 0;
144-
virtual std::size_t& statisticsNotComputed() = 0;
145-
virtual void rowsSkipped(std::uint32_t numberRows) = 0;
146-
virtual std::uint32_t rowsSkipped() = 0;
147142
};
148143

149144
//! \brief Dummies out all instrumentation for outlier detection.
@@ -174,16 +169,8 @@ class MATHS_EXPORT CDataFrameTrainBoostedTreeInstrumentationStub
174169
void lossValues(std::size_t /* fold */, TDoubleVec&& /* lossValues */) override {}
175170
SHyperparameters& hyperparameters() override { return m_Hyperparameters; }
176171

177-
std::size_t& statisticsComputed() override { return m_StubStatsComputed; }
178-
std::size_t& statisticsNotComputed() override {
179-
return m_StubStatsComputed;
180-
}
181-
virtual void rowsSkipped(std::uint32_t /*numberRows*/) override {}
182-
virtual std::uint32_t rowsSkipped() override { return 0ul; }
183-
184172
private:
185173
SHyperparameters m_Hyperparameters;
186-
std::size_t m_StubStatsComputed = 0;
187174
};
188175
}
189176
}

lib/maths/CBoostedTreeImpl.cc

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -295,16 +295,6 @@ void CBoostedTreeImpl::train(core::CDataFrame& frame,
295295
m_Instrumentation->updateProgress(1.0);
296296
m_Instrumentation->updateMemoryUsage(
297297
static_cast<std::int64_t>(this->memoryUsage()) - lastMemoryUsage);
298-
299-
if (m_Instrumentation != nullptr) {
300-
// TODO remove once performance measurements are finished
301-
LOG_INFO(<< "Statistics computed: " << m_Instrumentation->statisticsComputed()
302-
<< "\tnot computed: " << m_Instrumentation->statisticsNotComputed() << "\t saved: "
303-
<< (static_cast<double>(m_Instrumentation->statisticsNotComputed()) /
304-
(m_Instrumentation->statisticsNotComputed() +
305-
m_Instrumentation->statisticsComputed()))
306-
<< "\t avg. rows skipped: " << m_Instrumentation->rowsSkipped());
307-
}
308298
}
309299

310300
void CBoostedTreeImpl::recordState(const TTrainingStateCallback& recordTrainState) const {
@@ -905,10 +895,9 @@ CBoostedTreeImpl::trainTree(core::CDataFrame& frame,
905895

906896
TLeafNodeStatisticsPtr leftChild;
907897
TLeafNodeStatisticsPtr rightChild;
908-
std::tie(leftChild, rightChild) =
909-
leaf->split(leftChildId, rightChildId, m_NumberThreads,
910-
smallestCandidateGain, frame, *m_Encoder, m_Regularization,
911-
featureBag, tree[leaf->id()], workspace, m_Instrumentation);
898+
std::tie(leftChild, rightChild) = leaf->split(
899+
leftChildId, rightChildId, m_NumberThreads, smallestCandidateGain, frame,
900+
*m_Encoder, m_Regularization, featureBag, tree[leaf->id()], workspace);
912901

913902
// Need gain to be computed to compare here
914903
if (leftChild != nullptr && rightChild != nullptr && less(rightChild, leftChild)) {

lib/maths/CBoostedTreeLeafNodeStatistics.cc

Lines changed: 1 addition & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -25,20 +25,6 @@ namespace {
2525
const std::size_t ASSIGN_MISSING_TO_LEFT{0};
2626
const std::size_t ASSIGN_MISSING_TO_RIGHT{1};
2727

28-
void incrementStatsComputed(CBoostedTreeLeafNodeStatistics::TAnalysisInstrumentationPtr instrumentation) {
29-
if (instrumentation != nullptr) {
30-
instrumentation->statisticsComputed() += 1;
31-
}
32-
}
33-
34-
void incrementStatsNotComputed(CBoostedTreeLeafNodeStatistics::TAnalysisInstrumentationPtr instrumentation,
35-
std::uint32_t rowsInChild) {
36-
if (instrumentation != nullptr) {
37-
instrumentation->statisticsNotComputed() += 1;
38-
instrumentation->rowsSkipped(rowsInChild);
39-
}
40-
}
41-
4228
struct SChildredGainStats {
4329
double s_MinLossLeft = -INF;
4430
double s_MinLossRight = -INF;
@@ -152,32 +138,23 @@ CBoostedTreeLeafNodeStatistics::split(std::size_t leftChildId,
152138
const TRegularization& regularization,
153139
const TSizeVec& featureBag,
154140
const CBoostedTreeNode& split,
155-
CWorkspace& workspace,
156-
TAnalysisInstrumentationPtr instrumentation) {
141+
CWorkspace& workspace) {
157142
TPtr leftChild;
158143
TPtr rightChild;
159144
if (this->leftChildHasFewerRows()) {
160145
if (this->m_BestSplit.s_LeftChildMaxGain > gainThreshold) {
161146
leftChild = std::make_shared<CBoostedTreeLeafNodeStatistics>(
162147
leftChildId, *this, numberThreads, frame, encoder, regularization,
163148
featureBag, true /*is left child*/, split, workspace);
164-
incrementStatsComputed(instrumentation);
165149
if (this->m_BestSplit.s_RightChildMaxGain > gainThreshold) {
166150
rightChild = std::make_shared<CBoostedTreeLeafNodeStatistics>(
167151
rightChildId, std::move(*this), regularization, featureBag, workspace);
168-
incrementStatsComputed(instrumentation);
169-
} else {
170-
incrementStatsNotComputed(instrumentation, this->m_BestSplit.s_RightChildRowCount);
171152
}
172153
} else {
173-
incrementStatsNotComputed(instrumentation, this->m_BestSplit.s_LeftChildRowCount);
174154
if (this->m_BestSplit.s_RightChildMaxGain > gainThreshold) {
175155
rightChild = std::make_shared<CBoostedTreeLeafNodeStatistics>(
176156
rightChildId, *this, numberThreads, frame, encoder, regularization,
177157
featureBag, false /*is left child*/, split, workspace);
178-
incrementStatsComputed(instrumentation);
179-
} else {
180-
incrementStatsNotComputed(instrumentation, this->m_BestSplit.s_RightChildRowCount);
181158
}
182159
}
183160
return {std::move(leftChild), std::move(rightChild)};
@@ -187,23 +164,15 @@ CBoostedTreeLeafNodeStatistics::split(std::size_t leftChildId,
187164
rightChild = std::make_shared<CBoostedTreeLeafNodeStatistics>(
188165
rightChildId, *this, numberThreads, frame, encoder, regularization,
189166
featureBag, false /*is left child*/, split, workspace);
190-
incrementStatsComputed(instrumentation);
191167
if (this->m_BestSplit.s_LeftChildMaxGain > gainThreshold) {
192168
leftChild = std::make_shared<CBoostedTreeLeafNodeStatistics>(
193169
leftChildId, std::move(*this), regularization, featureBag, workspace);
194-
incrementStatsComputed(instrumentation);
195-
} else {
196-
incrementStatsNotComputed(instrumentation, this->m_BestSplit.s_LeftChildRowCount);
197170
}
198171
} else {
199-
incrementStatsNotComputed(instrumentation, this->m_BestSplit.s_RightChildRowCount);
200172
if (this->m_BestSplit.s_LeftChildMaxGain > gainThreshold) {
201173
leftChild = std::make_shared<CBoostedTreeLeafNodeStatistics>(
202174
leftChildId, *this, numberThreads, frame, encoder, regularization,
203175
featureBag, true /*is left child*/, split, workspace);
204-
incrementStatsComputed(instrumentation);
205-
} else {
206-
incrementStatsNotComputed(instrumentation, this->m_BestSplit.s_LeftChildRowCount);
207176
}
208177
}
209178
return {std::move(leftChild), std::move(rightChild)};

0 commit comments

Comments
 (0)