From a7b1ce2b2320b4c2eca98db06ae2dc4503931490 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 4 Apr 2025 15:39:45 +1300 Subject: [PATCH 01/22] [ML] Report the "actual" memory usage of the autodetect process Determine the actual memory usgae of the autodetect process as reported by the OS, e.g. on Linux this mould be the value of the maximum resident set size returned by a call to `getrusage`. Add this value to the model size stats record returned to the ES Java process so it can be included in the `job counts` tab for anomaly detection jobs. --- bin/autodetect/Main.cc | 6 +- include/core/CProgramCounters.h | 7 ++- include/model/CResourceMonitor.h | 3 + include/model/ModelTypes.h | 4 +- lib/api/CModelSizeStatsJsonWriter.cc | 4 ++ lib/api/unittest/CAnomalyJobLimitTest.cc | 31 +++++++++- lib/api/unittest/CJsonOutputWriterTest.cc | 59 ++++++++++--------- .../unittest/CModelSnapshotJsonWriterTest.cc | 1 + lib/core/CProcessStats_Linux.cc | 7 ++- lib/core/CProcessStats_MacOSX.cc | 9 ++- lib/core/CProcessStats_Windows.cc | 12 +++- lib/model/CResourceMonitor.cc | 6 ++ lib/model/ModelTypes.cc | 2 + lib/model/unittest/CResourceMonitorTest.cc | 14 ++++- 14 files changed, 126 insertions(+), 39 deletions(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 904920e3db..bbb90c706a 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -83,7 +84,8 @@ int main(int argc, char** argv) { ml::counter_t::E_TSADNumberMemoryLimitModelCreationFailures, ml::counter_t::E_TSADNumberPrunedItems, ml::counter_t::E_TSADAssignmentMemoryBasis, - ml::counter_t::E_TSADOutputMemoryAllocatorUsage}; + ml::counter_t::E_TSADOutputMemoryAllocatorUsage, + ml::counter_t::E_TSADMaxResidentSetSize}; ml::core::CProgramCounters::registerProgramCounterTypes(counters); @@ -151,6 +153,8 @@ int main(int argc, char** argv) { } cancellerThread.stop(); + LOG_DEBUG(<< "Max Resident Set Size: " << ml::core::CProcessStats::maxResidentSetSize()); + // Log the program version immediately after reconfiguring the logger. This // must be done from the program, and NOT a shared library, as each program // statically links its own version library. diff --git a/include/core/CProgramCounters.h b/include/core/CProgramCounters.h index 3c4d10269f..34d5cdbb26 100644 --- a/include/core/CProgramCounters.h +++ b/include/core/CProgramCounters.h @@ -112,6 +112,9 @@ enum ECounterTypes { //! The memory currently used by the allocators to output JSON documents, in bytes. E_TSADOutputMemoryAllocatorUsage = 30, + //! The maximum resident set size of the process, in bytes. + E_TSADMaxResidentSetSize = 31, + // Data Frame Outlier Detection //! The estimated peak memory usage for outlier detection in bytes @@ -146,7 +149,7 @@ enum ECounterTypes { // Add any new values here //! This MUST be last, increment the value for every new enum added - E_LastEnumCounter = 31 + E_LastEnumCounter = 32 }; static constexpr std::size_t NUM_COUNTERS = static_cast(E_LastEnumCounter); @@ -355,6 +358,8 @@ class CORE_EXPORT CProgramCounters { "Which option is being used to get model memory for node assignment?"}, {counter_t::E_TSADOutputMemoryAllocatorUsage, "E_TSADOutputMemoryAllocatorUsage", "The amount of memory used to output JSON documents, in bytes."}, + {counter_t::E_TSADMaxResidentSetSize, "E_TSADMaxResidentSetSize", + "The maximum resident set size of the process, in bytes"}, {counter_t::E_DFOEstimatedPeakMemoryUsage, "E_DFOEstimatedPeakMemoryUsage", "The upfront estimate of the peak memory outlier detection would use"}, {counter_t::E_DFOPeakMemoryUsage, "E_DFOPeakMemoryUsage", "The peak memory outlier detection used"}, diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index 5c7583888b..c9c887281f 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -54,6 +54,7 @@ class MODEL_EXPORT CResourceMonitor { std::size_t s_AdjustedUsage{0}; std::size_t s_PeakUsage{0}; std::size_t s_AdjustedPeakUsage{0}; + std::size_t s_ActualMemoryUsage{0}; std::size_t s_ByFields{0}; std::size_t s_PartitionFields{0}; std::size_t s_OverFields{0}; @@ -180,6 +181,8 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the sum of used memory plus any extra memory std::size_t totalMemory() const; + std::size_t actualMemoryUsage() const; + private: using TMonitoredResourcePtrSizeUMap = boost::unordered_map; diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index acbcc14c04..aeffe27e83 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -719,7 +719,9 @@ enum EAssignmentMemoryBasis { E_AssignmentBasisUnknown = 0, //!< Decision made in Java code E_AssignmentBasisModelMemoryLimit = 1, //!< Use model memory limit E_AssignmentBasisCurrentModelBytes = 2, //!< Use current actual model size - E_AssignmentBasisPeakModelBytes = 3 //!< Use highest ever actual model size + E_AssignmentBasisPeakModelBytes = 3, //!< Use highest ever actual model size + E_AssignmentBasisActualMemoryUsageBytes = 4 //!< Use the actual memory size + //!< of the process, as reported by the OS }; //! Get a string description of \p assignmentMemoryBasis. diff --git a/lib/api/CModelSizeStatsJsonWriter.cc b/lib/api/CModelSizeStatsJsonWriter.cc index 43fef49602..75604c7f6a 100644 --- a/lib/api/CModelSizeStatsJsonWriter.cc +++ b/lib/api/CModelSizeStatsJsonWriter.cc @@ -25,6 +25,7 @@ const std::string JOB_ID{"job_id"}; const std::string MODEL_SIZE_STATS{"model_size_stats"}; const std::string MODEL_BYTES{"model_bytes"}; const std::string PEAK_MODEL_BYTES{"peak_model_bytes"}; +const std::string ACTUAL_MEMORY_USAGE_BYTES{"actual_memory_usage_bytes"}; const std::string MODEL_BYTES_EXCEEDED{"model_bytes_exceeded"}; const std::string MODEL_BYTES_MEMORY_LIMIT{"model_bytes_memory_limit"}; const std::string TOTAL_BY_FIELD_COUNT{"total_by_field_count"}; @@ -60,6 +61,9 @@ void CModelSizeStatsJsonWriter::write(const std::string& jobId, writer.onKey(PEAK_MODEL_BYTES); writer.onUint64(results.s_AdjustedPeakUsage); + writer.onKey(ACTUAL_MEMORY_USAGE_BYTES); + writer.onUint64(results.s_ActualMemoryUsage); + writer.onKey(MODEL_BYTES_EXCEEDED); writer.onUint64(results.s_BytesExceeded); diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index b003e90a53..938892589c 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -9,6 +9,7 @@ * limitation. */ #include +#include #include #include @@ -92,6 +93,10 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { std::size_t nonLimitedUsage{0}; std::size_t limitedUsage{0}; + std::size_t actualUsage{0}; + std::size_t baseline{0}; + std::size_t nonLimitedAdjustedActualUsage{0}; + std::size_t limitedAdjustedActualUsage{0}; { // Without limits, this data set should make the models around // 1230000 bytes @@ -105,6 +110,8 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); model::CLimits limits; + baseline = limits.resourceMonitor().actualMemoryUsage(); + //limits.resourceMonitor().m_ByteLimitHigh = 100000; //limits.resourceMonitor().m_ByteLimitLow = 90000; @@ -127,8 +134,15 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { BOOST_REQUIRE_EQUAL(uint64_t(18630), job.numRecordsHandled()); nonLimitedUsage = limits.resourceMonitor().totalMemory(); + actualUsage = limits.resourceMonitor().actualMemoryUsage(); + nonLimitedAdjustedActualUsage = actualUsage - baseline; } } + LOG_DEBUG(<< "nonLimitedUsage: " << nonLimitedUsage); + LOG_DEBUG(<< "baseline: " << baseline); + LOG_DEBUG(<< "actualUsage: " << actualUsage); + LOG_DEBUG(<< "nonLimitedAdjustedActualUsage: " << nonLimitedAdjustedActualUsage); + BOOST_TEST_REQUIRE(nonLimitedAdjustedActualUsage >= nonLimitedUsage); { // Now run the data with limiting ml::api::CAnomalyJobConfig jobConfig = CTestAnomalyJob::makeSimpleJobConfig( @@ -138,6 +152,8 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { model::CAnomalyDetectorModelConfig::defaultConfig(3600); model::CLimits limits; + baseline = limits.resourceMonitor().actualMemoryUsage(); + std::stringstream outputStrm; { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -166,11 +182,18 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { // TODO this limit must be tightened once there is more granular // control over the model memory creation limitedUsage = limits.resourceMonitor().totalMemory(); + actualUsage = limits.resourceMonitor().actualMemoryUsage(); + limitedAdjustedActualUsage = actualUsage - baseline; } LOG_TRACE(<< outputStrm.str()); LOG_DEBUG(<< "Non-limited usage: " << nonLimitedUsage << "; limited: " << limitedUsage); + LOG_DEBUG(<< "baseline: " << baseline); + LOG_DEBUG(<< "actualUsage: " << actualUsage); + LOG_DEBUG(<< "limitedAdjustedActualUsage: " << limitedAdjustedActualUsage); BOOST_TEST_REQUIRE(limitedUsage < nonLimitedUsage); + BOOST_TEST_REQUIRE(limitedAdjustedActualUsage < nonLimitedAdjustedActualUsage); + BOOST_TEST_REQUIRE(limitedAdjustedActualUsage >= limitedUsage); } } @@ -375,6 +398,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage bytes = " << used.s_Usage); + LOG_DEBUG(<< "Actual memory usage bytes = " << used.s_ActualMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * core::constants::BYTES_IN_MEGABYTES); BOOST_TEST_REQUIRE(used.s_ByFields > testParam.s_ExpectedByFields); @@ -384,6 +408,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedByMemoryUsageRelativeErrorDivisor); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); } LOG_DEBUG(<< "**** Test partition with bucketLength = " << testParam.s_BucketLength @@ -423,11 +448,12 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { } core_t::TTime startOfBucket{ maths::common::CIntegerTools::floor(time, testParam.s_BucketLength)}; - auto used = limits.resourceMonitor().createMemoryUsageReport(startOfBucket); + auto used = limits.resourceMonitor(). createMemoryUsageReport(startOfBucket); LOG_DEBUG(<< "# by = " << used.s_ByFields); LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); + LOG_DEBUG(<< "Actual memory usage = " << used.s_ActualMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_PartitionFields >= testParam.s_ExpectedPartitionFields); BOOST_TEST_REQUIRE(used.s_PartitionFields < 450); @@ -437,6 +463,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedPartitionUsageRelativeErrorDivisor); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); } LOG_DEBUG(<< "**** Test over with bucketLength = " << testParam.s_BucketLength @@ -479,6 +506,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# over = " << used.s_OverFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); + LOG_DEBUG(<< "Actual memory usage = " << used.s_ActualMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_OverFields > testParam.s_ExpectedOverFields); BOOST_TEST_REQUIRE(used.s_OverFields <= 9000); @@ -486,6 +514,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedOverUsageRelativeErrorDivisor); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); } } } diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index ba44163e7c..b4b9db851a 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1728,21 +1728,22 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { resourceUsage.s_AdjustedUsage = 2; resourceUsage.s_PeakUsage = 3; resourceUsage.s_AdjustedPeakUsage = 4; - resourceUsage.s_ByFields = 5; - resourceUsage.s_PartitionFields = 6; - resourceUsage.s_OverFields = 7; - resourceUsage.s_AllocationFailures = 8; + resourceUsage.s_ActualMemoryUsage = 5; + resourceUsage.s_ByFields = 6; + resourceUsage.s_PartitionFields = 7; + resourceUsage.s_OverFields = 8; + resourceUsage.s_AllocationFailures = 9; resourceUsage.s_MemoryStatus = ml::model_t::E_MemoryStatusHardLimit; - resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisCurrentModelBytes; - resourceUsage.s_BucketStartTime = 9; - resourceUsage.s_BytesExceeded = 10; - resourceUsage.s_BytesMemoryLimit = 11; - resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 12; - resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 13; - resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 14; - resourceUsage.s_OverallCategorizerStats.s_RareCategories = 15; - resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 16; - resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 17; + resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisActualMemoryUsageBytes; + resourceUsage.s_BucketStartTime = 10; + resourceUsage.s_BytesExceeded = 11; + resourceUsage.s_BytesMemoryLimit = 12; + resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 13; + resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 14; + resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 15; + resourceUsage.s_OverallCategorizerStats.s_RareCategories = 16; + resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 17; + resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 18; resourceUsage.s_OverallCategorizerStats.s_CategorizationStatus = ml::model_t::E_CategorizationStatusWarn; @@ -1770,44 +1771,46 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_REQUIRE_EQUAL(2, sizeStats.at("model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); + BOOST_TEST_REQUIRE(sizeStats.contains("actual_memory_usage_bytes")); + BOOST_REQUIRE_EQUAL(5, sizeStats.at("actual_memory_usage_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_by_field_count")); - BOOST_REQUIRE_EQUAL(5, sizeStats.at("total_by_field_count").to_number()); + BOOST_REQUIRE_EQUAL(6, sizeStats.at("total_by_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_partition_field_count")); BOOST_REQUIRE_EQUAL( - 6, sizeStats.at("total_partition_field_count").to_number()); + 7, sizeStats.at("total_partition_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_over_field_count")); - BOOST_REQUIRE_EQUAL(7, sizeStats.at("total_over_field_count").to_number()); + BOOST_REQUIRE_EQUAL(8, sizeStats.at("total_over_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("bucket_allocation_failures_count")); BOOST_REQUIRE_EQUAL( - 8, sizeStats.at("bucket_allocation_failures_count").to_number()); + 9, sizeStats.at("bucket_allocation_failures_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("timestamp")); - BOOST_REQUIRE_EQUAL(9000, sizeStats.at("timestamp").to_number()); + BOOST_REQUIRE_EQUAL(10000, sizeStats.at("timestamp").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("memory_status")); BOOST_REQUIRE_EQUAL("hard_limit", sizeStats.at("memory_status").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("assignment_memory_basis")); - BOOST_REQUIRE_EQUAL("current_model_bytes", + BOOST_REQUIRE_EQUAL("actual_memory_usage_bytes", sizeStats.at("assignment_memory_basis").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("log_time")); std::int64_t nowMs{ml::core::CTimeUtils::nowMs()}; BOOST_TEST_REQUIRE(nowMs >= sizeStats.at("log_time").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_exceeded")); - BOOST_REQUIRE_EQUAL(10, sizeStats.at("model_bytes_exceeded").to_number()); + BOOST_REQUIRE_EQUAL(11, sizeStats.at("model_bytes_exceeded").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_memory_limit")); BOOST_REQUIRE_EQUAL( - 11, sizeStats.at("model_bytes_memory_limit").to_number()); + 12, sizeStats.at("model_bytes_memory_limit").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorized_doc_count")); - BOOST_REQUIRE_EQUAL(12, sizeStats.at("categorized_doc_count").to_number()); + BOOST_REQUIRE_EQUAL(13, sizeStats.at("categorized_doc_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_category_count")); - BOOST_REQUIRE_EQUAL(13, sizeStats.at("total_category_count").to_number()); + BOOST_REQUIRE_EQUAL(14, sizeStats.at("total_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("frequent_category_count")); BOOST_REQUIRE_EQUAL( - 14, sizeStats.at("frequent_category_count").to_number()); + 15, sizeStats.at("frequent_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("rare_category_count")); - BOOST_REQUIRE_EQUAL(15, sizeStats.at("rare_category_count").to_number()); + BOOST_REQUIRE_EQUAL(16, sizeStats.at("rare_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("dead_category_count")); - BOOST_REQUIRE_EQUAL(16, sizeStats.at("dead_category_count").to_number()); + BOOST_REQUIRE_EQUAL(17, sizeStats.at("dead_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("failed_category_count")); - BOOST_REQUIRE_EQUAL(17, sizeStats.at("failed_category_count").to_number()); + BOOST_REQUIRE_EQUAL(18, sizeStats.at("failed_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorization_status")); BOOST_REQUIRE_EQUAL("warn", sizeStats.at("categorization_status").as_string()); } diff --git a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc index ff404b7f91..eb4d382b9a 100644 --- a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc +++ b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc @@ -36,6 +36,7 @@ BOOST_AUTO_TEST_CASE(testWrite) { 20000, // bytes used (adjusted) 30000, // peak bytes used 60000, // peak bytes used (adjusted) + 409600, // Actual memory used (max rss) 3, // # by fields 1, // # partition fields 150, // # over fields diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index e5ab8cdfd0..b0c02425e9 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -87,7 +88,11 @@ std::size_t CProcessStats::maxResidentSetSize() { } // ru_maxrss is in kilobytes - return static_cast(rusage.ru_maxrss * 1024L); + std::size_t maxRSS = static_cast(rusage.ru_maxrss * 1024L); + + CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; + + return maxRSS; } } } diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index 9aa1e969c9..f1c55e2aae 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -8,9 +8,11 @@ * compliance with the Elastic License 2.0 and the foregoing additional * limitation. */ -#include #include +#include +#include + #include #include #include @@ -31,9 +33,10 @@ std::size_t CProcessStats::maxResidentSetSize() { LOG_DEBUG(<< "failed to get resource usage(getrusage): " << ::strerror(errno)); return 0; } - + std::size_t maxRSS = static_cast(rusage.ru_maxrss); + CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; // ru_maxrss is in bytes - return static_cast(rusage.ru_maxrss); + return maxRSS; } } } diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index 7ca2d7e6c0..78cb418d93 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -8,8 +8,10 @@ * compliance with the Elastic License 2.0 and the foregoing additional * limitation. */ -#include #include + +#include +#include #include #include @@ -36,7 +38,13 @@ std::size_t CProcessStats::maxResidentSetSize() { LOG_DEBUG(<< "Failed to retrieve memory info " << CWindowsError()); return 0; } - return static_cast(stats.PeakWorkingSetSize); + + std::size_t peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); + + + CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = peakWorkingSetSize; + + return peakWorkingSetSize; } } } diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index d93b3b8bd8..a0b74ed6f3 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -382,6 +383,7 @@ CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { res.s_PeakUsage = static_cast( core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage); + res.s_ActualMemoryUsage = core::CProcessStats::maxResidentSetSize(); res.s_BytesMemoryLimit = this->persistenceMemoryIncreaseFactor() * m_ByteLimitHigh; res.s_BytesExceeded = m_CurrentBytesExceeded; res.s_MemoryStatus = m_MemoryStatus; @@ -491,5 +493,9 @@ std::size_t CResourceMonitor::totalMemory() const { counter_t::E_TSADOutputMemoryAllocatorUsage)); } +std::size_t CResourceMonitor::actualMemoryUsage() const { + return core::CProcessStats::maxResidentSetSize(); +} + } // model } // ml diff --git a/lib/model/ModelTypes.cc b/lib/model/ModelTypes.cc index 2fab1d1c2a..a9140a66b3 100644 --- a/lib/model/ModelTypes.cc +++ b/lib/model/ModelTypes.cc @@ -1733,6 +1733,8 @@ std::string print(EAssignmentMemoryBasis assignmentMemoryBasis) { return "current_model_bytes"; case E_AssignmentBasisPeakModelBytes: return "peak_model_bytes"; + case E_AssignmentBasisActualMemoryUsageBytes: + return "actual_memory_usage_bytes"; } return "-"; } diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index f69dccc384..f8fe8fcc89 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -536,7 +536,7 @@ BOOST_FIXTURE_TEST_CASE(testExtraMemory, CTestFixture) { } BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { - // Clear the counter so that other test cases do not interfere. + // Clear the counters so that other test cases do not interfere. core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage) = 0; CLimits limits; @@ -549,6 +549,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_PeakUsage); + BOOST_TEST_REQUIRE(baseTotalMemory <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + monitor.addExtraMemory(100); monitor.updateMoments(monitor.totalMemory(), 0, 1); @@ -556,6 +559,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + monitor.addExtraMemory(-50); monitor.updateMoments(monitor.totalMemory(), 0, 1); @@ -563,12 +569,18 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 50, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + monitor.addExtraMemory(100); monitor.updateMoments(monitor.totalMemory(), 0, 1); monitor.sendMemoryUsageReport(0, 1); BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_PeakUsage); + + BOOST_TEST_REQUIRE(baseTotalMemory + 150 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); } BOOST_FIXTURE_TEST_CASE(testUpdateMoments, CTestFixture) { From cb957cacfaa15484196a48c42467a8a5ab7c8040 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 4 Apr 2025 15:53:50 +1300 Subject: [PATCH 02/22] Update changelog --- docs/CHANGELOG.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 75117ca066..0b118eb82b 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -33,6 +33,7 @@ === Enhancements * Track memory used in the hierarchical results normalizer. (See {ml-pull}2831[#2831].) +* Report the actual memory usage of the autodetect process. (See {ml-pull}2846[#2846]) === Bug Fixes From 8f73f02d7486068b1d43f4a337b0593fcdd1ef12 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 4 Apr 2025 16:28:53 +1300 Subject: [PATCH 03/22] Formatting --- include/model/ModelTypes.h | 2 +- lib/api/unittest/CAnomalyJobLimitTest.cc | 2 +- lib/api/unittest/CJsonOutputWriterTest.cc | 3 ++- lib/core/CProcessStats_Windows.cc | 1 - lib/model/unittest/CResourceMonitorTest.cc | 12 ++++++++---- 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index aeffe27e83..ef09536551 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -721,7 +721,7 @@ enum EAssignmentMemoryBasis { E_AssignmentBasisCurrentModelBytes = 2, //!< Use current actual model size E_AssignmentBasisPeakModelBytes = 3, //!< Use highest ever actual model size E_AssignmentBasisActualMemoryUsageBytes = 4 //!< Use the actual memory size - //!< of the process, as reported by the OS + //!< of the process, as reported by the OS }; //! Get a string description of \p assignmentMemoryBasis. diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index 938892589c..f1531f46ec 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -448,7 +448,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { } core_t::TTime startOfBucket{ maths::common::CIntegerTools::floor(time, testParam.s_BucketLength)}; - auto used = limits.resourceMonitor(). createMemoryUsageReport(startOfBucket); + auto used = limits.resourceMonitor().createMemoryUsageReport(startOfBucket); LOG_DEBUG(<< "# by = " << used.s_ByFields); LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index b4b9db851a..95c5e319b8 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1772,7 +1772,8 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("actual_memory_usage_bytes")); - BOOST_REQUIRE_EQUAL(5, sizeStats.at("actual_memory_usage_bytes").to_number()); + BOOST_REQUIRE_EQUAL( + 5, sizeStats.at("actual_memory_usage_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_by_field_count")); BOOST_REQUIRE_EQUAL(6, sizeStats.at("total_by_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_partition_field_count")); diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index 78cb418d93..d6e6e4649c 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -41,7 +41,6 @@ std::size_t CProcessStats::maxResidentSetSize() { std::size_t peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); - CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = peakWorkingSetSize; return peakWorkingSetSize; diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index f8fe8fcc89..193746addc 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -550,7 +550,8 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_PeakUsage); BOOST_TEST_REQUIRE(baseTotalMemory <= m_ReportedModelSizeStats.s_ActualMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= + m_ReportedModelSizeStats.s_ActualMemoryUsage); monitor.addExtraMemory(100); @@ -560,7 +561,8 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= + m_ReportedModelSizeStats.s_ActualMemoryUsage); monitor.addExtraMemory(-50); @@ -570,7 +572,8 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= + m_ReportedModelSizeStats.s_ActualMemoryUsage); monitor.addExtraMemory(100); @@ -580,7 +583,8 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_PeakUsage); BOOST_TEST_REQUIRE(baseTotalMemory + 150 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= + m_ReportedModelSizeStats.s_ActualMemoryUsage); } BOOST_FIXTURE_TEST_CASE(testUpdateMoments, CTestFixture) { From d3a39aedc57c7de332d81e3ce8797df5b142e409 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 7 Apr 2025 15:43:50 +1200 Subject: [PATCH 04/22] Appease SonarQube --- lib/core/CProcessStats_Linux.cc | 2 +- lib/core/CProcessStats_MacOSX.cc | 2 +- lib/core/CProcessStats_Windows.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index b0c02425e9..c858b4e589 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -88,7 +88,7 @@ std::size_t CProcessStats::maxResidentSetSize() { } // ru_maxrss is in kilobytes - std::size_t maxRSS = static_cast(rusage.ru_maxrss * 1024L); + auto maxRSS = static_cast(rusage.ru_maxrss * 1024L); CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index f1c55e2aae..c3edc85424 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -33,7 +33,7 @@ std::size_t CProcessStats::maxResidentSetSize() { LOG_DEBUG(<< "failed to get resource usage(getrusage): " << ::strerror(errno)); return 0; } - std::size_t maxRSS = static_cast(rusage.ru_maxrss); + auto maxRSS = static_cast(rusage.ru_maxrss); CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; // ru_maxrss is in bytes return maxRSS; diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index d6e6e4649c..d91db5e3c9 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -39,7 +39,7 @@ std::size_t CProcessStats::maxResidentSetSize() { return 0; } - std::size_t peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); + auto peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = peakWorkingSetSize; From e2d1bf5f0e2a04bd772fa77e7ea96a8582cf6f00 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 7 Apr 2025 15:46:27 +1200 Subject: [PATCH 05/22] Tweak unit test for platform portability --- lib/api/unittest/CAnomalyJobLimitTest.cc | 32 +++++++----------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index f1531f46ec..348b4162db 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -93,10 +93,8 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { std::size_t nonLimitedUsage{0}; std::size_t limitedUsage{0}; - std::size_t actualUsage{0}; - std::size_t baseline{0}; - std::size_t nonLimitedAdjustedActualUsage{0}; - std::size_t limitedAdjustedActualUsage{0}; + std::size_t nonLimitedActualUsage{0}; + std::size_t limitedActualUsage{0}; { // Without limits, this data set should make the models around // 1230000 bytes @@ -110,10 +108,6 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); model::CLimits limits; - baseline = limits.resourceMonitor().actualMemoryUsage(); - - //limits.resourceMonitor().m_ByteLimitHigh = 100000; - //limits.resourceMonitor().m_ByteLimitLow = 90000; { LOG_TRACE(<< "Setting up job"); @@ -134,15 +128,12 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { BOOST_REQUIRE_EQUAL(uint64_t(18630), job.numRecordsHandled()); nonLimitedUsage = limits.resourceMonitor().totalMemory(); - actualUsage = limits.resourceMonitor().actualMemoryUsage(); - nonLimitedAdjustedActualUsage = actualUsage - baseline; + nonLimitedActualUsage = limits.resourceMonitor().actualMemoryUsage(); } } LOG_DEBUG(<< "nonLimitedUsage: " << nonLimitedUsage); - LOG_DEBUG(<< "baseline: " << baseline); - LOG_DEBUG(<< "actualUsage: " << actualUsage); - LOG_DEBUG(<< "nonLimitedAdjustedActualUsage: " << nonLimitedAdjustedActualUsage); - BOOST_TEST_REQUIRE(nonLimitedAdjustedActualUsage >= nonLimitedUsage); + LOG_DEBUG(<< "nonLimitedActualUsage: " << nonLimitedActualUsage); + BOOST_TEST_REQUIRE(nonLimitedActualUsage >= nonLimitedUsage); { // Now run the data with limiting ml::api::CAnomalyJobConfig jobConfig = CTestAnomalyJob::makeSimpleJobConfig( @@ -152,8 +143,6 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { model::CAnomalyDetectorModelConfig::defaultConfig(3600); model::CLimits limits; - baseline = limits.resourceMonitor().actualMemoryUsage(); - std::stringstream outputStrm; { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -182,18 +171,15 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { // TODO this limit must be tightened once there is more granular // control over the model memory creation limitedUsage = limits.resourceMonitor().totalMemory(); - actualUsage = limits.resourceMonitor().actualMemoryUsage(); - limitedAdjustedActualUsage = actualUsage - baseline; + limitedActualUsage = limits.resourceMonitor().actualMemoryUsage(); } LOG_TRACE(<< outputStrm.str()); LOG_DEBUG(<< "Non-limited usage: " << nonLimitedUsage << "; limited: " << limitedUsage); - LOG_DEBUG(<< "baseline: " << baseline); - LOG_DEBUG(<< "actualUsage: " << actualUsage); - LOG_DEBUG(<< "limitedAdjustedActualUsage: " << limitedAdjustedActualUsage); + LOG_DEBUG(<< "Non-limited Actual Usage: " << nonLimitedActualUsage); + LOG_DEBUG(<< "Limited Actual Usage: " << limitedActualUsage); BOOST_TEST_REQUIRE(limitedUsage < nonLimitedUsage); - BOOST_TEST_REQUIRE(limitedAdjustedActualUsage < nonLimitedAdjustedActualUsage); - BOOST_TEST_REQUIRE(limitedAdjustedActualUsage >= limitedUsage); + BOOST_TEST_REQUIRE(limitedActualUsage >= limitedUsage); } } From 1a9a99ae5e69956976caea9d0963c70658efb25d Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 9 Apr 2025 16:01:06 +1200 Subject: [PATCH 06/22] Attend to review comments * ActualMemory -> SystemMemory * Report current resident set size as well as max --- bin/autodetect/Main.cc | 1 + include/core/CProgramCounters.h | 9 ++- include/model/CResourceMonitor.h | 9 ++- include/model/ModelTypes.h | 4 +- lib/api/CModelSizeStatsJsonWriter.cc | 10 ++- lib/api/unittest/CAnomalyJobLimitTest.cc | 33 +++++----- lib/api/unittest/CJsonOutputWriterTest.cc | 66 ++++++++++--------- .../unittest/CModelSnapshotJsonWriterTest.cc | 7 +- lib/core/CProcessStats_Linux.cc | 2 + lib/core/CProcessStats_MacOSX.cc | 1 + lib/core/CProcessStats_Windows.cc | 6 +- lib/model/CResourceMonitor.cc | 10 ++- lib/model/ModelTypes.cc | 6 +- lib/model/unittest/CResourceMonitorTest.cc | 16 ++--- 14 files changed, 110 insertions(+), 70 deletions(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index bbb90c706a..11f7674634 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -85,6 +85,7 @@ int main(int argc, char** argv) { ml::counter_t::E_TSADNumberPrunedItems, ml::counter_t::E_TSADAssignmentMemoryBasis, ml::counter_t::E_TSADOutputMemoryAllocatorUsage, + ml::counter_t::E_TSADResidentSetSize, ml::counter_t::E_TSADMaxResidentSetSize}; ml::core::CProgramCounters::registerProgramCounterTypes(counters); diff --git a/include/core/CProgramCounters.h b/include/core/CProgramCounters.h index 34d5cdbb26..9bad82389a 100644 --- a/include/core/CProgramCounters.h +++ b/include/core/CProgramCounters.h @@ -112,8 +112,11 @@ enum ECounterTypes { //! The memory currently used by the allocators to output JSON documents, in bytes. E_TSADOutputMemoryAllocatorUsage = 30, + //! The resident set size of the process, in bytes. + E_TSADResidentSetSize = 31, + //! The maximum resident set size of the process, in bytes. - E_TSADMaxResidentSetSize = 31, + E_TSADMaxResidentSetSize = 32, // Data Frame Outlier Detection @@ -149,7 +152,7 @@ enum ECounterTypes { // Add any new values here //! This MUST be last, increment the value for every new enum added - E_LastEnumCounter = 32 + E_LastEnumCounter = 33 }; static constexpr std::size_t NUM_COUNTERS = static_cast(E_LastEnumCounter); @@ -358,6 +361,8 @@ class CORE_EXPORT CProgramCounters { "Which option is being used to get model memory for node assignment?"}, {counter_t::E_TSADOutputMemoryAllocatorUsage, "E_TSADOutputMemoryAllocatorUsage", "The amount of memory used to output JSON documents, in bytes."}, + {counter_t::E_TSADResidentSetSize, "E_TSADResidentSetSize", + "The resident set size of the process, in bytes"}, {counter_t::E_TSADMaxResidentSetSize, "E_TSADMaxResidentSetSize", "The maximum resident set size of the process, in bytes"}, {counter_t::E_DFOEstimatedPeakMemoryUsage, "E_DFOEstimatedPeakMemoryUsage", diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index c9c887281f..8cb9d5f3ac 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -54,7 +54,8 @@ class MODEL_EXPORT CResourceMonitor { std::size_t s_AdjustedUsage{0}; std::size_t s_PeakUsage{0}; std::size_t s_AdjustedPeakUsage{0}; - std::size_t s_ActualMemoryUsage{0}; + std::size_t s_SystemMemoryUsage{0}; + std::size_t s_MaxSystemMemoryUsage{0}; std::size_t s_ByFields{0}; std::size_t s_PartitionFields{0}; std::size_t s_OverFields{0}; @@ -181,7 +182,11 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the sum of used memory plus any extra memory std::size_t totalMemory() const; - std::size_t actualMemoryUsage() const; + //! Returns the current physical memory of the process as reported by the system + std::size_t systemMemory() const; + + //! Returns the maximum physical memory of the processs as reported by the system + std::size_t maxSystemMemory() const; private: using TMonitoredResourcePtrSizeUMap = diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index ef09536551..66674ed39d 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -720,8 +720,8 @@ enum EAssignmentMemoryBasis { E_AssignmentBasisModelMemoryLimit = 1, //!< Use model memory limit E_AssignmentBasisCurrentModelBytes = 2, //!< Use current actual model size E_AssignmentBasisPeakModelBytes = 3, //!< Use highest ever actual model size - E_AssignmentBasisActualMemoryUsageBytes = 4 //!< Use the actual memory size - //!< of the process, as reported by the OS + E_AssignmentBasisSystemMemoryBytes = 4, //!< Use the current system memory size + E_AssignmentBasisMaxSystemMemoryBytes = 5 //!< Use the highest ever system memory size }; //! Get a string description of \p assignmentMemoryBasis. diff --git a/lib/api/CModelSizeStatsJsonWriter.cc b/lib/api/CModelSizeStatsJsonWriter.cc index 75604c7f6a..12858914af 100644 --- a/lib/api/CModelSizeStatsJsonWriter.cc +++ b/lib/api/CModelSizeStatsJsonWriter.cc @@ -25,7 +25,8 @@ const std::string JOB_ID{"job_id"}; const std::string MODEL_SIZE_STATS{"model_size_stats"}; const std::string MODEL_BYTES{"model_bytes"}; const std::string PEAK_MODEL_BYTES{"peak_model_bytes"}; -const std::string ACTUAL_MEMORY_USAGE_BYTES{"actual_memory_usage_bytes"}; +const std::string SYSTEM_MEMORY_BYTES{"system_memory_bytes"}; +const std::string MAX_SYSTEM_MEMORY_BYTES{"max_system_memory_bytes"}; const std::string MODEL_BYTES_EXCEEDED{"model_bytes_exceeded"}; const std::string MODEL_BYTES_MEMORY_LIMIT{"model_bytes_memory_limit"}; const std::string TOTAL_BY_FIELD_COUNT{"total_by_field_count"}; @@ -61,8 +62,11 @@ void CModelSizeStatsJsonWriter::write(const std::string& jobId, writer.onKey(PEAK_MODEL_BYTES); writer.onUint64(results.s_AdjustedPeakUsage); - writer.onKey(ACTUAL_MEMORY_USAGE_BYTES); - writer.onUint64(results.s_ActualMemoryUsage); + writer.onKey(SYSTEM_MEMORY_BYTES); + writer.onUint64(results.s_SystemMemoryUsage); + + writer.onKey(MAX_SYSTEM_MEMORY_BYTES); + writer.onUint64(results.s_MaxSystemMemoryUsage); writer.onKey(MODEL_BYTES_EXCEEDED); writer.onUint64(results.s_BytesExceeded); diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index 348b4162db..cc85baaf99 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -93,8 +93,8 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { std::size_t nonLimitedUsage{0}; std::size_t limitedUsage{0}; - std::size_t nonLimitedActualUsage{0}; - std::size_t limitedActualUsage{0}; + std::size_t nonLimitedMaxSystemUsage{0}; + std::size_t limitedMaxSystemUsage{0}; { // Without limits, this data set should make the models around // 1230000 bytes @@ -128,12 +128,12 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { BOOST_REQUIRE_EQUAL(uint64_t(18630), job.numRecordsHandled()); nonLimitedUsage = limits.resourceMonitor().totalMemory(); - nonLimitedActualUsage = limits.resourceMonitor().actualMemoryUsage(); + nonLimitedMaxSystemUsage = limits.resourceMonitor().maxSystemMemory(); } } LOG_DEBUG(<< "nonLimitedUsage: " << nonLimitedUsage); - LOG_DEBUG(<< "nonLimitedActualUsage: " << nonLimitedActualUsage); - BOOST_TEST_REQUIRE(nonLimitedActualUsage >= nonLimitedUsage); + LOG_DEBUG(<< "nonLimitedMaxSystemUsage: " << nonLimitedMaxSystemUsage); + BOOST_TEST_REQUIRE(nonLimitedMaxSystemUsage >= nonLimitedUsage); { // Now run the data with limiting ml::api::CAnomalyJobConfig jobConfig = CTestAnomalyJob::makeSimpleJobConfig( @@ -171,15 +171,15 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { // TODO this limit must be tightened once there is more granular // control over the model memory creation limitedUsage = limits.resourceMonitor().totalMemory(); - limitedActualUsage = limits.resourceMonitor().actualMemoryUsage(); + limitedMaxSystemUsage = limits.resourceMonitor().maxSystemMemory(); } LOG_TRACE(<< outputStrm.str()); LOG_DEBUG(<< "Non-limited usage: " << nonLimitedUsage << "; limited: " << limitedUsage); - LOG_DEBUG(<< "Non-limited Actual Usage: " << nonLimitedActualUsage); - LOG_DEBUG(<< "Limited Actual Usage: " << limitedActualUsage); + LOG_DEBUG(<< "Non-limited System Usage: " << nonLimitedMaxSystemUsage); + LOG_DEBUG(<< "Limited System Usage: " << limitedMaxSystemUsage); BOOST_TEST_REQUIRE(limitedUsage < nonLimitedUsage); - BOOST_TEST_REQUIRE(limitedActualUsage >= limitedUsage); + BOOST_TEST_REQUIRE(limitedMaxSystemUsage >= limitedUsage); } } @@ -384,7 +384,8 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage bytes = " << used.s_Usage); - LOG_DEBUG(<< "Actual memory usage bytes = " << used.s_ActualMemoryUsage); + LOG_DEBUG(<< "System memory usage bytes = " << used.s_SystemMemoryUsage); + LOG_DEBUG(<< "Max system memory usage bytes = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * core::constants::BYTES_IN_MEGABYTES); BOOST_TEST_REQUIRE(used.s_ByFields > testParam.s_ExpectedByFields); @@ -394,7 +395,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedByMemoryUsageRelativeErrorDivisor); - BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } LOG_DEBUG(<< "**** Test partition with bucketLength = " << testParam.s_BucketLength @@ -439,7 +440,8 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); - LOG_DEBUG(<< "Actual memory usage = " << used.s_ActualMemoryUsage); + LOG_DEBUG(<< "System memory usage = " << used.s_SystemMemoryUsage); + LOG_DEBUG(<< "Max system memory usage = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_PartitionFields >= testParam.s_ExpectedPartitionFields); BOOST_TEST_REQUIRE(used.s_PartitionFields < 450); @@ -449,7 +451,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedPartitionUsageRelativeErrorDivisor); - BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } LOG_DEBUG(<< "**** Test over with bucketLength = " << testParam.s_BucketLength @@ -492,7 +494,8 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# over = " << used.s_OverFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); - LOG_DEBUG(<< "Actual memory usage = " << used.s_ActualMemoryUsage); + LOG_DEBUG(<< "System memory usage = " << used.s_SystemMemoryUsage); + LOG_DEBUG(<< "Max system memory usage = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_OverFields > testParam.s_ExpectedOverFields); BOOST_TEST_REQUIRE(used.s_OverFields <= 9000); @@ -500,7 +503,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedOverUsageRelativeErrorDivisor); - BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } } } diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index 95c5e319b8..82c11c31f2 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1728,22 +1728,23 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { resourceUsage.s_AdjustedUsage = 2; resourceUsage.s_PeakUsage = 3; resourceUsage.s_AdjustedPeakUsage = 4; - resourceUsage.s_ActualMemoryUsage = 5; - resourceUsage.s_ByFields = 6; - resourceUsage.s_PartitionFields = 7; - resourceUsage.s_OverFields = 8; - resourceUsage.s_AllocationFailures = 9; + resourceUsage.s_SystemMemoryUsage = 5; + resourceUsage.s_MaxSystemMemoryUsage = 6; + resourceUsage.s_ByFields = 7; + resourceUsage.s_PartitionFields = 8; + resourceUsage.s_OverFields = 9; + resourceUsage.s_AllocationFailures = 10; resourceUsage.s_MemoryStatus = ml::model_t::E_MemoryStatusHardLimit; - resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisActualMemoryUsageBytes; - resourceUsage.s_BucketStartTime = 10; - resourceUsage.s_BytesExceeded = 11; - resourceUsage.s_BytesMemoryLimit = 12; - resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 13; - resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 14; - resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 15; - resourceUsage.s_OverallCategorizerStats.s_RareCategories = 16; - resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 17; - resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 18; + resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisSystemMemoryBytes; + resourceUsage.s_BucketStartTime = 11; + resourceUsage.s_BytesExceeded = 12; + resourceUsage.s_BytesMemoryLimit = 13; + resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 14; + resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 15; + resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 16; + resourceUsage.s_OverallCategorizerStats.s_RareCategories = 17; + resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 18; + resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 19; resourceUsage.s_OverallCategorizerStats.s_CategorizationStatus = ml::model_t::E_CategorizationStatusWarn; @@ -1771,47 +1772,50 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_REQUIRE_EQUAL(2, sizeStats.at("model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); - BOOST_TEST_REQUIRE(sizeStats.contains("actual_memory_usage_bytes")); + BOOST_TEST_REQUIRE(sizeStats.contains("system_memory_bytes")); BOOST_REQUIRE_EQUAL( - 5, sizeStats.at("actual_memory_usage_bytes").to_number()); + 5, sizeStats.at("system_memory_bytes").to_number()); + BOOST_TEST_REQUIRE(sizeStats.contains("max_system_memory_bytes")); + BOOST_REQUIRE_EQUAL( + 6, sizeStats.at("max_system_memory_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_by_field_count")); - BOOST_REQUIRE_EQUAL(6, sizeStats.at("total_by_field_count").to_number()); + BOOST_REQUIRE_EQUAL(7, sizeStats.at("total_by_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_partition_field_count")); BOOST_REQUIRE_EQUAL( - 7, sizeStats.at("total_partition_field_count").to_number()); + 8, sizeStats.at("total_partition_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_over_field_count")); - BOOST_REQUIRE_EQUAL(8, sizeStats.at("total_over_field_count").to_number()); + BOOST_REQUIRE_EQUAL(9, sizeStats.at("total_over_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("bucket_allocation_failures_count")); BOOST_REQUIRE_EQUAL( - 9, sizeStats.at("bucket_allocation_failures_count").to_number()); + 10, sizeStats.at("bucket_allocation_failures_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("timestamp")); - BOOST_REQUIRE_EQUAL(10000, sizeStats.at("timestamp").to_number()); + BOOST_REQUIRE_EQUAL(11000, sizeStats.at("timestamp").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("memory_status")); BOOST_REQUIRE_EQUAL("hard_limit", sizeStats.at("memory_status").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("assignment_memory_basis")); - BOOST_REQUIRE_EQUAL("actual_memory_usage_bytes", + BOOST_REQUIRE_EQUAL("system_memory_bytes", sizeStats.at("assignment_memory_basis").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("log_time")); std::int64_t nowMs{ml::core::CTimeUtils::nowMs()}; BOOST_TEST_REQUIRE(nowMs >= sizeStats.at("log_time").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_exceeded")); - BOOST_REQUIRE_EQUAL(11, sizeStats.at("model_bytes_exceeded").to_number()); + BOOST_REQUIRE_EQUAL(12, sizeStats.at("model_bytes_exceeded").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_memory_limit")); BOOST_REQUIRE_EQUAL( - 12, sizeStats.at("model_bytes_memory_limit").to_number()); + 13, sizeStats.at("model_bytes_memory_limit").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorized_doc_count")); - BOOST_REQUIRE_EQUAL(13, sizeStats.at("categorized_doc_count").to_number()); + BOOST_REQUIRE_EQUAL(14, sizeStats.at("categorized_doc_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_category_count")); - BOOST_REQUIRE_EQUAL(14, sizeStats.at("total_category_count").to_number()); + BOOST_REQUIRE_EQUAL(15, sizeStats.at("total_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("frequent_category_count")); BOOST_REQUIRE_EQUAL( - 15, sizeStats.at("frequent_category_count").to_number()); + 16, sizeStats.at("frequent_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("rare_category_count")); - BOOST_REQUIRE_EQUAL(16, sizeStats.at("rare_category_count").to_number()); + BOOST_REQUIRE_EQUAL(17, sizeStats.at("rare_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("dead_category_count")); - BOOST_REQUIRE_EQUAL(17, sizeStats.at("dead_category_count").to_number()); + BOOST_REQUIRE_EQUAL(18, sizeStats.at("dead_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("failed_category_count")); - BOOST_REQUIRE_EQUAL(18, sizeStats.at("failed_category_count").to_number()); + BOOST_REQUIRE_EQUAL(19, sizeStats.at("failed_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorization_status")); BOOST_REQUIRE_EQUAL("warn", sizeStats.at("categorization_status").as_string()); } diff --git a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc index eb4d382b9a..c3c32aa022 100644 --- a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc +++ b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc @@ -36,7 +36,8 @@ BOOST_AUTO_TEST_CASE(testWrite) { 20000, // bytes used (adjusted) 30000, // peak bytes used 60000, // peak bytes used (adjusted) - 409600, // Actual memory used (max rss) + 409600, // System memory used (rss) + 413696, // Max system memory used (max rss) 3, // # by fields 1, // # partition fields 150, // # over fields @@ -117,6 +118,10 @@ BOOST_AUTO_TEST_CASE(testWrite) { BOOST_TEST_REQUIRE(modelSizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL( 60000, modelSizeStats.at("peak_model_bytes").to_number()); + BOOST_REQUIRE_EQUAL( + 409600, modelSizeStats.at("system_memory_bytes").to_number()); + BOOST_REQUIRE_EQUAL( + 413696, modelSizeStats.at("max_system_memory_bytes").to_number()); BOOST_TEST_REQUIRE(modelSizeStats.contains("total_by_field_count")); BOOST_REQUIRE_EQUAL( 3, modelSizeStats.at("total_by_field_count").to_number()); diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index c858b4e589..6511a209f1 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -76,6 +76,8 @@ std::size_t CProcessStats::residentSetSize() { } } + CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = rss; + return rss; } diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index c3edc85424..855b0bdca9 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -23,6 +23,7 @@ namespace core { std::size_t CProcessStats::residentSetSize() { // not supported on osx + CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = 0; return 0; } diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index d91db5e3c9..e4e2baa163 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -29,7 +29,11 @@ std::size_t CProcessStats::residentSetSize() { return 0; } - return static_cast(stats.WorkingSetSize); + auto workingSetSize = static_cast(stats.WorkingSetSize); + + CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = workingSetSize; + + return workingSetSize; } std::size_t CProcessStats::maxResidentSetSize() { diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index a0b74ed6f3..69354f5629 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -383,7 +383,8 @@ CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { res.s_PeakUsage = static_cast( core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage); - res.s_ActualMemoryUsage = core::CProcessStats::maxResidentSetSize(); + res.s_SystemMemoryUsage = core::CProcessStats::residentSetSize(); + res.s_MaxSystemMemoryUsage = core::CProcessStats::maxResidentSetSize(); res.s_BytesMemoryLimit = this->persistenceMemoryIncreaseFactor() * m_ByteLimitHigh; res.s_BytesExceeded = m_CurrentBytesExceeded; res.s_MemoryStatus = m_MemoryStatus; @@ -493,9 +494,12 @@ std::size_t CResourceMonitor::totalMemory() const { counter_t::E_TSADOutputMemoryAllocatorUsage)); } -std::size_t CResourceMonitor::actualMemoryUsage() const { - return core::CProcessStats::maxResidentSetSize(); +std::size_t CResourceMonitor::systemMemory() const { + return core::CProcessStats::residentSetSize(); } +std::size_t CResourceMonitor::maxSystemMemory() const { + return core::CProcessStats::maxResidentSetSize(); +} } // model } // ml diff --git a/lib/model/ModelTypes.cc b/lib/model/ModelTypes.cc index a9140a66b3..1bddf8bae8 100644 --- a/lib/model/ModelTypes.cc +++ b/lib/model/ModelTypes.cc @@ -1733,8 +1733,10 @@ std::string print(EAssignmentMemoryBasis assignmentMemoryBasis) { return "current_model_bytes"; case E_AssignmentBasisPeakModelBytes: return "peak_model_bytes"; - case E_AssignmentBasisActualMemoryUsageBytes: - return "actual_memory_usage_bytes"; + case E_AssignmentBasisSystemMemoryBytes: + return "system_memory_bytes"; + case E_AssignmentBasisMaxSystemMemoryBytes: + return "max_system_memory_bytes"; } return "-"; } diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index 193746addc..c17c644c80 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -549,9 +549,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(baseTotalMemory <= m_ReportedModelSizeStats.s_SystemMemoryUsage); BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_ActualMemoryUsage); + m_ReportedModelSizeStats.s_SystemMemoryUsage); monitor.addExtraMemory(100); @@ -560,9 +560,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_ActualMemoryUsage); + m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); monitor.addExtraMemory(-50); @@ -571,9 +571,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 50, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_ActualMemoryUsage); + m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); monitor.addExtraMemory(100); @@ -582,9 +582,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory + 150 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 150 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_ActualMemoryUsage); + m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); } BOOST_FIXTURE_TEST_CASE(testUpdateMoments, CTestFixture) { From 5ae22cb50c89f8176df8eca696684d294f16cf69 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Apr 2025 11:08:27 +1200 Subject: [PATCH 07/22] Update bin/autodetect/Main.cc Co-authored-by: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> --- bin/autodetect/Main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 11f7674634..93cb77e5d7 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -155,7 +155,7 @@ int main(int argc, char** argv) { cancellerThread.stop(); LOG_DEBUG(<< "Max Resident Set Size: " << ml::core::CProcessStats::maxResidentSetSize()); - +LOG_DEBUG(<< "Resident Set Size: " << ml::core::CProcessStats::residentSetSize()); // Log the program version immediately after reconfiguring the logger. This // must be done from the program, and NOT a shared library, as each program // statically links its own version library. From fe6f1fac7df9daf4b6a7a8f42c0ea3ce0f668a32 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Apr 2025 11:08:49 +1200 Subject: [PATCH 08/22] Update include/model/CResourceMonitor.h Co-authored-by: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> --- include/model/CResourceMonitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index 8cb9d5f3ac..d1815efa81 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -182,7 +182,7 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the sum of used memory plus any extra memory std::size_t totalMemory() const; - //! Returns the current physical memory of the process as reported by the system + //! Returns the current physical memory of the process (rss) as reported by the system std::size_t systemMemory() const; //! Returns the maximum physical memory of the processs as reported by the system From 582430ea4fbfbd0d5cf486af35fc4f469e0c50d0 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Apr 2025 11:09:09 +1200 Subject: [PATCH 09/22] Update include/model/CResourceMonitor.h Co-authored-by: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> --- include/model/CResourceMonitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index d1815efa81..ff156d007d 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -185,7 +185,7 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the current physical memory of the process (rss) as reported by the system std::size_t systemMemory() const; - //! Returns the maximum physical memory of the processs as reported by the system + //! Returns the maximum physical memory of the process (max rss) as reported by the system std::size_t maxSystemMemory() const; private: From 9476edee78422a088b9c94d104680203d1c21bf2 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Apr 2025 14:59:43 +1200 Subject: [PATCH 10/22] Attend to review comments --- bin/autodetect/Main.cc | 6 +++--- include/core/CProgramCounters.h | 8 ++++---- include/model/CResourceMonitor.h | 2 +- include/model/ModelTypes.h | 4 +--- lib/api/unittest/CJsonOutputWriterTest.cc | 6 +++--- lib/core/CProcessStats_Linux.cc | 4 ++-- lib/core/CProcessStats_MacOSX.cc | 4 ++-- lib/core/CProcessStats_Windows.cc | 4 ++-- lib/model/ModelTypes.cc | 4 ---- 9 files changed, 18 insertions(+), 24 deletions(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 11f7674634..6037d4ba43 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -85,8 +85,8 @@ int main(int argc, char** argv) { ml::counter_t::E_TSADNumberPrunedItems, ml::counter_t::E_TSADAssignmentMemoryBasis, ml::counter_t::E_TSADOutputMemoryAllocatorUsage, - ml::counter_t::E_TSADResidentSetSize, - ml::counter_t::E_TSADMaxResidentSetSize}; + ml::counter_t::E_TSADSystemMemoryUsage, + ml::counter_t::E_TSADMaxSystemMemoryUsage}; ml::core::CProgramCounters::registerProgramCounterTypes(counters); @@ -154,7 +154,7 @@ int main(int argc, char** argv) { } cancellerThread.stop(); - LOG_DEBUG(<< "Max Resident Set Size: " << ml::core::CProcessStats::maxResidentSetSize()); + LOG_DEBUG(<< "Resident Set Size: " << ml::core::CProcessStats::residentSetSize()); // Log the program version immediately after reconfiguring the logger. This // must be done from the program, and NOT a shared library, as each program diff --git a/include/core/CProgramCounters.h b/include/core/CProgramCounters.h index 9bad82389a..fd9c949a5d 100644 --- a/include/core/CProgramCounters.h +++ b/include/core/CProgramCounters.h @@ -113,10 +113,10 @@ enum ECounterTypes { E_TSADOutputMemoryAllocatorUsage = 30, //! The resident set size of the process, in bytes. - E_TSADResidentSetSize = 31, + E_TSADSystemMemoryUsage = 31, //! The maximum resident set size of the process, in bytes. - E_TSADMaxResidentSetSize = 32, + E_TSADMaxSystemMemoryUsage = 32, // Data Frame Outlier Detection @@ -361,9 +361,9 @@ class CORE_EXPORT CProgramCounters { "Which option is being used to get model memory for node assignment?"}, {counter_t::E_TSADOutputMemoryAllocatorUsage, "E_TSADOutputMemoryAllocatorUsage", "The amount of memory used to output JSON documents, in bytes."}, - {counter_t::E_TSADResidentSetSize, "E_TSADResidentSetSize", + {counter_t::E_TSADSystemMemoryUsage, "E_TSADResidentSetSize", "The resident set size of the process, in bytes"}, - {counter_t::E_TSADMaxResidentSetSize, "E_TSADMaxResidentSetSize", + {counter_t::E_TSADMaxSystemMemoryUsage, "E_TSADMaxResidentSetSize", "The maximum resident set size of the process, in bytes"}, {counter_t::E_DFOEstimatedPeakMemoryUsage, "E_DFOEstimatedPeakMemoryUsage", "The upfront estimate of the peak memory outlier detection would use"}, diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index 8cb9d5f3ac..cd8ea1ad58 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -182,7 +182,7 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the sum of used memory plus any extra memory std::size_t totalMemory() const; - //! Returns the current physical memory of the process as reported by the system + //! Returns the current physical memory (rss) of the process as reported by the system std::size_t systemMemory() const; //! Returns the maximum physical memory of the processs as reported by the system diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index 66674ed39d..9494935b92 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -719,9 +719,7 @@ enum EAssignmentMemoryBasis { E_AssignmentBasisUnknown = 0, //!< Decision made in Java code E_AssignmentBasisModelMemoryLimit = 1, //!< Use model memory limit E_AssignmentBasisCurrentModelBytes = 2, //!< Use current actual model size - E_AssignmentBasisPeakModelBytes = 3, //!< Use highest ever actual model size - E_AssignmentBasisSystemMemoryBytes = 4, //!< Use the current system memory size - E_AssignmentBasisMaxSystemMemoryBytes = 5 //!< Use the highest ever system memory size + E_AssignmentBasisPeakModelBytes = 3 }; //! Get a string description of \p assignmentMemoryBasis. diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index 82c11c31f2..495fd77a52 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1717,7 +1717,7 @@ BOOST_AUTO_TEST_CASE(testPersistNormalizer) { BOOST_TEST_REQUIRE(quantileState.contains("timestamp")); } -BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { +BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { std::ostringstream sstream; { ml::core::CJsonOutputStreamWrapper outputStream(sstream); @@ -1735,7 +1735,7 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { resourceUsage.s_OverFields = 9; resourceUsage.s_AllocationFailures = 10; resourceUsage.s_MemoryStatus = ml::model_t::E_MemoryStatusHardLimit; - resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisSystemMemoryBytes; + resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisPeakModelBytes; resourceUsage.s_BucketStartTime = 11; resourceUsage.s_BytesExceeded = 12; resourceUsage.s_BytesMemoryLimit = 13; @@ -1793,7 +1793,7 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_TEST_REQUIRE(sizeStats.contains("memory_status")); BOOST_REQUIRE_EQUAL("hard_limit", sizeStats.at("memory_status").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("assignment_memory_basis")); - BOOST_REQUIRE_EQUAL("system_memory_bytes", + BOOST_REQUIRE_EQUAL("peak_model_bytes", sizeStats.at("assignment_memory_basis").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("log_time")); std::int64_t nowMs{ml::core::CTimeUtils::nowMs()}; diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index 6511a209f1..ecaaf7f272 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -76,7 +76,7 @@ std::size_t CProcessStats::residentSetSize() { } } - CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = rss; + CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = rss; return rss; } @@ -92,7 +92,7 @@ std::size_t CProcessStats::maxResidentSetSize() { // ru_maxrss is in kilobytes auto maxRSS = static_cast(rusage.ru_maxrss * 1024L); - CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; + CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = maxRSS; return maxRSS; } diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index 855b0bdca9..d308463c4e 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -23,7 +23,7 @@ namespace core { std::size_t CProcessStats::residentSetSize() { // not supported on osx - CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = 0; + CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = 0; return 0; } @@ -35,7 +35,7 @@ std::size_t CProcessStats::maxResidentSetSize() { return 0; } auto maxRSS = static_cast(rusage.ru_maxrss); - CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; + CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = maxRSS; // ru_maxrss is in bytes return maxRSS; } diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index e4e2baa163..cc88daad75 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -31,7 +31,7 @@ std::size_t CProcessStats::residentSetSize() { auto workingSetSize = static_cast(stats.WorkingSetSize); - CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = workingSetSize; + CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = workingSetSize; return workingSetSize; } @@ -45,7 +45,7 @@ std::size_t CProcessStats::maxResidentSetSize() { auto peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); - CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = peakWorkingSetSize; + CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = peakWorkingSetSize; return peakWorkingSetSize; } diff --git a/lib/model/ModelTypes.cc b/lib/model/ModelTypes.cc index 1bddf8bae8..2fab1d1c2a 100644 --- a/lib/model/ModelTypes.cc +++ b/lib/model/ModelTypes.cc @@ -1733,10 +1733,6 @@ std::string print(EAssignmentMemoryBasis assignmentMemoryBasis) { return "current_model_bytes"; case E_AssignmentBasisPeakModelBytes: return "peak_model_bytes"; - case E_AssignmentBasisSystemMemoryBytes: - return "system_memory_bytes"; - case E_AssignmentBasisMaxSystemMemoryBytes: - return "max_system_memory_bytes"; } return "-"; } From 475fef1aec8111be815bae561bdc1b82d7bb37fb Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Apr 2025 15:12:47 +1200 Subject: [PATCH 11/22] Formatting --- lib/api/unittest/CJsonOutputWriterTest.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index 495fd77a52..ea16f9a19d 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1717,7 +1717,7 @@ BOOST_AUTO_TEST_CASE(testPersistNormalizer) { BOOST_TEST_REQUIRE(quantileState.contains("timestamp")); } -BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { +BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { std::ostringstream sstream; { ml::core::CJsonOutputStreamWrapper outputStream(sstream); @@ -1773,8 +1773,7 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("system_memory_bytes")); - BOOST_REQUIRE_EQUAL( - 5, sizeStats.at("system_memory_bytes").to_number()); + BOOST_REQUIRE_EQUAL(5, sizeStats.at("system_memory_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("max_system_memory_bytes")); BOOST_REQUIRE_EQUAL( 6, sizeStats.at("max_system_memory_bytes").to_number()); From 6945d3fd1d439581526320677a5009d8425c5add Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 27 May 2025 15:50:37 +1200 Subject: [PATCH 12/22] Attend to failing unit tests (hopefully) * Address failing unit tests * More accurate, meaningful description of new program counters --- include/core/CProgramCounters.h | 8 ++++---- include/model/CResourceMonitor.h | 4 ++-- include/model/ModelTypes.h | 2 +- lib/api/CAnomalyJob.cc | 2 ++ lib/core/CProcessStats_Linux.cc | 8 +------- lib/core/CProcessStats_MacOSX.cc | 6 ++---- lib/core/CProcessStats_Windows.cc | 12 ++---------- lib/model/CResourceMonitor.cc | 4 ++-- 8 files changed, 16 insertions(+), 30 deletions(-) diff --git a/include/core/CProgramCounters.h b/include/core/CProgramCounters.h index fd9c949a5d..4fde6670ff 100644 --- a/include/core/CProgramCounters.h +++ b/include/core/CProgramCounters.h @@ -361,10 +361,10 @@ class CORE_EXPORT CProgramCounters { "Which option is being used to get model memory for node assignment?"}, {counter_t::E_TSADOutputMemoryAllocatorUsage, "E_TSADOutputMemoryAllocatorUsage", "The amount of memory used to output JSON documents, in bytes."}, - {counter_t::E_TSADSystemMemoryUsage, "E_TSADResidentSetSize", - "The resident set size of the process, in bytes"}, - {counter_t::E_TSADMaxSystemMemoryUsage, "E_TSADMaxResidentSetSize", - "The maximum resident set size of the process, in bytes"}, + {counter_t::E_TSADSystemMemoryUsage, "E_TSADSystemMemoryUsage", + "The amount of system memory used by the process, in bytes"}, + {counter_t::E_TSADMaxSystemMemoryUsage, "E_TSADMaxSystemMemoryUsage", + "The maximum amount of system memory used by the process, in bytes"}, {counter_t::E_DFOEstimatedPeakMemoryUsage, "E_DFOEstimatedPeakMemoryUsage", "The upfront estimate of the peak memory outlier detection would use"}, {counter_t::E_DFOPeakMemoryUsage, "E_DFOPeakMemoryUsage", "The peak memory outlier detection used"}, diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index ff156d007d..cea75148ed 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -183,10 +183,10 @@ class MODEL_EXPORT CResourceMonitor { std::size_t totalMemory() const; //! Returns the current physical memory of the process (rss) as reported by the system - std::size_t systemMemory() const; + static std::size_t systemMemory() ; //! Returns the maximum physical memory of the process (max rss) as reported by the system - std::size_t maxSystemMemory() const; + static std::size_t maxSystemMemory(); private: using TMonitoredResourcePtrSizeUMap = diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index 9494935b92..acbcc14c04 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -719,7 +719,7 @@ enum EAssignmentMemoryBasis { E_AssignmentBasisUnknown = 0, //!< Decision made in Java code E_AssignmentBasisModelMemoryLimit = 1, //!< Use model memory limit E_AssignmentBasisCurrentModelBytes = 2, //!< Use current actual model size - E_AssignmentBasisPeakModelBytes = 3 + E_AssignmentBasisPeakModelBytes = 3 //!< Use highest ever actual model size }; //! Get a string description of \p assignmentMemoryBasis. diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index 3a8c06be2a..cc590d370b 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -205,6 +205,8 @@ bool CAnomalyJob::handleRecord(const TStrStrUMap& dataRowFields, TOptionalTime t } ++core::CProgramCounters::counter(counter_t::E_TSADNumberApiRecordsHandled); + core::CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = model::CResourceMonitor::systemMemory(); + core::CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = model::CResourceMonitor::maxSystemMemory(); ++m_NumRecordsHandled; m_LatestRecordTime = std::max(m_LatestRecordTime, *time); diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index ecaaf7f272..e3635e279f 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -76,8 +76,6 @@ std::size_t CProcessStats::residentSetSize() { } } - CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = rss; - return rss; } @@ -90,11 +88,7 @@ std::size_t CProcessStats::maxResidentSetSize() { } // ru_maxrss is in kilobytes - auto maxRSS = static_cast(rusage.ru_maxrss * 1024L); - - CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = maxRSS; - - return maxRSS; + return static_cast(rusage.ru_maxrss * 1024L); } } } diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index d308463c4e..ce86647aed 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -23,7 +23,6 @@ namespace core { std::size_t CProcessStats::residentSetSize() { // not supported on osx - CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = 0; return 0; } @@ -34,10 +33,9 @@ std::size_t CProcessStats::maxResidentSetSize() { LOG_DEBUG(<< "failed to get resource usage(getrusage): " << ::strerror(errno)); return 0; } - auto maxRSS = static_cast(rusage.ru_maxrss); - CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = maxRSS; + // ru_maxrss is in bytes - return maxRSS; + return static_cast(rusage.ru_maxrss);; } } } diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index cc88daad75..933952e1f2 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -29,11 +29,7 @@ std::size_t CProcessStats::residentSetSize() { return 0; } - auto workingSetSize = static_cast(stats.WorkingSetSize); - - CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = workingSetSize; - - return workingSetSize; + return static_cast(stats.WorkingSetSize); } std::size_t CProcessStats::maxResidentSetSize() { @@ -43,11 +39,7 @@ std::size_t CProcessStats::maxResidentSetSize() { return 0; } - auto peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); - - CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = peakWorkingSetSize; - - return peakWorkingSetSize; + return static_cast(stats.PeakWorkingSetSize); } } } diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index 69354f5629..7d71df1afb 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -494,11 +494,11 @@ std::size_t CResourceMonitor::totalMemory() const { counter_t::E_TSADOutputMemoryAllocatorUsage)); } -std::size_t CResourceMonitor::systemMemory() const { +std::size_t CResourceMonitor::systemMemory() { return core::CProcessStats::residentSetSize(); } -std::size_t CResourceMonitor::maxSystemMemory() const { +std::size_t CResourceMonitor::maxSystemMemory() { return core::CProcessStats::maxResidentSetSize(); } } // model From 3b69b72f8227388ed654ee0de9d381adec4adf88 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 27 May 2025 16:11:25 +1200 Subject: [PATCH 13/22] Formatting.. grr --- include/model/CResourceMonitor.h | 2 +- lib/api/CAnomalyJob.cc | 6 ++++-- lib/core/CProcessStats_MacOSX.cc | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index cea75148ed..68758d4f09 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -183,7 +183,7 @@ class MODEL_EXPORT CResourceMonitor { std::size_t totalMemory() const; //! Returns the current physical memory of the process (rss) as reported by the system - static std::size_t systemMemory() ; + static std::size_t systemMemory(); //! Returns the maximum physical memory of the process (max rss) as reported by the system static std::size_t maxSystemMemory(); diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index cc590d370b..a334fe34dc 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -205,8 +205,10 @@ bool CAnomalyJob::handleRecord(const TStrStrUMap& dataRowFields, TOptionalTime t } ++core::CProgramCounters::counter(counter_t::E_TSADNumberApiRecordsHandled); - core::CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = model::CResourceMonitor::systemMemory(); - core::CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = model::CResourceMonitor::maxSystemMemory(); + core::CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = + model::CResourceMonitor::systemMemory(); + core::CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = + model::CResourceMonitor::maxSystemMemory(); ++m_NumRecordsHandled; m_LatestRecordTime = std::max(m_LatestRecordTime, *time); diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index ce86647aed..61dc9fca66 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -35,7 +35,7 @@ std::size_t CProcessStats::maxResidentSetSize() { } // ru_maxrss is in bytes - return static_cast(rusage.ru_maxrss);; + return static_cast(rusage.ru_maxrss); } } } From 1bafa8cdf1663636f7bb59ffbd2f31488e90835b Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 4 Jun 2025 16:24:50 +1200 Subject: [PATCH 14/22] On Linux only, use the value of the system memory usage (max resident set size) for the "model memory usage" and "peak model memory usage" fields reported to Java. --- include/model/CSystemMemoryUsage.h | 43 +++++++++++++++++++ lib/api/CModelSizeStatsJsonWriter.cc | 6 --- lib/api/unittest/CJsonOutputWriterTest.cc | 5 --- .../unittest/CModelSnapshotJsonWriterTest.cc | 4 -- lib/model/CMakeLists.txt | 1 + lib/model/CResourceMonitor.cc | 6 ++- lib/model/CSystemMemoryUsage.cc | 23 ++++++++++ lib/model/CSystemMemoryUsage_Linux.cc | 24 +++++++++++ 8 files changed, 95 insertions(+), 17 deletions(-) create mode 100644 include/model/CSystemMemoryUsage.h create mode 100644 lib/model/CSystemMemoryUsage.cc create mode 100644 lib/model/CSystemMemoryUsage_Linux.cc diff --git a/include/model/CSystemMemoryUsage.h b/include/model/CSystemMemoryUsage.h new file mode 100644 index 0000000000..62adea2f29 --- /dev/null +++ b/include/model/CSystemMemoryUsage.h @@ -0,0 +1,43 @@ +/* +* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ +#ifndef INCLUDED_ml_model_CSystemMemoryUsage_h +#define INCLUDED_ml_model_CSystemMemoryUsage_h + +#include + +#include + +namespace ml { +namespace model { + +//! \brief Determines the memory used by the current process. +//! +//! DESCRIPTION:\n +//! Determines the memory used by the current process based on the operating system. +class MODEL_EXPORT CSystemMemoryUsage { +public: + CSystemMemoryUsage() = default; + ~CSystemMemoryUsage() = default; + + CSystemMemoryUsage(const CSystemMemoryUsage&) = delete; + CSystemMemoryUsage(CSystemMemoryUsage&&) = delete; + CSystemMemoryUsage& operator=(const CSystemMemoryUsage&) = delete; + CSystemMemoryUsage& operator=(CSystemMemoryUsage&&) = delete; + + //! Return the system memory used by the current process. + //! \param memSize An estimate of the process memory + //! \return The system memory usage based on the current OS. + std::size_t operator()(std::size_t memSize); +}; +} +} + +#endif //INCLUDED_ml_model_CSystemMemoryUsage_h diff --git a/lib/api/CModelSizeStatsJsonWriter.cc b/lib/api/CModelSizeStatsJsonWriter.cc index 12858914af..68b5b80c88 100644 --- a/lib/api/CModelSizeStatsJsonWriter.cc +++ b/lib/api/CModelSizeStatsJsonWriter.cc @@ -62,12 +62,6 @@ void CModelSizeStatsJsonWriter::write(const std::string& jobId, writer.onKey(PEAK_MODEL_BYTES); writer.onUint64(results.s_AdjustedPeakUsage); - writer.onKey(SYSTEM_MEMORY_BYTES); - writer.onUint64(results.s_SystemMemoryUsage); - - writer.onKey(MAX_SYSTEM_MEMORY_BYTES); - writer.onUint64(results.s_MaxSystemMemoryUsage); - writer.onKey(MODEL_BYTES_EXCEEDED); writer.onUint64(results.s_BytesExceeded); diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index ea16f9a19d..19caa4dfc1 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1772,11 +1772,6 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_REQUIRE_EQUAL(2, sizeStats.at("model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); - BOOST_TEST_REQUIRE(sizeStats.contains("system_memory_bytes")); - BOOST_REQUIRE_EQUAL(5, sizeStats.at("system_memory_bytes").to_number()); - BOOST_TEST_REQUIRE(sizeStats.contains("max_system_memory_bytes")); - BOOST_REQUIRE_EQUAL( - 6, sizeStats.at("max_system_memory_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_by_field_count")); BOOST_REQUIRE_EQUAL(7, sizeStats.at("total_by_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_partition_field_count")); diff --git a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc index c3c32aa022..f2c22a41de 100644 --- a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc +++ b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc @@ -118,10 +118,6 @@ BOOST_AUTO_TEST_CASE(testWrite) { BOOST_TEST_REQUIRE(modelSizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL( 60000, modelSizeStats.at("peak_model_bytes").to_number()); - BOOST_REQUIRE_EQUAL( - 409600, modelSizeStats.at("system_memory_bytes").to_number()); - BOOST_REQUIRE_EQUAL( - 413696, modelSizeStats.at("max_system_memory_bytes").to_number()); BOOST_TEST_REQUIRE(modelSizeStats.contains("total_by_field_count")); BOOST_REQUIRE_EQUAL( 3, modelSizeStats.at("total_by_field_count").to_number()); diff --git a/lib/model/CMakeLists.txt b/lib/model/CMakeLists.txt index c53eec9fb0..37c8c1cc5c 100644 --- a/lib/model/CMakeLists.txt +++ b/lib/model/CMakeLists.txt @@ -75,6 +75,7 @@ ml_add_library(MlModel SHARED CSampleCounts.cc CSearchKey.cc CSimpleCountDetector.cc + CSystemMemoryUsage.cc CTokenListCategory.cc CTokenListDataCategorizerBase.cc CTokenListReverseSearchCreator.cc diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index 7d71df1afb..1e131ce0b2 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -378,11 +379,12 @@ void CResourceMonitor::sendMemoryUsageReport(core_t::TTime bucketStartTime, CResourceMonitor::SModelSizeStats CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { SModelSizeStats res; + CSystemMemoryUsage systemMemoryUsage; res.s_Usage = this->totalMemory(); - res.s_AdjustedUsage = this->adjustedUsage(res.s_Usage); + res.s_AdjustedUsage = systemMemoryUsage(this->adjustedUsage(res.s_Usage)); res.s_PeakUsage = static_cast( core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); - res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage); + res.s_AdjustedPeakUsage = systemMemoryUsage(this->adjustedUsage(res.s_PeakUsage)); res.s_SystemMemoryUsage = core::CProcessStats::residentSetSize(); res.s_MaxSystemMemoryUsage = core::CProcessStats::maxResidentSetSize(); res.s_BytesMemoryLimit = this->persistenceMemoryIncreaseFactor() * m_ByteLimitHigh; diff --git a/lib/model/CSystemMemoryUsage.cc b/lib/model/CSystemMemoryUsage.cc new file mode 100644 index 0000000000..ae275beb34 --- /dev/null +++ b/lib/model/CSystemMemoryUsage.cc @@ -0,0 +1,23 @@ +/* +* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#include + +#include + +namespace ml { +namespace model { +// On platforms other than Linux the system memory usage is that provided - the estimated size of the models. +std::size_t CSystemMemoryUsage::operator()(std::size_t memSize) { + return memSize; +} +} +} diff --git a/lib/model/CSystemMemoryUsage_Linux.cc b/lib/model/CSystemMemoryUsage_Linux.cc new file mode 100644 index 0000000000..a32026f2a5 --- /dev/null +++ b/lib/model/CSystemMemoryUsage_Linux.cc @@ -0,0 +1,24 @@ +/* +* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#include + +#include + +namespace ml { +namespace model { +// On Linux the system memory usage is actually that determined by the OS. +// The estimated value provided is ignored. +std::size_t CSystemMemoryUsage::operator()(std::size_t /*memSize*/) { + return core::CProcessStats::maxResidentSetSize(); +} +} +} From efc311b5f432e3ed6183a5f7b74c28300973cbd5 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 4 Jun 2025 16:36:50 +1200 Subject: [PATCH 15/22] Fix copyright headers --- include/model/CSystemMemoryUsage.h | 3 ++- lib/model/CSystemMemoryUsage.cc | 2 +- lib/model/CSystemMemoryUsage_Linux.cc | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/model/CSystemMemoryUsage.h b/include/model/CSystemMemoryUsage.h index 62adea2f29..6e78091521 100644 --- a/include/model/CSystemMemoryUsage.h +++ b/include/model/CSystemMemoryUsage.h @@ -1,5 +1,5 @@ /* -* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. Licensed under the Elastic License * 2.0 and the following additional limitation. Functionality enabled by the * files subject to the Elastic License 2.0 may only be used in production when @@ -8,6 +8,7 @@ * compliance with the Elastic License 2.0 and the foregoing additional * limitation. */ + #ifndef INCLUDED_ml_model_CSystemMemoryUsage_h #define INCLUDED_ml_model_CSystemMemoryUsage_h diff --git a/lib/model/CSystemMemoryUsage.cc b/lib/model/CSystemMemoryUsage.cc index ae275beb34..8fd926f65b 100644 --- a/lib/model/CSystemMemoryUsage.cc +++ b/lib/model/CSystemMemoryUsage.cc @@ -1,5 +1,5 @@ /* -* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. Licensed under the Elastic License * 2.0 and the following additional limitation. Functionality enabled by the * files subject to the Elastic License 2.0 may only be used in production when diff --git a/lib/model/CSystemMemoryUsage_Linux.cc b/lib/model/CSystemMemoryUsage_Linux.cc index a32026f2a5..f3b21522d5 100644 --- a/lib/model/CSystemMemoryUsage_Linux.cc +++ b/lib/model/CSystemMemoryUsage_Linux.cc @@ -1,5 +1,5 @@ /* -* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. Licensed under the Elastic License * 2.0 and the following additional limitation. Functionality enabled by the * files subject to the Elastic License 2.0 may only be used in production when From 6be6395223ebe9ac0434c9cec57d6adada30a2a9 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 4 Jun 2025 16:50:11 +1200 Subject: [PATCH 16/22] Nits in test code --- lib/api/unittest/CAnomalyJobLimitTest.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index cc85baaf99..20e422134f 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -128,7 +128,7 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { BOOST_REQUIRE_EQUAL(uint64_t(18630), job.numRecordsHandled()); nonLimitedUsage = limits.resourceMonitor().totalMemory(); - nonLimitedMaxSystemUsage = limits.resourceMonitor().maxSystemMemory(); + nonLimitedMaxSystemUsage = model::CResourceMonitor::maxSystemMemory(); } } LOG_DEBUG(<< "nonLimitedUsage: " << nonLimitedUsage); @@ -171,7 +171,7 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { // TODO this limit must be tightened once there is more granular // control over the model memory creation limitedUsage = limits.resourceMonitor().totalMemory(); - limitedMaxSystemUsage = limits.resourceMonitor().maxSystemMemory(); + limitedMaxSystemUsage = model::CResourceMonitor::maxSystemMemory(); } LOG_TRACE(<< outputStrm.str()); From 134a4940771904e1eeb1922dd7f8835852189dac Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 9 Jun 2025 16:06:26 +1200 Subject: [PATCH 17/22] Attend to code review comments On Linux, use systemMemoryUsage for the value of totalMem. Do not "adjust" this value as is done for the estimated usage, as it is unnecessary. --- include/model/CSystemMemoryUsage.h | 12 ++++++++-- lib/model/CResourceMonitor.cc | 32 ++++++++++++++++----------- lib/model/CSystemMemoryUsage.cc | 9 +++++--- lib/model/CSystemMemoryUsage_Linux.cc | 7 +++++- 4 files changed, 41 insertions(+), 19 deletions(-) diff --git a/include/model/CSystemMemoryUsage.h b/include/model/CSystemMemoryUsage.h index 6e78091521..066fc2e152 100644 --- a/include/model/CSystemMemoryUsage.h +++ b/include/model/CSystemMemoryUsage.h @@ -14,7 +14,7 @@ #include -#include +#include namespace ml { namespace model { @@ -24,6 +24,8 @@ namespace model { //! DESCRIPTION:\n //! Determines the memory used by the current process based on the operating system. class MODEL_EXPORT CSystemMemoryUsage { +public: + using TMemoryAdjuster = std::function; public: CSystemMemoryUsage() = default; ~CSystemMemoryUsage() = default; @@ -36,7 +38,13 @@ class MODEL_EXPORT CSystemMemoryUsage { //! Return the system memory used by the current process. //! \param memSize An estimate of the process memory //! \return The system memory usage based on the current OS. - std::size_t operator()(std::size_t memSize); + std::size_t operator()(std::size_t memSize) const; + + //! Potentially adjust the provided memory usage value, depending on the current platform. + //! \param usage The memory usage of the current process. + //! \param memAdjuster Function to adjust the memory usage, if needed. + //! \return The (potentially adjusted) system memory usage. + static std::size_t maybeAdjustUsage(std::size_t usage, const TMemoryAdjuster& memAdjuster); }; } } diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index 1e131ce0b2..e9b365ab97 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -379,12 +379,11 @@ void CResourceMonitor::sendMemoryUsageReport(core_t::TTime bucketStartTime, CResourceMonitor::SModelSizeStats CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { SModelSizeStats res; - CSystemMemoryUsage systemMemoryUsage; res.s_Usage = this->totalMemory(); - res.s_AdjustedUsage = systemMemoryUsage(this->adjustedUsage(res.s_Usage)); + res.s_AdjustedUsage = this->adjustedUsage(res.s_Usage); res.s_PeakUsage = static_cast( core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); - res.s_AdjustedPeakUsage = systemMemoryUsage(this->adjustedUsage(res.s_PeakUsage)); + res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage); res.s_SystemMemoryUsage = core::CProcessStats::residentSetSize(); res.s_MaxSystemMemoryUsage = core::CProcessStats::maxResidentSetSize(); res.s_BytesMemoryLimit = this->persistenceMemoryIncreaseFactor() * m_ByteLimitHigh; @@ -406,16 +405,22 @@ CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { } std::size_t CResourceMonitor::adjustedUsage(std::size_t usage) const { - // We scale the reported memory usage by the inverse of the byte limit margin. - // This gives the user a fairer indication of how close the job is to hitting - // the model memory limit in a concise manner (as the limit is scaled down by - // the margin during the beginning period of the job's existence). - std::size_t adjustedUsage{ - static_cast(static_cast(usage) / m_ByteLimitMargin)}; + const std::size_t persistenceMemoryIncreaseFactor = this->persistenceMemoryIncreaseFactor(); + + return CSystemMemoryUsage::maybeAdjustUsage( + usage, [&byteLimitMargin=m_ByteLimitMargin, &persistenceMemoryIncreaseFactor](std::size_t usage_) { + // On platforms that estimate the memory usage, it is scaled by the inverse of the byte limit margin. + // This gives the user a fairer indication of how close the job is to hitting + // the model memory limit in a concise manner (as the limit is scaled down by + // the margin during the beginning period of the job's existence). + std::size_t adjustedUsage{ + static_cast(static_cast(usage_) / byteLimitMargin) + }; - adjustedUsage *= this->persistenceMemoryIncreaseFactor(); + adjustedUsage *= persistenceMemoryIncreaseFactor; - return adjustedUsage; + return adjustedUsage; + }); } std::size_t CResourceMonitor::persistenceMemoryIncreaseFactor() const { @@ -491,9 +496,10 @@ std::size_t CResourceMonitor::lowLimit() const { } std::size_t CResourceMonitor::totalMemory() const { - return m_MonitoredResourceCurrentMemory + m_ExtraMemory + + CSystemMemoryUsage systemMemoryUsage; + return systemMemoryUsage(m_MonitoredResourceCurrentMemory + m_ExtraMemory + static_cast(core::CProgramCounters::counter( - counter_t::E_TSADOutputMemoryAllocatorUsage)); + counter_t::E_TSADOutputMemoryAllocatorUsage))); } std::size_t CResourceMonitor::systemMemory() { diff --git a/lib/model/CSystemMemoryUsage.cc b/lib/model/CSystemMemoryUsage.cc index 8fd926f65b..b79d9a0a4c 100644 --- a/lib/model/CSystemMemoryUsage.cc +++ b/lib/model/CSystemMemoryUsage.cc @@ -11,13 +11,16 @@ #include -#include - namespace ml { namespace model { // On platforms other than Linux the system memory usage is that provided - the estimated size of the models. -std::size_t CSystemMemoryUsage::operator()(std::size_t memSize) { +std::size_t CSystemMemoryUsage::operator()(std::size_t memSize) const { return memSize; } + +// On platforms other than Linux the system memory is estimated. It must be adjusted before being reported +std::size_t CSystemMemoryUsage::maybeAdjustUsage(std::size_t usage, const TMemoryAdjuster& memAdjuster) { + return memAdjuster(usage); +} } } diff --git a/lib/model/CSystemMemoryUsage_Linux.cc b/lib/model/CSystemMemoryUsage_Linux.cc index f3b21522d5..0b25eef8e1 100644 --- a/lib/model/CSystemMemoryUsage_Linux.cc +++ b/lib/model/CSystemMemoryUsage_Linux.cc @@ -17,8 +17,13 @@ namespace ml { namespace model { // On Linux the system memory usage is actually that determined by the OS. // The estimated value provided is ignored. -std::size_t CSystemMemoryUsage::operator()(std::size_t /*memSize*/) { +std::size_t CSystemMemoryUsage::operator()(std::size_t /*memSize*/) const { return core::CProcessStats::maxResidentSetSize(); } + +std::size_t CSystemMemoryUsage::maybeAdjustUsage(std::size_t usage, const TMemoryAdjuster& /*memAdjuster*/) { + // On Linux we use the actual system memory usage, rather tha an estimated value, so there is no need to adjust it. + return usage; +} } } From ed426aca39cfb491c366142c2a3fe3de68aa0fcc Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 9 Jun 2025 16:15:10 +1200 Subject: [PATCH 18/22] Formatting --- include/model/CSystemMemoryUsage.h | 1 + lib/model/CResourceMonitor.cc | 31 ++++++++++++++------------- lib/model/CSystemMemoryUsage.cc | 3 ++- lib/model/CSystemMemoryUsage_Linux.cc | 3 ++- 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/include/model/CSystemMemoryUsage.h b/include/model/CSystemMemoryUsage.h index 066fc2e152..3d0b83bf40 100644 --- a/include/model/CSystemMemoryUsage.h +++ b/include/model/CSystemMemoryUsage.h @@ -26,6 +26,7 @@ namespace model { class MODEL_EXPORT CSystemMemoryUsage { public: using TMemoryAdjuster = std::function; + public: CSystemMemoryUsage() = default; ~CSystemMemoryUsage() = default; diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index e9b365ab97..fff68a0014 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -405,22 +405,23 @@ CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { } std::size_t CResourceMonitor::adjustedUsage(std::size_t usage) const { - const std::size_t persistenceMemoryIncreaseFactor = this->persistenceMemoryIncreaseFactor(); + const std::size_t persistenceMemoryIncreaseFactor = + this->persistenceMemoryIncreaseFactor(); - return CSystemMemoryUsage::maybeAdjustUsage( - usage, [&byteLimitMargin=m_ByteLimitMargin, &persistenceMemoryIncreaseFactor](std::size_t usage_) { - // On platforms that estimate the memory usage, it is scaled by the inverse of the byte limit margin. - // This gives the user a fairer indication of how close the job is to hitting - // the model memory limit in a concise manner (as the limit is scaled down by - // the margin during the beginning period of the job's existence). - std::size_t adjustedUsage{ - static_cast(static_cast(usage_) / byteLimitMargin) - }; + return CSystemMemoryUsage::maybeAdjustUsage(usage, [ + &byteLimitMargin = m_ByteLimitMargin, &persistenceMemoryIncreaseFactor + ](std::size_t usage_) { + // On platforms that estimate the memory usage, it is scaled by the inverse of the byte limit margin. + // This gives the user a fairer indication of how close the job is to hitting + // the model memory limit in a concise manner (as the limit is scaled down by + // the margin during the beginning period of the job's existence). + std::size_t adjustedUsage{ + static_cast(static_cast(usage_) / byteLimitMargin)}; - adjustedUsage *= persistenceMemoryIncreaseFactor; + adjustedUsage *= persistenceMemoryIncreaseFactor; - return adjustedUsage; - }); + return adjustedUsage; + }); } std::size_t CResourceMonitor::persistenceMemoryIncreaseFactor() const { @@ -498,8 +499,8 @@ std::size_t CResourceMonitor::lowLimit() const { std::size_t CResourceMonitor::totalMemory() const { CSystemMemoryUsage systemMemoryUsage; return systemMemoryUsage(m_MonitoredResourceCurrentMemory + m_ExtraMemory + - static_cast(core::CProgramCounters::counter( - counter_t::E_TSADOutputMemoryAllocatorUsage))); + static_cast(core::CProgramCounters::counter( + counter_t::E_TSADOutputMemoryAllocatorUsage))); } std::size_t CResourceMonitor::systemMemory() { diff --git a/lib/model/CSystemMemoryUsage.cc b/lib/model/CSystemMemoryUsage.cc index b79d9a0a4c..ee86e22f44 100644 --- a/lib/model/CSystemMemoryUsage.cc +++ b/lib/model/CSystemMemoryUsage.cc @@ -19,7 +19,8 @@ std::size_t CSystemMemoryUsage::operator()(std::size_t memSize) const { } // On platforms other than Linux the system memory is estimated. It must be adjusted before being reported -std::size_t CSystemMemoryUsage::maybeAdjustUsage(std::size_t usage, const TMemoryAdjuster& memAdjuster) { +std::size_t CSystemMemoryUsage::maybeAdjustUsage(std::size_t usage, + const TMemoryAdjuster& memAdjuster) { return memAdjuster(usage); } } diff --git a/lib/model/CSystemMemoryUsage_Linux.cc b/lib/model/CSystemMemoryUsage_Linux.cc index 0b25eef8e1..76a3ca87a8 100644 --- a/lib/model/CSystemMemoryUsage_Linux.cc +++ b/lib/model/CSystemMemoryUsage_Linux.cc @@ -21,7 +21,8 @@ std::size_t CSystemMemoryUsage::operator()(std::size_t /*memSize*/) const { return core::CProcessStats::maxResidentSetSize(); } -std::size_t CSystemMemoryUsage::maybeAdjustUsage(std::size_t usage, const TMemoryAdjuster& /*memAdjuster*/) { +std::size_t CSystemMemoryUsage::maybeAdjustUsage(std::size_t usage, + const TMemoryAdjuster& /*memAdjuster*/) { // On Linux we use the actual system memory usage, rather tha an estimated value, so there is no need to adjust it. return usage; } From 4c8bf8bd63a675fc2cb9c09b8d651daad89e44c0 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 11 Jun 2025 17:11:41 +1200 Subject: [PATCH 19/22] Attend to code review comments * Roll back a previous change that returned system memory from CResourceManager::totalMemory * Remove unneeded code * Reworked platform dependent code to make the intent mor explicit --- include/model/CProcessMemoryUsage.h | 48 +++++++++++++++ include/model/CResourceMonitor.h | 5 +- include/model/CSystemMemoryUsage.h | 53 ----------------- lib/api/unittest/CAnomalyJobLimitTest.cc | 18 ------ lib/api/unittest/CJsonOutputWriterTest.cc | 54 ++++++++--------- .../unittest/CModelSnapshotJsonWriterTest.cc | 2 - lib/core/CProcessStats_Linux.cc | 1 - lib/core/CProcessStats_MacOSX.cc | 1 - lib/core/CProcessStats_Windows.cc | 1 - lib/core/unittest/CLoggerTest.cc | 2 +- lib/core/unittest/CNamedPipeFactoryTest.cc | 59 ++++++++++--------- lib/model/CMakeLists.txt | 2 +- ...mMemoryUsage.cc => CProcessMemoryUsage.cc} | 14 ++--- lib/model/CProcessMemoryUsage_Linux.cc | 21 +++++++ lib/model/CResourceMonitor.cc | 56 ++++++++++-------- lib/model/CSystemMemoryUsage_Linux.cc | 30 ---------- lib/model/unittest/CResourceMonitorTest.cc | 16 ----- 17 files changed, 165 insertions(+), 218 deletions(-) create mode 100644 include/model/CProcessMemoryUsage.h delete mode 100644 include/model/CSystemMemoryUsage.h rename lib/model/{CSystemMemoryUsage.cc => CProcessMemoryUsage.cc} (52%) create mode 100644 lib/model/CProcessMemoryUsage_Linux.cc delete mode 100644 lib/model/CSystemMemoryUsage_Linux.cc diff --git a/include/model/CProcessMemoryUsage.h b/include/model/CProcessMemoryUsage.h new file mode 100644 index 0000000000..6a449ebf84 --- /dev/null +++ b/include/model/CProcessMemoryUsage.h @@ -0,0 +1,48 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#ifndef INCLUDED_ml_model_CSystemMemoryUsage_h +#define INCLUDED_ml_model_CSystemMemoryUsage_h + +#include + +#include + +namespace ml { +namespace model { + +//! \brief Determines how to calculate the memory used by the current process. +//! +//! DESCRIPTION:\n +//! Determines how to calculate the memory used by the current process based on the operating system. +//! On some OS's (Mac, Windows) we use the estimated memory usage of the models, +//! while on others (Linux) we use the actual memory of the process as provided by system calls. +class MODEL_EXPORT CProcessMemoryUsage { +public: + enum class EMemoryStrategy { E_Estimated, E_System }; + +public: + CProcessMemoryUsage() = default; + ~CProcessMemoryUsage() = default; + + CProcessMemoryUsage(const CProcessMemoryUsage&) = delete; + CProcessMemoryUsage(CProcessMemoryUsage&&) = delete; + CProcessMemoryUsage& operator=(const CProcessMemoryUsage&) = delete; + CProcessMemoryUsage& operator=(CProcessMemoryUsage&&) = delete; + + //! Return the strategy used to calculate the memory used by the current process. + //! \return Strategy used to calculate the process memory based on the current OS. + static EMemoryStrategy memoryStrategy(); +}; +} +} + +#endif //INCLUDED_ml_model_CSystemMemoryUsage_h diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index 68758d4f09..411c3671e9 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -54,8 +54,6 @@ class MODEL_EXPORT CResourceMonitor { std::size_t s_AdjustedUsage{0}; std::size_t s_PeakUsage{0}; std::size_t s_AdjustedPeakUsage{0}; - std::size_t s_SystemMemoryUsage{0}; - std::size_t s_MaxSystemMemoryUsage{0}; std::size_t s_ByFields{0}; std::size_t s_PartitionFields{0}; std::size_t s_OverFields{0}; @@ -237,6 +235,9 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the amount by which reported memory usage is scaled depending on the type of persistence in use std::size_t persistenceMemoryIncreaseFactor() const; + //! Modify the supplied usage value depending on a platform dependent strategy. + std::size_t applyMemoryStrategy(std::size_t usage) const; + private: //! The registered collection of components TMonitoredResourcePtrSizeUMap m_Resources; diff --git a/include/model/CSystemMemoryUsage.h b/include/model/CSystemMemoryUsage.h deleted file mode 100644 index 3d0b83bf40..0000000000 --- a/include/model/CSystemMemoryUsage.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the following additional limitation. Functionality enabled by the - * files subject to the Elastic License 2.0 may only be used in production when - * invoked by an Elasticsearch process with a license key installed that permits - * use of machine learning features. You may not use this file except in - * compliance with the Elastic License 2.0 and the foregoing additional - * limitation. - */ - -#ifndef INCLUDED_ml_model_CSystemMemoryUsage_h -#define INCLUDED_ml_model_CSystemMemoryUsage_h - -#include - -#include - -namespace ml { -namespace model { - -//! \brief Determines the memory used by the current process. -//! -//! DESCRIPTION:\n -//! Determines the memory used by the current process based on the operating system. -class MODEL_EXPORT CSystemMemoryUsage { -public: - using TMemoryAdjuster = std::function; - -public: - CSystemMemoryUsage() = default; - ~CSystemMemoryUsage() = default; - - CSystemMemoryUsage(const CSystemMemoryUsage&) = delete; - CSystemMemoryUsage(CSystemMemoryUsage&&) = delete; - CSystemMemoryUsage& operator=(const CSystemMemoryUsage&) = delete; - CSystemMemoryUsage& operator=(CSystemMemoryUsage&&) = delete; - - //! Return the system memory used by the current process. - //! \param memSize An estimate of the process memory - //! \return The system memory usage based on the current OS. - std::size_t operator()(std::size_t memSize) const; - - //! Potentially adjust the provided memory usage value, depending on the current platform. - //! \param usage The memory usage of the current process. - //! \param memAdjuster Function to adjust the memory usage, if needed. - //! \return The (potentially adjusted) system memory usage. - static std::size_t maybeAdjustUsage(std::size_t usage, const TMemoryAdjuster& memAdjuster); -}; -} -} - -#endif //INCLUDED_ml_model_CSystemMemoryUsage_h diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index 20e422134f..e4cffde084 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -93,8 +93,6 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { std::size_t nonLimitedUsage{0}; std::size_t limitedUsage{0}; - std::size_t nonLimitedMaxSystemUsage{0}; - std::size_t limitedMaxSystemUsage{0}; { // Without limits, this data set should make the models around // 1230000 bytes @@ -128,12 +126,9 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { BOOST_REQUIRE_EQUAL(uint64_t(18630), job.numRecordsHandled()); nonLimitedUsage = limits.resourceMonitor().totalMemory(); - nonLimitedMaxSystemUsage = model::CResourceMonitor::maxSystemMemory(); } } LOG_DEBUG(<< "nonLimitedUsage: " << nonLimitedUsage); - LOG_DEBUG(<< "nonLimitedMaxSystemUsage: " << nonLimitedMaxSystemUsage); - BOOST_TEST_REQUIRE(nonLimitedMaxSystemUsage >= nonLimitedUsage); { // Now run the data with limiting ml::api::CAnomalyJobConfig jobConfig = CTestAnomalyJob::makeSimpleJobConfig( @@ -171,15 +166,11 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { // TODO this limit must be tightened once there is more granular // control over the model memory creation limitedUsage = limits.resourceMonitor().totalMemory(); - limitedMaxSystemUsage = model::CResourceMonitor::maxSystemMemory(); } LOG_TRACE(<< outputStrm.str()); LOG_DEBUG(<< "Non-limited usage: " << nonLimitedUsage << "; limited: " << limitedUsage); - LOG_DEBUG(<< "Non-limited System Usage: " << nonLimitedMaxSystemUsage); - LOG_DEBUG(<< "Limited System Usage: " << limitedMaxSystemUsage); BOOST_TEST_REQUIRE(limitedUsage < nonLimitedUsage); - BOOST_TEST_REQUIRE(limitedMaxSystemUsage >= limitedUsage); } } @@ -384,8 +375,6 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage bytes = " << used.s_Usage); - LOG_DEBUG(<< "System memory usage bytes = " << used.s_SystemMemoryUsage); - LOG_DEBUG(<< "Max system memory usage bytes = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * core::constants::BYTES_IN_MEGABYTES); BOOST_TEST_REQUIRE(used.s_ByFields > testParam.s_ExpectedByFields); @@ -395,7 +384,6 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedByMemoryUsageRelativeErrorDivisor); - BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } LOG_DEBUG(<< "**** Test partition with bucketLength = " << testParam.s_BucketLength @@ -440,8 +428,6 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); - LOG_DEBUG(<< "System memory usage = " << used.s_SystemMemoryUsage); - LOG_DEBUG(<< "Max system memory usage = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_PartitionFields >= testParam.s_ExpectedPartitionFields); BOOST_TEST_REQUIRE(used.s_PartitionFields < 450); @@ -451,7 +437,6 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedPartitionUsageRelativeErrorDivisor); - BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } LOG_DEBUG(<< "**** Test over with bucketLength = " << testParam.s_BucketLength @@ -494,8 +479,6 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# over = " << used.s_OverFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); - LOG_DEBUG(<< "System memory usage = " << used.s_SystemMemoryUsage); - LOG_DEBUG(<< "Max system memory usage = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_OverFields > testParam.s_ExpectedOverFields); BOOST_TEST_REQUIRE(used.s_OverFields <= 9000); @@ -503,7 +486,6 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedOverUsageRelativeErrorDivisor); - BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } } } diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index 19caa4dfc1..16772158f8 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1728,23 +1728,21 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { resourceUsage.s_AdjustedUsage = 2; resourceUsage.s_PeakUsage = 3; resourceUsage.s_AdjustedPeakUsage = 4; - resourceUsage.s_SystemMemoryUsage = 5; - resourceUsage.s_MaxSystemMemoryUsage = 6; - resourceUsage.s_ByFields = 7; - resourceUsage.s_PartitionFields = 8; - resourceUsage.s_OverFields = 9; - resourceUsage.s_AllocationFailures = 10; + resourceUsage.s_ByFields = 5; + resourceUsage.s_PartitionFields = 6; + resourceUsage.s_OverFields = 7; + resourceUsage.s_AllocationFailures = 8; resourceUsage.s_MemoryStatus = ml::model_t::E_MemoryStatusHardLimit; resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisPeakModelBytes; - resourceUsage.s_BucketStartTime = 11; - resourceUsage.s_BytesExceeded = 12; - resourceUsage.s_BytesMemoryLimit = 13; - resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 14; - resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 15; - resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 16; - resourceUsage.s_OverallCategorizerStats.s_RareCategories = 17; - resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 18; - resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 19; + resourceUsage.s_BucketStartTime = 9; + resourceUsage.s_BytesExceeded = 10; + resourceUsage.s_BytesMemoryLimit = 11; + resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 12; + resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 13; + resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 14; + resourceUsage.s_OverallCategorizerStats.s_RareCategories = 15; + resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 16; + resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 17; resourceUsage.s_OverallCategorizerStats.s_CategorizationStatus = ml::model_t::E_CategorizationStatusWarn; @@ -1773,17 +1771,17 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_by_field_count")); - BOOST_REQUIRE_EQUAL(7, sizeStats.at("total_by_field_count").to_number()); + BOOST_REQUIRE_EQUAL(5, sizeStats.at("total_by_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_partition_field_count")); BOOST_REQUIRE_EQUAL( - 8, sizeStats.at("total_partition_field_count").to_number()); + 6, sizeStats.at("total_partition_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_over_field_count")); - BOOST_REQUIRE_EQUAL(9, sizeStats.at("total_over_field_count").to_number()); + BOOST_REQUIRE_EQUAL(7, sizeStats.at("total_over_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("bucket_allocation_failures_count")); BOOST_REQUIRE_EQUAL( - 10, sizeStats.at("bucket_allocation_failures_count").to_number()); + 8, sizeStats.at("bucket_allocation_failures_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("timestamp")); - BOOST_REQUIRE_EQUAL(11000, sizeStats.at("timestamp").to_number()); + BOOST_REQUIRE_EQUAL(9000, sizeStats.at("timestamp").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("memory_status")); BOOST_REQUIRE_EQUAL("hard_limit", sizeStats.at("memory_status").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("assignment_memory_basis")); @@ -1793,23 +1791,23 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { std::int64_t nowMs{ml::core::CTimeUtils::nowMs()}; BOOST_TEST_REQUIRE(nowMs >= sizeStats.at("log_time").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_exceeded")); - BOOST_REQUIRE_EQUAL(12, sizeStats.at("model_bytes_exceeded").to_number()); + BOOST_REQUIRE_EQUAL(10, sizeStats.at("model_bytes_exceeded").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_memory_limit")); BOOST_REQUIRE_EQUAL( - 13, sizeStats.at("model_bytes_memory_limit").to_number()); + 11, sizeStats.at("model_bytes_memory_limit").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorized_doc_count")); - BOOST_REQUIRE_EQUAL(14, sizeStats.at("categorized_doc_count").to_number()); + BOOST_REQUIRE_EQUAL(12, sizeStats.at("categorized_doc_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_category_count")); - BOOST_REQUIRE_EQUAL(15, sizeStats.at("total_category_count").to_number()); + BOOST_REQUIRE_EQUAL(13, sizeStats.at("total_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("frequent_category_count")); BOOST_REQUIRE_EQUAL( - 16, sizeStats.at("frequent_category_count").to_number()); + 14, sizeStats.at("frequent_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("rare_category_count")); - BOOST_REQUIRE_EQUAL(17, sizeStats.at("rare_category_count").to_number()); + BOOST_REQUIRE_EQUAL(15, sizeStats.at("rare_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("dead_category_count")); - BOOST_REQUIRE_EQUAL(18, sizeStats.at("dead_category_count").to_number()); + BOOST_REQUIRE_EQUAL(16, sizeStats.at("dead_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("failed_category_count")); - BOOST_REQUIRE_EQUAL(19, sizeStats.at("failed_category_count").to_number()); + BOOST_REQUIRE_EQUAL(17, sizeStats.at("failed_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorization_status")); BOOST_REQUIRE_EQUAL("warn", sizeStats.at("categorization_status").as_string()); } diff --git a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc index f2c22a41de..ff404b7f91 100644 --- a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc +++ b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc @@ -36,8 +36,6 @@ BOOST_AUTO_TEST_CASE(testWrite) { 20000, // bytes used (adjusted) 30000, // peak bytes used 60000, // peak bytes used (adjusted) - 409600, // System memory used (rss) - 413696, // Max system memory used (max rss) 3, // # by fields 1, // # partition fields 150, // # over fields diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index e3635e279f..e5ab8cdfd0 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -11,7 +11,6 @@ #include #include -#include #include #include diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index 61dc9fca66..2f1d02c2e2 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -11,7 +11,6 @@ #include #include -#include #include #include diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index 933952e1f2..7c612270b3 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -11,7 +11,6 @@ #include #include -#include #include #include diff --git a/lib/core/unittest/CLoggerTest.cc b/lib/core/unittest/CLoggerTest.cc index 1abec95da0..8667aa1430 100644 --- a/lib/core/unittest/CLoggerTest.cc +++ b/lib/core/unittest/CLoggerTest.cc @@ -62,7 +62,7 @@ std::function makeReader(std::ostringstream& loggedData) { return; } } - BOOST_FAIL("Failed to connect to logging pipe within a reasonable time"); + BOOST_TEST_CHECK(false, "Failed to connect to logging pipe within a reasonable time"); }; } diff --git a/lib/core/unittest/CNamedPipeFactoryTest.cc b/lib/core/unittest/CNamedPipeFactoryTest.cc index 39aef5e07e..446c978558 100644 --- a/lib/core/unittest/CNamedPipeFactoryTest.cc +++ b/lib/core/unittest/CNamedPipeFactoryTest.cc @@ -158,35 +158,36 @@ BOOST_AUTO_TEST_CASE(testServerIsCppWriter) { BOOST_REQUIRE_EQUAL(std::string(TEST_SIZE, TEST_CHAR), threadReader.data()); } -BOOST_AUTO_TEST_CASE(testServerIsCWriter) { - ml::test::CThreadDataReader threadReader{PAUSE_TIME_MS, MAX_ATTEMPTS, TEST_PIPE_NAME}; - BOOST_TEST_REQUIRE(threadReader.start()); - - std::atomic_bool dummy{false}; - ml::core::CNamedPipeFactory::TFileP file{ - ml::core::CNamedPipeFactory::openPipeFileWrite(TEST_PIPE_NAME, dummy)}; - BOOST_TEST_REQUIRE(file); - - std::size_t charsLeft{TEST_SIZE}; - std::size_t blockSize{7}; - while (charsLeft > 0) { - if (blockSize > charsLeft) { - blockSize = charsLeft; - } - BOOST_TEST_REQUIRE(std::fputs(std::string(blockSize, TEST_CHAR).c_str(), - file.get()) >= 0); - charsLeft -= blockSize; - } - - file.reset(); - - BOOST_TEST_REQUIRE(threadReader.waitForFinish()); - BOOST_TEST_REQUIRE(threadReader.attemptsTaken() <= MAX_ATTEMPTS); - BOOST_TEST_REQUIRE(threadReader.streamWentBad() == false); - - BOOST_REQUIRE_EQUAL(TEST_SIZE, threadReader.data().length()); - BOOST_REQUIRE_EQUAL(std::string(TEST_SIZE, TEST_CHAR), threadReader.data()); -} +// BOOST_AUTO_TEST_CASE(testServerIsCWriter) { +// ml::test::CThreadDataReader threadReader{PAUSE_TIME_MS, MAX_ATTEMPTS, TEST_PIPE_NAME}; +// BOOST_TEST_REQUIRE(threadReader.start()); +// +// std::atomic_bool dummy{false}; +// ml::core::CNamedPipeFactory::TFileP file{ +// ml::core::CNamedPipeFactory::openPipeFileWrite(TEST_PIPE_NAME, dummy)}; +// BOOST_TEST_REQUIRE(file); +// +// sleep(1); +// std::size_t charsLeft{TEST_SIZE}; +// std::size_t blockSize{7}; +// while (charsLeft > 0) { +// if (blockSize > charsLeft) { +// blockSize = charsLeft; +// } +// BOOST_TEST_REQUIRE(std::fputs(std::string(blockSize, TEST_CHAR).c_str(), +// file.get()) >= 0); +// charsLeft -= blockSize; +// } +// +// file.reset(); +// +// BOOST_TEST_REQUIRE(threadReader.waitForFinish()); +// BOOST_TEST_REQUIRE(threadReader.attemptsTaken() <= MAX_ATTEMPTS); +// BOOST_TEST_REQUIRE(threadReader.streamWentBad() == false); +// +// BOOST_REQUIRE_EQUAL(TEST_SIZE, threadReader.data().length()); +// BOOST_REQUIRE_EQUAL(std::string(TEST_SIZE, TEST_CHAR), threadReader.data()); +// } BOOST_AUTO_TEST_CASE(testCancelBlock) { CThreadBlockCanceller cancellerThread{ml::core::CThread::currentThreadId()}; diff --git a/lib/model/CMakeLists.txt b/lib/model/CMakeLists.txt index 37c8c1cc5c..73dd74e4d0 100644 --- a/lib/model/CMakeLists.txt +++ b/lib/model/CMakeLists.txt @@ -75,7 +75,7 @@ ml_add_library(MlModel SHARED CSampleCounts.cc CSearchKey.cc CSimpleCountDetector.cc - CSystemMemoryUsage.cc + CProcessMemoryUsage.cc CTokenListCategory.cc CTokenListDataCategorizerBase.cc CTokenListReverseSearchCreator.cc diff --git a/lib/model/CSystemMemoryUsage.cc b/lib/model/CProcessMemoryUsage.cc similarity index 52% rename from lib/model/CSystemMemoryUsage.cc rename to lib/model/CProcessMemoryUsage.cc index ee86e22f44..37fb296345 100644 --- a/lib/model/CSystemMemoryUsage.cc +++ b/lib/model/CProcessMemoryUsage.cc @@ -9,19 +9,13 @@ * limitation. */ -#include +#include namespace ml { namespace model { -// On platforms other than Linux the system memory usage is that provided - the estimated size of the models. -std::size_t CSystemMemoryUsage::operator()(std::size_t memSize) const { - return memSize; -} - -// On platforms other than Linux the system memory is estimated. It must be adjusted before being reported -std::size_t CSystemMemoryUsage::maybeAdjustUsage(std::size_t usage, - const TMemoryAdjuster& memAdjuster) { - return memAdjuster(usage); +// On platforms other than Linux the process memory usage is the estimated size of the models. +CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::memoryStrategy() { + return EMemoryStrategy::E_Estimated; } } } diff --git a/lib/model/CProcessMemoryUsage_Linux.cc b/lib/model/CProcessMemoryUsage_Linux.cc new file mode 100644 index 0000000000..8c5306e069 --- /dev/null +++ b/lib/model/CProcessMemoryUsage_Linux.cc @@ -0,0 +1,21 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#include + +namespace ml { +namespace model { +// On Linux the process memory usage is determined by the OS. +CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::memoryStrategy() { + return EMemoryStrategy::E_System; +} +} +} diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index fff68a0014..6529fa4656 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include @@ -380,13 +380,11 @@ CResourceMonitor::SModelSizeStats CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { SModelSizeStats res; res.s_Usage = this->totalMemory(); - res.s_AdjustedUsage = this->adjustedUsage(res.s_Usage); + res.s_AdjustedUsage = this->applyMemoryStrategy(res.s_Usage); res.s_PeakUsage = static_cast( core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); - res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage); - res.s_SystemMemoryUsage = core::CProcessStats::residentSetSize(); - res.s_MaxSystemMemoryUsage = core::CProcessStats::maxResidentSetSize(); - res.s_BytesMemoryLimit = this->persistenceMemoryIncreaseFactor() * m_ByteLimitHigh; + res.s_AdjustedPeakUsage = this->applyMemoryStrategy(res.s_PeakUsage); + res.s_BytesMemoryLimit = this->getBytesMemoryLimit(); res.s_BytesExceeded = m_CurrentBytesExceeded; res.s_MemoryStatus = m_MemoryStatus; std::uint64_t assignmentMemoryBasis{ @@ -404,24 +402,33 @@ CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { return res; } -std::size_t CResourceMonitor::adjustedUsage(std::size_t usage) const { - const std::size_t persistenceMemoryIncreaseFactor = - this->persistenceMemoryIncreaseFactor(); +std::size_t CResourceMonitor::applyMemoryStrategy(std::size_t usage) const { + std::size_t modifiedUsage{0}; + switch (CProcessMemoryUsage::memoryStrategy()) { + case CProcessMemoryUsage::EMemoryStrategy::E_Estimated: { + modifiedUsage = this->adjustedUsage(usage); + break; + } + case CProcessMemoryUsage::EMemoryStrategy::E_System: { + modifiedUsage = core::CProcessStats::maxResidentSetSize(); + break; + } + default: { LOG_WARN(<< "Unknown memory strategy"); } + } + return modifiedUsage; +} - return CSystemMemoryUsage::maybeAdjustUsage(usage, [ - &byteLimitMargin = m_ByteLimitMargin, &persistenceMemoryIncreaseFactor - ](std::size_t usage_) { - // On platforms that estimate the memory usage, it is scaled by the inverse of the byte limit margin. - // This gives the user a fairer indication of how close the job is to hitting - // the model memory limit in a concise manner (as the limit is scaled down by - // the margin during the beginning period of the job's existence). - std::size_t adjustedUsage{ - static_cast(static_cast(usage_) / byteLimitMargin)}; +std::size_t CResourceMonitor::adjustedUsage(std::size_t usage) const { + // We scale the reported memory usage by the inverse of the byte limit margin. + // This gives the user a fairer indication of how close the job is to hitting + // the model memory limit in a concise manner (as the limit is scaled down by + // the margin during the beginning period of the job's existence). + std::size_t adjustedUsage{ + static_cast(static_cast(usage) / m_ByteLimitMargin)}; - adjustedUsage *= persistenceMemoryIncreaseFactor; + adjustedUsage *= this->persistenceMemoryIncreaseFactor(); - return adjustedUsage; - }); + return adjustedUsage; } std::size_t CResourceMonitor::persistenceMemoryIncreaseFactor() const { @@ -497,10 +504,9 @@ std::size_t CResourceMonitor::lowLimit() const { } std::size_t CResourceMonitor::totalMemory() const { - CSystemMemoryUsage systemMemoryUsage; - return systemMemoryUsage(m_MonitoredResourceCurrentMemory + m_ExtraMemory + - static_cast(core::CProgramCounters::counter( - counter_t::E_TSADOutputMemoryAllocatorUsage))); + return m_MonitoredResourceCurrentMemory + m_ExtraMemory + + static_cast(core::CProgramCounters::counter( + counter_t::E_TSADOutputMemoryAllocatorUsage)); } std::size_t CResourceMonitor::systemMemory() { diff --git a/lib/model/CSystemMemoryUsage_Linux.cc b/lib/model/CSystemMemoryUsage_Linux.cc deleted file mode 100644 index 76a3ca87a8..0000000000 --- a/lib/model/CSystemMemoryUsage_Linux.cc +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the following additional limitation. Functionality enabled by the - * files subject to the Elastic License 2.0 may only be used in production when - * invoked by an Elasticsearch process with a license key installed that permits - * use of machine learning features. You may not use this file except in - * compliance with the Elastic License 2.0 and the foregoing additional - * limitation. - */ - -#include - -#include - -namespace ml { -namespace model { -// On Linux the system memory usage is actually that determined by the OS. -// The estimated value provided is ignored. -std::size_t CSystemMemoryUsage::operator()(std::size_t /*memSize*/) const { - return core::CProcessStats::maxResidentSetSize(); -} - -std::size_t CSystemMemoryUsage::maybeAdjustUsage(std::size_t usage, - const TMemoryAdjuster& /*memAdjuster*/) { - // On Linux we use the actual system memory usage, rather tha an estimated value, so there is no need to adjust it. - return usage; -} -} -} diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index c17c644c80..b11c79aca5 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -549,10 +549,6 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory <= m_ReportedModelSizeStats.s_SystemMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_SystemMemoryUsage); - monitor.addExtraMemory(100); monitor.updateMoments(monitor.totalMemory(), 0, 1); @@ -560,10 +556,6 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); - monitor.addExtraMemory(-50); monitor.updateMoments(monitor.totalMemory(), 0, 1); @@ -571,20 +563,12 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 50, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); - monitor.addExtraMemory(100); monitor.updateMoments(monitor.totalMemory(), 0, 1); monitor.sendMemoryUsageReport(0, 1); BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_PeakUsage); - - BOOST_TEST_REQUIRE(baseTotalMemory + 150 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); } BOOST_FIXTURE_TEST_CASE(testUpdateMoments, CTestFixture) { From fa9c4fab8e46f12fa291bf4588687da9ae28e4fa Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Thu, 12 Jun 2025 14:46:20 +1200 Subject: [PATCH 20/22] Small tidy up of CProcessMemoryUsage.cc * Replace static method with static const value. --- include/model/CProcessMemoryUsage.h | 14 +++----------- lib/model/CProcessMemoryUsage.cc | 4 +--- lib/model/CProcessMemoryUsage_Linux.cc | 5 ++--- lib/model/CResourceMonitor.cc | 2 +- 4 files changed, 7 insertions(+), 18 deletions(-) diff --git a/include/model/CProcessMemoryUsage.h b/include/model/CProcessMemoryUsage.h index 6a449ebf84..8ea1a2057c 100644 --- a/include/model/CProcessMemoryUsage.h +++ b/include/model/CProcessMemoryUsage.h @@ -29,18 +29,10 @@ class MODEL_EXPORT CProcessMemoryUsage { public: enum class EMemoryStrategy { E_Estimated, E_System }; -public: - CProcessMemoryUsage() = default; - ~CProcessMemoryUsage() = default; - - CProcessMemoryUsage(const CProcessMemoryUsage&) = delete; - CProcessMemoryUsage(CProcessMemoryUsage&&) = delete; - CProcessMemoryUsage& operator=(const CProcessMemoryUsage&) = delete; - CProcessMemoryUsage& operator=(CProcessMemoryUsage&&) = delete; + static const EMemoryStrategy MEMORY_STRATEGY; - //! Return the strategy used to calculate the memory used by the current process. - //! \return Strategy used to calculate the process memory based on the current OS. - static EMemoryStrategy memoryStrategy(); +public: + CProcessMemoryUsage() = delete; }; } } diff --git a/lib/model/CProcessMemoryUsage.cc b/lib/model/CProcessMemoryUsage.cc index 37fb296345..6fafd92323 100644 --- a/lib/model/CProcessMemoryUsage.cc +++ b/lib/model/CProcessMemoryUsage.cc @@ -14,8 +14,6 @@ namespace ml { namespace model { // On platforms other than Linux the process memory usage is the estimated size of the models. -CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::memoryStrategy() { - return EMemoryStrategy::E_Estimated; -} +const CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::MEMORY_STRATEGY{EMemoryStrategy::E_Estimated}; } } diff --git a/lib/model/CProcessMemoryUsage_Linux.cc b/lib/model/CProcessMemoryUsage_Linux.cc index 8c5306e069..95ef5af712 100644 --- a/lib/model/CProcessMemoryUsage_Linux.cc +++ b/lib/model/CProcessMemoryUsage_Linux.cc @@ -13,9 +13,8 @@ namespace ml { namespace model { + // On Linux the process memory usage is determined by the OS. -CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::memoryStrategy() { - return EMemoryStrategy::E_System; -} +const CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::MEMORY_STRATEGY{EMemoryStrategy::E_System}; } } diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index 6529fa4656..205fd76fa7 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -404,7 +404,7 @@ CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { std::size_t CResourceMonitor::applyMemoryStrategy(std::size_t usage) const { std::size_t modifiedUsage{0}; - switch (CProcessMemoryUsage::memoryStrategy()) { + switch (CProcessMemoryUsage::MEMORY_STRATEGY) { case CProcessMemoryUsage::EMemoryStrategy::E_Estimated: { modifiedUsage = this->adjustedUsage(usage); break; From 014be1ccb22d59995e7054c54c3987f6331cdc95 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 13 Jun 2025 10:44:11 +1200 Subject: [PATCH 21/22] Small tidy up * uncomment test code * formatting --- lib/core/unittest/CNamedPipeFactoryTest.cc | 60 +++++++++++----------- lib/model/CProcessMemoryUsage.cc | 3 +- lib/model/CProcessMemoryUsage_Linux.cc | 3 +- 3 files changed, 34 insertions(+), 32 deletions(-) diff --git a/lib/core/unittest/CNamedPipeFactoryTest.cc b/lib/core/unittest/CNamedPipeFactoryTest.cc index 446c978558..6ad24c5829 100644 --- a/lib/core/unittest/CNamedPipeFactoryTest.cc +++ b/lib/core/unittest/CNamedPipeFactoryTest.cc @@ -158,36 +158,36 @@ BOOST_AUTO_TEST_CASE(testServerIsCppWriter) { BOOST_REQUIRE_EQUAL(std::string(TEST_SIZE, TEST_CHAR), threadReader.data()); } -// BOOST_AUTO_TEST_CASE(testServerIsCWriter) { -// ml::test::CThreadDataReader threadReader{PAUSE_TIME_MS, MAX_ATTEMPTS, TEST_PIPE_NAME}; -// BOOST_TEST_REQUIRE(threadReader.start()); -// -// std::atomic_bool dummy{false}; -// ml::core::CNamedPipeFactory::TFileP file{ -// ml::core::CNamedPipeFactory::openPipeFileWrite(TEST_PIPE_NAME, dummy)}; -// BOOST_TEST_REQUIRE(file); -// -// sleep(1); -// std::size_t charsLeft{TEST_SIZE}; -// std::size_t blockSize{7}; -// while (charsLeft > 0) { -// if (blockSize > charsLeft) { -// blockSize = charsLeft; -// } -// BOOST_TEST_REQUIRE(std::fputs(std::string(blockSize, TEST_CHAR).c_str(), -// file.get()) >= 0); -// charsLeft -= blockSize; -// } -// -// file.reset(); -// -// BOOST_TEST_REQUIRE(threadReader.waitForFinish()); -// BOOST_TEST_REQUIRE(threadReader.attemptsTaken() <= MAX_ATTEMPTS); -// BOOST_TEST_REQUIRE(threadReader.streamWentBad() == false); -// -// BOOST_REQUIRE_EQUAL(TEST_SIZE, threadReader.data().length()); -// BOOST_REQUIRE_EQUAL(std::string(TEST_SIZE, TEST_CHAR), threadReader.data()); -// } +BOOST_AUTO_TEST_CASE(testServerIsCWriter) { + ml::test::CThreadDataReader threadReader{PAUSE_TIME_MS, MAX_ATTEMPTS, TEST_PIPE_NAME}; + BOOST_TEST_REQUIRE(threadReader.start()); + + std::atomic_bool dummy{false}; + ml::core::CNamedPipeFactory::TFileP file{ + ml::core::CNamedPipeFactory::openPipeFileWrite(TEST_PIPE_NAME, dummy)}; + BOOST_TEST_REQUIRE(file); + + sleep(1); + std::size_t charsLeft{TEST_SIZE}; + std::size_t blockSize{7}; + while (charsLeft > 0) { + if (blockSize > charsLeft) { + blockSize = charsLeft; + } + BOOST_TEST_REQUIRE(std::fputs(std::string(blockSize, TEST_CHAR).c_str(), + file.get()) >= 0); + charsLeft -= blockSize; + } + + file.reset(); + + BOOST_TEST_REQUIRE(threadReader.waitForFinish()); + BOOST_TEST_REQUIRE(threadReader.attemptsTaken() <= MAX_ATTEMPTS); + BOOST_TEST_REQUIRE(threadReader.streamWentBad() == false); + + BOOST_REQUIRE_EQUAL(TEST_SIZE, threadReader.data().length()); + BOOST_REQUIRE_EQUAL(std::string(TEST_SIZE, TEST_CHAR), threadReader.data()); +} BOOST_AUTO_TEST_CASE(testCancelBlock) { CThreadBlockCanceller cancellerThread{ml::core::CThread::currentThreadId()}; diff --git a/lib/model/CProcessMemoryUsage.cc b/lib/model/CProcessMemoryUsage.cc index 6fafd92323..176a9825d2 100644 --- a/lib/model/CProcessMemoryUsage.cc +++ b/lib/model/CProcessMemoryUsage.cc @@ -14,6 +14,7 @@ namespace ml { namespace model { // On platforms other than Linux the process memory usage is the estimated size of the models. -const CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::MEMORY_STRATEGY{EMemoryStrategy::E_Estimated}; +const CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::MEMORY_STRATEGY{ + EMemoryStrategy::E_Estimated}; } } diff --git a/lib/model/CProcessMemoryUsage_Linux.cc b/lib/model/CProcessMemoryUsage_Linux.cc index 95ef5af712..ff3a52151b 100644 --- a/lib/model/CProcessMemoryUsage_Linux.cc +++ b/lib/model/CProcessMemoryUsage_Linux.cc @@ -15,6 +15,7 @@ namespace ml { namespace model { // On Linux the process memory usage is determined by the OS. -const CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::MEMORY_STRATEGY{EMemoryStrategy::E_System}; +const CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::MEMORY_STRATEGY{ + EMemoryStrategy::E_System}; } } From 41c02defea5209dc5ef6e387308665785291bdf7 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 17 Jun 2025 15:43:30 +1200 Subject: [PATCH 22/22] On Linux, return system memory (max resident set size) from CResourceMonitor.totalMemory() While this does seem the correct thing to do, it does lead to a large number of broken unit test cases on Linux. These test failures fall largely into two groups * Those that fail due to the assumption that totalMemory returns an estimate of model memory, and as such totalMemory can go up and down in the space of a test. Compare this system memory, which by its nature will never decrease over the lifetime of a process. * And those that fail, not due to any inherent assumptions of there own, but due to the effects of other tests run prior in the process. These prior run tests increase the system memory value such that memory resource limits are now hit. In other words the test cases are now not truly independent of one another. Commiting this change now for full visibility and for a point of further discussion. --- lib/model/CResourceMonitor.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index 205fd76fa7..83ec624b39 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -380,10 +380,10 @@ CResourceMonitor::SModelSizeStats CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { SModelSizeStats res; res.s_Usage = this->totalMemory(); - res.s_AdjustedUsage = this->applyMemoryStrategy(res.s_Usage); + res.s_AdjustedUsage = this->adjustedUsage(res.s_Usage); res.s_PeakUsage = static_cast( core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); - res.s_AdjustedPeakUsage = this->applyMemoryStrategy(res.s_PeakUsage); + res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage); res.s_BytesMemoryLimit = this->getBytesMemoryLimit(); res.s_BytesExceeded = m_CurrentBytesExceeded; res.s_MemoryStatus = m_MemoryStatus; @@ -406,7 +406,7 @@ std::size_t CResourceMonitor::applyMemoryStrategy(std::size_t usage) const { std::size_t modifiedUsage{0}; switch (CProcessMemoryUsage::MEMORY_STRATEGY) { case CProcessMemoryUsage::EMemoryStrategy::E_Estimated: { - modifiedUsage = this->adjustedUsage(usage); + modifiedUsage = usage; break; } case CProcessMemoryUsage::EMemoryStrategy::E_System: { @@ -504,9 +504,9 @@ std::size_t CResourceMonitor::lowLimit() const { } std::size_t CResourceMonitor::totalMemory() const { - return m_MonitoredResourceCurrentMemory + m_ExtraMemory + - static_cast(core::CProgramCounters::counter( - counter_t::E_TSADOutputMemoryAllocatorUsage)); + return this->applyMemoryStrategy(m_MonitoredResourceCurrentMemory + m_ExtraMemory + + static_cast(core::CProgramCounters::counter( + counter_t::E_TSADOutputMemoryAllocatorUsage))); } std::size_t CResourceMonitor::systemMemory() {