@@ -111,6 +111,7 @@ std::mutex s_task_signatures_mtx;
// Per-task-type sets of int64_t task signatures.
// NOTE(review): presumably guarded by s_task_signatures_mtx declared just above — confirm.
111111std::unordered_map<TTaskType::type, std::unordered_set<int64_t >> s_task_signatures;
112112
// Report version counter. Seeded with time(nullptr) * 100000 and incremented by one
// (relaxed) in increase_report_version(). report_tablet_callback() skips a tablet report
// when its report_version is less than this value.
113113std::atomic_ulong s_report_version (time(nullptr ) * 100000);
// Start of the current run of skipped/failed tablet reports, as returned by time(nullptr);
// 0 means no such run is in progress. report_tablet_callback() sets it with a
// compare-exchange from 0 on a skipped or failed report, resets it to 0 on success, and
// publishes (current time - this value) to tablet_report_continuous_failure_duration_s.
// NOTE(review): the callbacks re-load this atomic after the compare-exchange; if another
// thread could store 0 in between, the published duration would be current_time - 0.
// Confirm whether the callbacks can run concurrently.
114+ std::atomic<int64_t > s_tablet_report_failure_start_time (0 );
114115
115116void increase_report_version () {
116117 s_report_version.fetch_add (1 , std::memory_order_relaxed);
@@ -1181,13 +1182,21 @@ void report_tablet_callback(StorageEngine& engine, const ClusterInfo* cluster_in
11811182 }
11821183 }
11831184
1184- if (report_version < s_report_version) {
1185+ if (report_version < s_report_version ||
1186+ UNLIKELY (config::enable_debug_points &&
1187+ DebugPoints::instance ()->is_enable (
1188+ " WorkPoolReportTablet.report_tablet_callback.skip" ))) {
11851189 // TODO llj This can only reduce the possibility for report error, but can't avoid it.
11861190 // If FE create a tablet in FE meta and send CREATE task to this BE, the tablet may not be included in this
11871191 // report, and the report version has a small probability that it has not been updated in time. When FE
11881192 // receives this report, it is possible to delete the new tablet.
11891193 LOG (WARNING) << " report version " << report_version << " change to " << s_report_version;
11901194 DorisMetrics::instance ()->report_all_tablets_requests_skip ->increment (1 );
1195+ int64_t expected = 0 ;
1196+ int64_t current_time = time (nullptr );
1197+ s_tablet_report_failure_start_time.compare_exchange_strong (expected, current_time);
1198+ DorisMetrics::instance ()->tablet_report_continuous_failure_duration_s ->set_value (
1199+ current_time - s_tablet_report_failure_start_time);
11911200 return ;
11921201 }
11931202
@@ -1227,6 +1236,14 @@ void report_tablet_callback(StorageEngine& engine, const ClusterInfo* cluster_in
12271236 report_tablet_total << 1 ;
12281237 if (!succ) [[unlikely]] {
12291238 report_tablet_failed << 1 ;
1239+ int64_t expected = 0 ;
1240+ int64_t current_time = time (nullptr );
1241+ s_tablet_report_failure_start_time.compare_exchange_strong (expected, current_time);
1242+ DorisMetrics::instance ()->tablet_report_continuous_failure_duration_s ->set_value (
1243+ current_time - s_tablet_report_failure_start_time);
1244+ } else {
1245+ s_tablet_report_failure_start_time.store (0 );
1246+ DorisMetrics::instance ()->tablet_report_continuous_failure_duration_s ->set_value (0 );
12301247 }
12311248}
12321249
@@ -1254,9 +1271,17 @@ void report_tablet_callback(CloudStorageEngine& engine, const ClusterInfo* clust
12541271 }
12551272 }
12561273
1257- if (report_version < s_report_version) {
1274+ if (report_version < s_report_version ||
1275+ UNLIKELY (config::enable_debug_points &&
1276+ DebugPoints::instance ()->is_enable (
1277+ " WorkPoolCloudReportTablet.report_tablet_callback.skip" ))) {
12581278 LOG (WARNING) << " report version " << report_version << " change to " << s_report_version;
12591279 DorisMetrics::instance ()->report_all_tablets_requests_skip ->increment (1 );
1280+ int64_t expected = 0 ;
1281+ int64_t current_time = time (nullptr );
1282+ s_tablet_report_failure_start_time.compare_exchange_strong (expected, current_time);
1283+ DorisMetrics::instance ()->tablet_report_continuous_failure_duration_s ->set_value (
1284+ current_time - s_tablet_report_failure_start_time);
12601285 return ;
12611286 }
12621287
@@ -1267,6 +1292,14 @@ void report_tablet_callback(CloudStorageEngine& engine, const ClusterInfo* clust
12671292 report_tablet_total << 1 ;
12681293 if (!succ) [[unlikely]] {
12691294 report_tablet_failed << 1 ;
1295+ int64_t expected = 0 ;
1296+ int64_t current_time = time (nullptr );
1297+ s_tablet_report_failure_start_time.compare_exchange_strong (expected, current_time);
1298+ DorisMetrics::instance ()->tablet_report_continuous_failure_duration_s ->set_value (
1299+ current_time - s_tablet_report_failure_start_time);
1300+ } else {
1301+ s_tablet_report_failure_start_time.store (0 );
1302+ DorisMetrics::instance ()->tablet_report_continuous_failure_duration_s ->set_value (0 );
12701303 }
12711304}
12721305
0 commit comments