@@ -111,6 +111,7 @@ std::mutex s_task_signatures_mtx;
// Set of int64_t task signatures kept per TTaskType.
// NOTE(review): presumably guarded by s_task_signatures_mtx declared just above -- confirm.
111111std::unordered_map<TTaskType::type, std::unordered_set<int64_t >> s_task_signatures;
112112
// Process-wide report version, seeded from time(nullptr) * 100000 and bumped by
// increase_report_version(). report_tablet_callback() skips sending a report
// whose captured version is older than this value.
113113std::atomic_ulong s_report_version (time(nullptr ) * 100000);
// time(nullptr) value recorded when a tablet report is skipped or fails while
// this variable is 0; stored back to 0 when a report succeeds. The metric
// tablet_report_continuous_failure_duration_s is set to "now - this value" on
// each skip/failure and to 0 on success.
// NOTE(review): callers do `load() == 0` followed by `store()`, then read the
// variable again for the subtraction; these are separate atomic operations,
// not one. If callbacks can run concurrently, consider compare_exchange_strong
// and reusing the loaded value -- confirm the threading model.
114+ std::atomic<int64_t > s_tablet_report_failure_start_time (0 );
114115
115116void increase_report_version () {
116117 s_report_version.fetch_add (1 , std::memory_order_relaxed);
@@ -1181,13 +1182,20 @@ void report_tablet_callback(StorageEngine& engine, const ClusterInfo* cluster_in
11811182 }
11821183 }
11831184
1184- if (report_version < s_report_version) {
1185+ if (report_version < s_report_version || UNLIKELY (config::enable_debug_points &&
1186+ DebugPoints::instance ()->is_enable (" WorkPoolReportTablet.report_tablet_callback.skip" ))) {
11851187 // TODO llj This can only reduce the possibility for report error, but can't avoid it.
11861188 // If FE create a tablet in FE meta and send CREATE task to this BE, the tablet may not be included in this
11871189 // report, and the report version has a small probability that it has not been updated in time. When FE
11881190 // receives this report, it is possible to delete the new tablet.
11891191 LOG (WARNING) << " report version " << report_version << " change to " << s_report_version;
11901192 DorisMetrics::instance ()->report_all_tablets_requests_skip ->increment (1 );
1193+ int64_t current_time = time (nullptr );
1194+ if (s_tablet_report_failure_start_time.load () == 0 ) {
1195+ s_tablet_report_failure_start_time.store (current_time);
1196+ }
1197+ DorisMetrics::instance ()->tablet_report_continuous_failure_duration_s ->set_value (
1198+ current_time - s_tablet_report_failure_start_time);
11911199 return ;
11921200 }
11931201
@@ -1227,6 +1235,15 @@ void report_tablet_callback(StorageEngine& engine, const ClusterInfo* cluster_in
12271235 report_tablet_total << 1 ;
12281236 if (!succ) [[unlikely]] {
12291237 report_tablet_failed << 1 ;
1238+ int64_t current_time = time (nullptr );
1239+ if (s_tablet_report_failure_start_time.load () == 0 ) {
1240+ s_tablet_report_failure_start_time.store (current_time);
1241+ }
1242+ DorisMetrics::instance ()->tablet_report_continuous_failure_duration_s ->set_value (
1243+ current_time - s_tablet_report_failure_start_time);
1244+ } else {
1245+ s_tablet_report_failure_start_time.store (0 );
1246+ DorisMetrics::instance ()->tablet_report_continuous_failure_duration_s ->set_value (0 );
12301247 }
12311248}
12321249
@@ -1254,9 +1271,16 @@ void report_tablet_callback(CloudStorageEngine& engine, const ClusterInfo* clust
12541271 }
12551272 }
12561273
1257- if (report_version < s_report_version) {
1274+ if (report_version < s_report_version || UNLIKELY (config::enable_debug_points &&
1275+ DebugPoints::instance ()->is_enable (" WorkPoolCloudReportTablet.report_tablet_callback.skip" ))) {
12581276 LOG (WARNING) << " report version " << report_version << " change to " << s_report_version;
12591277 DorisMetrics::instance ()->report_all_tablets_requests_skip ->increment (1 );
1278+ int64_t current_time = time (nullptr );
1279+ if (s_tablet_report_failure_start_time.load () == 0 ) {
1280+ s_tablet_report_failure_start_time.store (current_time);
1281+ }
1282+ DorisMetrics::instance ()->tablet_report_continuous_failure_duration_s ->set_value (
1283+ current_time - s_tablet_report_failure_start_time);
12601284 return ;
12611285 }
12621286
@@ -1267,6 +1291,15 @@ void report_tablet_callback(CloudStorageEngine& engine, const ClusterInfo* clust
12671291 report_tablet_total << 1 ;
12681292 if (!succ) [[unlikely]] {
12691293 report_tablet_failed << 1 ;
1294+ int64_t current_time = time (nullptr );
1295+ if (s_tablet_report_failure_start_time.load () == 0 ) {
1296+ s_tablet_report_failure_start_time.store (current_time);
1297+ }
1298+ DorisMetrics::instance ()->tablet_report_continuous_failure_duration_s ->set_value (
1299+ current_time - s_tablet_report_failure_start_time);
1300+ } else {
1301+ s_tablet_report_failure_start_time.store (0 );
1302+ DorisMetrics::instance ()->tablet_report_continuous_failure_duration_s ->set_value (0 );
12701303 }
12711304}
12721305
0 commit comments