@@ -82,6 +82,7 @@ type BlocksCleaner struct {
82
82
remainingPlannedCompactions * prometheus.GaugeVec
83
83
inProgressCompactions * prometheus.GaugeVec
84
84
oldestPartitionGroupOffset * prometheus.GaugeVec
85
+ enqueueJobFailed * prometheus.CounterVec
85
86
}
86
87
87
88
func NewBlocksCleaner (
@@ -186,6 +187,10 @@ func NewBlocksCleaner(
186
187
remainingPlannedCompactions : remainingPlannedCompactions ,
187
188
inProgressCompactions : inProgressCompactions ,
188
189
oldestPartitionGroupOffset : oldestPartitionGroupOffset ,
190
+ enqueueJobFailed : promauto .With (reg ).NewCounterVec (prometheus.CounterOpts {
191
+ Name : "cortex_compactor_enqueue_cleaner_job_failed_total" ,
192
+ Help : "Total number of cleaner jobs failed to be enqueued." ,
193
+ }, []string {"user_status" }),
189
194
}
190
195
191
196
c .Service = services .NewBasicService (c .starting , c .loop , nil )
@@ -243,13 +248,25 @@ func (c *BlocksCleaner) loop(ctx context.Context) error {
243
248
continue
244
249
}
245
250
cleanJobTimestamp := time .Now ().Unix ()
246
- usersChan <- & cleanerJob {
251
+
252
+ select {
253
+ case usersChan <- & cleanerJob {
247
254
users : activeUsers ,
248
255
timestamp : cleanJobTimestamp ,
256
+ }:
257
+ default :
258
+ level .Warn (c .logger ).Log ("msg" , "unable to push cleaning job to usersChan" )
259
+ c .enqueueJobFailed .WithLabelValues (activeStatus ).Inc ()
249
260
}
250
- deleteChan <- & cleanerJob {
261
+
262
+ select {
263
+ case deleteChan <- & cleanerJob {
251
264
users : deletedUsers ,
252
265
timestamp : cleanJobTimestamp ,
266
+ }:
267
+ default :
268
+ level .Warn (c .logger ).Log ("msg" , "unable to push deletion job to deleteChan" )
269
+ c .enqueueJobFailed .WithLabelValues (deletedStatus ).Inc ()
253
270
}
254
271
255
272
case <- ctx .Done ():
@@ -392,10 +409,18 @@ func (c *BlocksCleaner) obtainVisitMarkerManager(ctx context.Context, userLogger
392
409
}
393
410
394
411
// Remove blocks and remaining data for tenant marked for deletion.
395
- func (c * BlocksCleaner ) deleteUserMarkedForDeletion (ctx context.Context , userLogger log.Logger , userBucket objstore.InstrumentedBucket , userID string ) error {
396
-
412
+ func (c * BlocksCleaner ) deleteUserMarkedForDeletion (ctx context.Context , userLogger log.Logger , userBucket objstore.InstrumentedBucket , userID string ) ( returnErr error ) {
413
+ startTime := time . Now ()
397
414
level .Info (userLogger ).Log ("msg" , "deleting blocks for tenant marked for deletion" )
415
+ defer func () {
416
+ if returnErr != nil {
417
+ level .Warn (userLogger ).Log ("msg" , "failed deleting tenant marked for deletion" , "err" , returnErr )
418
+ } else {
419
+ level .Info (userLogger ).Log ("msg" , "completed deleting tenant marked for deletion" , "duration" , time .Since (startTime ), "duration_ms" , time .Since (startTime ).Milliseconds ())
420
+ }
421
+ }()
398
422
423
+ begin := time .Now ()
399
424
// We immediately delete the bucket index, to signal to its consumers that
400
425
// the tenant has "no blocks" in the storage.
401
426
if err := bucketindex .DeleteIndex (ctx , c .bucketClient , userID , c .cfgProvider ); err != nil {
@@ -465,6 +490,7 @@ func (c *BlocksCleaner) deleteUserMarkedForDeletion(ctx context.Context, userLog
465
490
if deletedBlocks .Load () > 0 {
466
491
level .Info (userLogger ).Log ("msg" , "deleted blocks for tenant marked for deletion" , "deletedBlocks" , deletedBlocks .Load ())
467
492
}
493
+ level .Info (userLogger ).Log ("msg" , "completed deleting blocks for tenant marked for deletion" , "duration" , time .Since (begin ), "duration_ms" , time .Since (begin ).Milliseconds ())
468
494
469
495
mark , err := cortex_tsdb .ReadTenantDeletionMark (ctx , c .bucketClient , userID )
470
496
if err != nil {
@@ -491,10 +517,11 @@ func (c *BlocksCleaner) deleteUserMarkedForDeletion(ctx context.Context, userLog
491
517
return err
492
518
}
493
519
494
- if deleted , err := bucket .DeletePrefix (ctx , userBucket , bucketindex .MarkersPathname , userLogger ); err != nil {
520
+ begin = time .Now ()
521
+ if deleted , err := bucket .DeletePrefix (ctx , userBucket , bucketindex .MarkersPathname , userLogger , defaultDeleteBlocksConcurrency ); err != nil {
495
522
return errors .Wrap (err , "failed to delete marker files" )
496
523
} else if deleted > 0 {
497
- level .Info (userLogger ).Log ("msg" , "deleted marker files for tenant marked for deletion" , "count" , deleted )
524
+ level .Info (userLogger ).Log ("msg" , "deleted marker files for tenant marked for deletion" , "count" , deleted , "duration" , time . Since ( begin ), "duration_ms" , time . Since ( begin ). Milliseconds () )
498
525
}
499
526
if err := cortex_tsdb .DeleteTenantDeletionMark (ctx , c .bucketClient , userID ); err != nil {
500
527
return errors .Wrap (err , "failed to delete tenant deletion mark" )
@@ -503,18 +530,20 @@ func (c *BlocksCleaner) deleteUserMarkedForDeletion(ctx context.Context, userLog
503
530
}
504
531
505
532
func (c * BlocksCleaner ) deleteNonDataFiles (ctx context.Context , userLogger log.Logger , userBucket objstore.InstrumentedBucket ) error {
506
- if deleted , err := bucket .DeletePrefix (ctx , userBucket , block .DebugMetas , userLogger ); err != nil {
533
+ begin := time .Now ()
534
+ if deleted , err := bucket .DeletePrefix (ctx , userBucket , block .DebugMetas , userLogger , defaultDeleteBlocksConcurrency ); err != nil {
507
535
return errors .Wrap (err , "failed to delete " + block .DebugMetas )
508
536
} else if deleted > 0 {
509
- level .Info (userLogger ).Log ("msg" , "deleted files under " + block .DebugMetas + " for tenant marked for deletion" , "count" , deleted )
537
+ level .Info (userLogger ).Log ("msg" , "deleted files under " + block .DebugMetas + " for tenant marked for deletion" , "count" , deleted , "duration" , time . Since ( begin ), "duration_ms" , time . Since ( begin ). Milliseconds () )
510
538
}
511
539
512
540
if c .cfg .CompactionStrategy == util .CompactionStrategyPartitioning {
541
+ begin = time .Now ()
513
542
// Clean up partitioned group info files
514
- if deleted , err := bucket .DeletePrefix (ctx , userBucket , PartitionedGroupDirectory , userLogger ); err != nil {
543
+ if deleted , err := bucket .DeletePrefix (ctx , userBucket , PartitionedGroupDirectory , userLogger , defaultDeleteBlocksConcurrency ); err != nil {
515
544
return errors .Wrap (err , "failed to delete " + PartitionedGroupDirectory )
516
545
} else if deleted > 0 {
517
- level .Info (userLogger ).Log ("msg" , "deleted files under " + PartitionedGroupDirectory + " for tenant marked for deletion" , "count" , deleted )
546
+ level .Info (userLogger ).Log ("msg" , "deleted files under " + PartitionedGroupDirectory + " for tenant marked for deletion" , "count" , deleted , "duration" , time . Since ( begin ), "duration_ms" , time . Since ( begin ). Milliseconds () )
518
547
}
519
548
}
520
549
return nil
@@ -531,7 +560,7 @@ func (c *BlocksCleaner) cleanUser(ctx context.Context, userLogger log.Logger, us
531
560
if returnErr != nil {
532
561
level .Warn (userLogger ).Log ("msg" , "failed blocks cleanup and maintenance" , "err" , returnErr )
533
562
} else {
534
- level .Info (userLogger ).Log ("msg" , "completed blocks cleanup and maintenance" , "duration" , time .Since (startTime ))
563
+ level .Info (userLogger ).Log ("msg" , "completed blocks cleanup and maintenance" , "duration" , time .Since (startTime ), "duration_ms" , time . Since ( startTime ). Milliseconds () )
535
564
}
536
565
c .tenantCleanDuration .WithLabelValues (userID ).Set (time .Since (startTime ).Seconds ())
537
566
}()
@@ -771,7 +800,7 @@ func (c *BlocksCleaner) cleanPartitionedGroupInfo(ctx context.Context, userBucke
771
800
772
801
if extraInfo .status .CanDelete || extraInfo .status .DeleteVisitMarker {
773
802
// Remove partition visit markers
774
- if _ , err := bucket .DeletePrefix (ctx , userBucket , GetPartitionVisitMarkerDirectoryPath (partitionedGroupInfo .PartitionedGroupID ), userLogger ); err != nil {
803
+ if _ , err := bucket .DeletePrefix (ctx , userBucket , GetPartitionVisitMarkerDirectoryPath (partitionedGroupInfo .PartitionedGroupID ), userLogger , defaultDeleteBlocksConcurrency ); err != nil {
775
804
level .Warn (userLogger ).Log ("msg" , "failed to delete partition visit markers for partitioned group" , "partitioned_group_info" , partitionedGroupInfoFile , "err" , err )
776
805
} else {
777
806
level .Info (userLogger ).Log ("msg" , "deleted partition visit markers for partitioned group" , "partitioned_group_info" , partitionedGroupInfoFile )
0 commit comments