@@ -88,6 +88,7 @@ use std::fmt::Display;
 use std::intrinsics::unlikely;
 use std::path::Path;
 use std::sync::Arc;
+use std::sync::atomic::{AtomicU64, Ordering};
 use std::time::{Duration, Instant};
 use std::{fs, process};
@@ -117,6 +118,7 @@ bitflags::bitflags! {
 
         const DEFAULT = Self::GENERIC_ACTIVITIES.bits() |
             Self::QUERY_PROVIDERS.bits() |
+            Self::QUERY_CACHE_HITS.bits() |
             Self::QUERY_BLOCKED.bits() |
             Self::INCR_CACHE_LOADS.bits() |
             Self::INCR_RESULT_HASHING.bits() |
@@ -145,6 +147,7 @@ const EVENT_FILTERS_BY_NAME: &[(&str, EventFilter)] = &[
 ];
 
 /// Something that uniquely identifies a query invocation.
+#[derive(PartialEq, Eq, Hash)]
 pub struct QueryInvocationId(pub u32);
 
 /// Which format to use for `-Z time-passes`
@@ -411,10 +414,7 @@ impl SelfProfilerRef {
         #[inline(never)]
         #[cold]
         fn cold_call(profiler_ref: &SelfProfilerRef, query_invocation_id: QueryInvocationId) {
-            profiler_ref.instant_query_event(
-                |profiler| profiler.query_cache_hit_event_kind,
-                query_invocation_id,
-            );
+            profiler_ref.profiler.as_ref().unwrap().increment_query_cache_hit(query_invocation_id);
         }
 
         if unlikely(self.event_filter_mask.contains(EventFilter::QUERY_CACHE_HITS)) {
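The hunk above keeps a standard hot/cold split: the caller checks the event filter on the fast path, while the rarely taken recording logic is outlined behind `#[cold]` and `#[inline(never)]`. A minimal stable-Rust sketch of the same shape, with hypothetical names and a plain boolean standing in for the unstable `std::intrinsics::unlikely` hint:

```rust
/// Sketch only: hypothetical names, mirroring the hot/cold split above.
#[cold]
#[inline(never)]
fn record_event_cold(events: &mut Vec<u32>, id: u32) {
    // Rarely executed work lives out of line, keeping the caller's
    // machine code small and branch-predictor friendly.
    events.push(id);
}

#[inline(always)]
fn maybe_record_event(enabled: bool, events: &mut Vec<u32>, id: u32) {
    // The common case (recording disabled) is a single cheap branch
    // around the outlined call.
    if enabled {
        record_event_cold(events, id);
    }
}

fn main() {
    let mut events = Vec::new();
    maybe_record_event(true, &mut events, 42);
    assert_eq!(events, vec![42]);
}
```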
@@ -459,22 +459,6 @@ impl SelfProfilerRef {
         })
     }
 
-    #[inline(always)]
-    fn instant_query_event(
-        &self,
-        event_kind: fn(&SelfProfiler) -> StringId,
-        query_invocation_id: QueryInvocationId,
-    ) {
-        let event_id = StringId::new_virtual(query_invocation_id.0);
-        let thread_id = get_thread_id();
-        let profiler = self.profiler.as_ref().unwrap();
-        profiler.profiler.record_instant_event(
-            event_kind(profiler),
-            EventId::from_virtual(event_id),
-            thread_id,
-        );
-    }
-
     pub fn with_profiler(&self, f: impl FnOnce(&SelfProfiler)) {
         if let Some(profiler) = &self.profiler {
             f(profiler)
@@ -489,6 +473,30 @@ impl SelfProfilerRef {
         self.profiler.as_ref().map(|p| p.get_or_alloc_cached_string(s))
     }
 
+    /// Store query cache hits to the self-profile log.
+    /// Should be called once at the end of the compilation session.
+    ///
+    /// The cache hits are stored per **query invocation**, not per **query kind/type**.
+    /// `analyzeme` can later deduplicate individual query labels from the
+    /// QueryInvocationId event IDs.
+    pub fn store_query_cache_hits(&self) {
+        if self.event_filter_mask.contains(EventFilter::QUERY_CACHE_HITS) {
+            let profiler = self.profiler.as_ref().unwrap();
+            let query_hits = profiler.query_hits.read();
+            let builder = EventIdBuilder::new(&profiler.profiler);
+            let thread_id = get_thread_id();
+            for (query_invocation, hit_count) in query_hits.iter() {
+                let event_id = builder.from_label(StringId::new_virtual(query_invocation.0));
+                profiler.profiler.record_integer_event(
+                    profiler.query_cache_hit_count_event_kind,
+                    event_id,
+                    thread_id,
+                    hit_count.load(Ordering::Relaxed),
+                );
+            }
+        }
+    }
+
     #[inline]
     pub fn enabled(&self) -> bool {
         self.profiler.is_some()
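For readers outside rustc: `store_query_cache_hits` walks the aggregated per-invocation counters and emits one integer event each. Below is a self-contained sketch of the same aggregate-then-flush pattern, using plain `parking_lot` and `std` types instead of rustc's internal `RwLock`/`FxHashMap` wrappers. Names like `HitCounters` are illustrative, `println!` stands in for `record_integer_event`, and the consuming `RwLockUpgradableReadGuard::upgrade` is used in place of the `with_upgraded` helper seen in the diff:

```rust
use std::collections::HashMap;
use std::sync::atomic::{AtomicU64, Ordering};

use parking_lot::{RwLock, RwLockUpgradableReadGuard};

/// Hypothetical stand-in for rustc's `QueryInvocationId`.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct QueryInvocationId(pub u32);

/// Aggregated cache-hit counts, keyed per query invocation.
#[derive(Default)]
pub struct HitCounters {
    hits: RwLock<HashMap<QueryInvocationId, AtomicU64>>,
}

impl HitCounters {
    /// Record one cache hit for `id`.
    pub fn increment(&self, id: QueryInvocationId) {
        // Fast path: the invocation was seen before, so a shared lock plus
        // a relaxed atomic add is all the synchronization we need.
        let guard = self.hits.upgradable_read();
        if let Some(counter) = guard.get(&id) {
            counter.fetch_add(1, Ordering::Relaxed);
            return;
        }
        // Slow path, taken once per invocation: upgrade to a write lock and
        // insert the initial count. Upgradable guards exclude each other, so
        // no other thread can insert `id` between the check and the upgrade.
        let mut map = RwLockUpgradableReadGuard::upgrade(guard);
        map.insert(id, AtomicU64::new(1));
    }

    /// Flush all totals once, analogous to `store_query_cache_hits` above
    /// (the real code emits measureme integer events instead of printing).
    pub fn flush(&self) {
        for (id, count) in self.hits.read().iter() {
            println!("{id:?}: {} cache hits", count.load(Ordering::Relaxed));
        }
    }
}
```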
@@ -537,13 +545,24 @@ pub struct SelfProfiler {
 
     string_cache: RwLock<FxHashMap<String, StringId>>,
 
+    /// Recording individual query cache hits as "instant" measureme events
+    /// is incredibly expensive. Instead of doing that, we simply aggregate
+    /// cache hit *counts* per query invocation, and then store the final count
+    /// of cache hits per invocation at the end of the compilation session.
+    ///
+    /// With this approach, we don't know the individual thread IDs and timestamps
+    /// of cache hits, but it has very little overhead on top of `-Zself-profile`.
+    /// Recording the cache hits as individual events made compilation 3-5x slower.
+    query_hits: RwLock<FxHashMap<QueryInvocationId, AtomicU64>>,
+
     query_event_kind: StringId,
     generic_activity_event_kind: StringId,
     incremental_load_result_event_kind: StringId,
     incremental_result_hashing_event_kind: StringId,
     query_blocked_event_kind: StringId,
-    query_cache_hit_event_kind: StringId,
     artifact_size_event_kind: StringId,
+    /// Total cache hits per query invocation
+    query_cache_hit_count_event_kind: StringId,
 }
 
 impl SelfProfiler {
@@ -571,8 +590,8 @@ impl SelfProfiler {
         let incremental_result_hashing_event_kind =
             profiler.alloc_string("IncrementalResultHashing");
         let query_blocked_event_kind = profiler.alloc_string("QueryBlocked");
-        let query_cache_hit_event_kind = profiler.alloc_string("QueryCacheHit");
         let artifact_size_event_kind = profiler.alloc_string("ArtifactSize");
+        let query_cache_hit_count_event_kind = profiler.alloc_string("QueryCacheHitCount");
 
         let mut event_filter_mask = EventFilter::empty();
 
@@ -616,8 +635,9 @@ impl SelfProfiler {
             incremental_load_result_event_kind,
             incremental_result_hashing_event_kind,
             query_blocked_event_kind,
-            query_cache_hit_event_kind,
             artifact_size_event_kind,
+            query_cache_hit_count_event_kind,
+            query_hits: Default::default(),
         })
     }
 
@@ -627,6 +647,21 @@ impl SelfProfiler {
         self.profiler.alloc_string(s)
     }
 
+    /// Store a cache hit of a query invocation
+    pub fn increment_query_cache_hit(&self, id: QueryInvocationId) {
+        // Fast path: assume that the query was already encountered before,
+        // and just record a cache hit.
+        let mut guard = self.query_hits.upgradable_read();
+        if let Some(counter) = guard.get(&id) {
+            // We only want the count, no other synchronization is required.
+            counter.fetch_add(1, Ordering::Relaxed);
+            return;
+        }
+        guard.with_upgraded(|map| {
+            map.insert(id, AtomicU64::from(1));
+        });
+    }
+
     /// Gets a `StringId` for the given string. This method makes sure that
     /// any strings going through it will only be allocated once in the
     /// profiling data.
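Finally, a hypothetical driver for the `HitCounters` sketch shown earlier, illustrating why `Ordering::Relaxed` suffices when only the final totals matter: several threads increment concurrently, then a single flush runs at the end, mirroring how `store_query_cache_hits` is called once per compilation session:

```rust
use std::sync::Arc;
use std::thread;

fn main() {
    let counters = Arc::new(HitCounters::default());

    // Four worker threads hammer a small set of invocation IDs concurrently.
    let handles: Vec<_> = (0..4u32)
        .map(|t| {
            let counters = Arc::clone(&counters);
            thread::spawn(move || {
                for i in 0..1_000u32 {
                    counters.increment(QueryInvocationId(t * 4 + i % 4));
                }
            })
        })
        .collect();
    for handle in handles {
        handle.join().unwrap();
    }

    // One flush at the end, like `store_query_cache_hits` at session end.
    counters.flush();
}
```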