@@ -13,6 +13,8 @@ use errors::{struct_span_err, Diagnostic, DiagnosticBuilder, FatalError, Handler
13
13
#[ cfg( not( parallel_compiler) ) ]
14
14
use rustc_data_structures:: cold_path;
15
15
use rustc_data_structures:: fx:: { FxHashMap , FxHasher } ;
16
+ #[ cfg( parallel_compiler) ]
17
+ use rustc_data_structures:: profiling:: TimingGuard ;
16
18
use rustc_data_structures:: sharded:: Sharded ;
17
19
use rustc_data_structures:: sync:: { Lock , Lrc } ;
18
20
use rustc_data_structures:: thin_vec:: ThinVec ;
@@ -82,6 +84,19 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
82
84
/// for some compile-time benchmarks.
83
85
#[ inline( always) ]
84
86
pub ( super ) fn try_get ( tcx : TyCtxt < ' tcx > , span : Span , key : & Q :: Key ) -> TryGetJob < ' a , ' tcx , Q > {
87
+ // Handling the `query_blocked_prof_timer` is a bit weird because of the
88
+ // control flow in this function: Blocking is implemented by
89
+ // awaiting a running job and, once that is done, entering the loop below
90
+ // again from the top. In that second iteration we will hit the
91
+ // cache which provides us with the information we need for
92
+ // finishing the "query-blocked" event.
93
+ //
94
+ // We thus allocate `query_blocked_prof_timer` outside the loop,
95
+ // initialize it during the first iteration and finish it during the
96
+ // second iteration.
97
+ #[ cfg( parallel_compiler) ]
98
+ let mut query_blocked_prof_timer: Option < TimingGuard < ' _ > > = None ;
99
+
85
100
let cache = Q :: query_cache ( tcx) ;
86
101
loop {
87
102
// We compute the key's hash once and then use it for both the
@@ -95,7 +110,17 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
95
110
if let Some ( ( _, value) ) =
96
111
lock. results . raw_entry ( ) . from_key_hashed_nocheck ( key_hash, key)
97
112
{
98
- tcx. prof . query_cache_hit ( value. index . into ( ) ) ;
113
+ if unlikely ! ( tcx. prof. enabled( ) ) {
114
+ tcx. prof . query_cache_hit ( value. index . into ( ) ) ;
115
+
116
+ #[ cfg( parallel_compiler) ]
117
+ {
118
+ if let Some ( prof_timer) = query_blocked_prof_timer. take ( ) {
119
+ prof_timer. finish_with_query_invocation_id ( value. index . into ( ) ) ;
120
+ }
121
+ }
122
+ }
123
+
99
124
let result = ( value. value . clone ( ) , value. index ) ;
100
125
#[ cfg( debug_assertions) ]
101
126
{
@@ -104,9 +129,6 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
104
129
return TryGetJob :: JobCompleted ( result) ;
105
130
}
106
131
107
- #[ cfg( parallel_compiler) ]
108
- let query_blocked_prof_timer;
109
-
110
132
let job = match lock. active . entry ( ( * key) . clone ( ) ) {
111
133
Entry :: Occupied ( entry) => {
112
134
match * entry. get ( ) {
@@ -116,7 +138,7 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
116
138
// self-profiler.
117
139
#[ cfg( parallel_compiler) ]
118
140
{
119
- query_blocked_prof_timer = tcx. prof . query_blocked ( Q :: NAME ) ;
141
+ query_blocked_prof_timer = Some ( tcx. prof . query_blocked ( ) ) ;
120
142
}
121
143
122
144
job. clone ( )
@@ -153,11 +175,6 @@ impl<'a, 'tcx, Q: QueryDescription<'tcx>> JobOwner<'a, 'tcx, Q> {
153
175
{
154
176
let result = job. r#await ( tcx, span) ;
155
177
156
- // This `drop()` is not strictly necessary as the binding
157
- // would go out of scope anyway. But it's good to have an
158
- // explicit marker of how far the measurement goes.
159
- drop ( query_blocked_prof_timer) ;
160
-
161
178
if let Err ( cycle) = result {
162
179
return TryGetJob :: Cycle ( Q :: handle_cycle_error ( tcx, cycle) ) ;
163
180
}
0 commit comments