Skip to content

Commit 9529cbd

Browse files
Aaron1011Mark-Simulacrum
authored andcommitted
Preserve SyntaxContext for invalid/dummy spans in crate metadata
Fixes rust-lang#85197 We already preserved the `SyntaxContext` for invalid/dummy spans in the incremental cache, but we weren't doing the same for crate metadata. If an invalid (lo/hi from different files) span is written to the incremental cache, we will decode it with a 'dummy' location, but keep the original `SyntaxContext`. Since the crate metadata encoder was only checking for `DUMMY_SP` (dummy location + root `SyntaxContext`), the metadata encoder would treat it as a normal span, encoding the `SyntaxContext`. As a result, the final span encoded to the metadata would change across sessions, even if the crate itself was unchanged. This PR updates our encoding of spans in the crate metadata to mirror the encoding of spans into the incremental cache. We now always encode a `SyntaxContext`, and encode location information for spans with a non-dummy location.
1 parent f724ee4 commit 9529cbd

File tree

7 files changed

+113
-45
lines changed

7 files changed

+113
-45
lines changed

compiler/rustc_metadata/src/rmeta/decoder.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -406,17 +406,17 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for ExpnId {
406406

407407
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
408408
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Result<Span, String> {
409+
let ctxt = SyntaxContext::decode(decoder)?;
409410
let tag = u8::decode(decoder)?;
410411

411-
if tag == TAG_INVALID_SPAN {
412-
return Ok(DUMMY_SP);
412+
if tag == TAG_PARTIAL_SPAN {
413+
return Ok(DUMMY_SP.with_ctxt(ctxt));
413414
}
414415

415416
debug_assert!(tag == TAG_VALID_SPAN_LOCAL || tag == TAG_VALID_SPAN_FOREIGN);
416417

417418
let lo = BytePos::decode(decoder)?;
418419
let len = BytePos::decode(decoder)?;
419-
let ctxt = SyntaxContext::decode(decoder)?;
420420
let hi = lo + len;
421421

422422
let sess = if let Some(sess) = decoder.sess {

compiler/rustc_metadata/src/rmeta/encoder.rs

+41-41
Original file line numberDiff line numberDiff line change
@@ -184,11 +184,48 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for ExpnId {
184184

185185
impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
186186
fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) -> opaque::EncodeResult {
187-
if *self == rustc_span::DUMMY_SP {
188-
return TAG_INVALID_SPAN.encode(s);
187+
let span = self.data();
188+
189+
// Don't serialize any `SyntaxContext`s from a proc-macro crate,
190+
// since we don't load proc-macro dependencies during serialization.
191+
// This means that any hygiene information from macros used *within*
192+
// a proc-macro crate (e.g. invoking a macro that expands to a proc-macro
193+
// definition) will be lost.
194+
//
195+
// This can show up in two ways:
196+
//
197+
// 1. Any hygiene information associated with identifier of
198+
// a proc macro (e.g. `#[proc_macro] pub fn $name`) will be lost.
199+
// Since proc-macros can only be invoked from a different crate,
200+
// real code should never need to care about this.
201+
//
202+
// 2. Using `Span::def_site` or `Span::mixed_site` will not
203+
// include any hygiene information associated with the definition
204+
// site. This means that a proc-macro cannot emit a `$crate`
205+
// identifier which resolves to one of its dependencies,
206+
// which also should never come up in practice.
207+
//
208+
// Additionally, this affects `Span::parent`, and any other
209+
// span inspection APIs that would otherwise allow traversing
210+
// the `SyntaxContexts` associated with a span.
211+
//
212+
// None of these user-visible effects should result in any
213+
// cross-crate inconsistencies (getting one behavior in the same
214+
// crate, and a different behavior in another crate) due to the
215+
// limited surface that proc-macros can expose.
216+
//
217+
// IMPORTANT: If this is ever changed, be sure to update
218+
// `rustc_span::hygiene::raw_encode_expn_id` to handle
219+
// encoding `ExpnData` for proc-macro crates.
220+
if s.is_proc_macro {
221+
SyntaxContext::root().encode(s)?;
222+
} else {
223+
span.ctxt.encode(s)?;
189224
}
190225

191-
let span = self.data();
226+
if self.is_dummy() {
227+
return TAG_PARTIAL_SPAN.encode(s);
228+
}
192229

193230
// The Span infrastructure should make sure that this invariant holds:
194231
debug_assert!(span.lo <= span.hi);
@@ -203,7 +240,7 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
203240
if !s.source_file_cache.0.contains(span.hi) {
204241
// Unfortunately, macro expansion still sometimes generates Spans
205242
// that malformed in this way.
206-
return TAG_INVALID_SPAN.encode(s);
243+
return TAG_PARTIAL_SPAN.encode(s);
207244
}
208245

209246
let source_files = s.required_source_files.as_mut().expect("Already encoded SourceMap!");
@@ -259,43 +296,6 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
259296
let len = hi - lo;
260297
len.encode(s)?;
261298

262-
// Don't serialize any `SyntaxContext`s from a proc-macro crate,
263-
// since we don't load proc-macro dependencies during serialization.
264-
// This means that any hygiene information from macros used *within*
265-
// a proc-macro crate (e.g. invoking a macro that expands to a proc-macro
266-
// definition) will be lost.
267-
//
268-
// This can show up in two ways:
269-
//
270-
// 1. Any hygiene information associated with identifier of
271-
// a proc macro (e.g. `#[proc_macro] pub fn $name`) will be lost.
272-
// Since proc-macros can only be invoked from a different crate,
273-
// real code should never need to care about this.
274-
//
275-
// 2. Using `Span::def_site` or `Span::mixed_site` will not
276-
// include any hygiene information associated with the definition
277-
// site. This means that a proc-macro cannot emit a `$crate`
278-
// identifier which resolves to one of its dependencies,
279-
// which also should never come up in practice.
280-
//
281-
// Additionally, this affects `Span::parent`, and any other
282-
// span inspection APIs that would otherwise allow traversing
283-
// the `SyntaxContexts` associated with a span.
284-
//
285-
// None of these user-visible effects should result in any
286-
// cross-crate inconsistencies (getting one behavior in the same
287-
// crate, and a different behavior in another crate) due to the
288-
// limited surface that proc-macros can expose.
289-
//
290-
// IMPORTANT: If this is ever changed, be sure to update
291-
// `rustc_span::hygiene::raw_encode_expn_id` to handle
292-
// encoding `ExpnData` for proc-macro crates.
293-
if s.is_proc_macro {
294-
SyntaxContext::root().encode(s)?;
295-
} else {
296-
span.ctxt.encode(s)?;
297-
}
298-
299299
if tag == TAG_VALID_SPAN_FOREIGN {
300300
// This needs to be two lines to avoid holding the `s.source_file_cache`
301301
// while calling `cnum.encode(s)`

compiler/rustc_metadata/src/rmeta/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -450,4 +450,4 @@ struct GeneratorData<'tcx> {
450450
// Tags used for encoding Spans:
451451
const TAG_VALID_SPAN_LOCAL: u8 = 0;
452452
const TAG_VALID_SPAN_FOREIGN: u8 = 1;
453-
const TAG_INVALID_SPAN: u8 = 2;
453+
const TAG_PARTIAL_SPAN: u8 = 2;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// revisions: rpass1 rpass2
2+
3+
extern crate respan;
4+
5+
#[macro_use]
6+
#[path = "invalid-span-helper-mod.rs"]
7+
mod invalid_span_helper_mod;
8+
9+
// Invoke a macro from a different file - this
10+
// allows us to get tokens with spans from different files
11+
helper!(1);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#[macro_export]
2+
macro_rules! helper {
3+
// Use `:tt` instead of `:ident` so that we don't get a `None`-delimited group
4+
($first:tt) => {
5+
pub fn foo<T>() {
6+
// The span of `$first` comes from another file,
7+
// so the expression `1 + $first` ends up with an
8+
// 'invalid' span that starts and ends in different files.
9+
// We use the `respan!` macro to give all tokens the same
10+
// `SyntaxContext`, so that the parser will try to merge the spans.
11+
respan::respan!(let a = 1 + $first;);
12+
}
13+
}
14+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// force-host
2+
// no-prefer-dynamic
3+
4+
#![crate_type = "proc-macro"]
5+
6+
extern crate proc_macro;
7+
use proc_macro::TokenStream;
8+
9+
10+
/// Copies the resolution information (the `SyntaxContext`) of the first
11+
/// token to all other tokens in the stream. Does not recurse into groups.
12+
#[proc_macro]
13+
pub fn respan(input: TokenStream) -> TokenStream {
14+
let first_span = input.clone().into_iter().next().unwrap().span();
15+
input.into_iter().map(|mut tree| {
16+
tree.set_span(tree.span().resolved_at(first_span));
17+
tree
18+
}).collect()
19+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// revisions: rpass1 rpass2
2+
// aux-build:respan.rs
3+
// aux-build:invalid-span-helper-lib.rs
4+
5+
// This issue has several different parts. The high level idea is:
6+
// 1. We create an 'invalid' span with the help of the `respan` proc-macro,
7+
// The compiler attempts to prevent the creation of invalid spans by
8+
// refusing to join spans with different `SyntaxContext`s. We work around
9+
// this by applying the same `SyntaxContext` to the span of every token,
10+
// using `Span::resolved_at`
11+
// 2. We using this invalid span in the body of a function, causing it to get
12+
// encoded into the `optimized_mir`
13+
// 3. We call the function from a different crate - since the function is generic,
14+
// monomorphization runs, causing `optimized_mir` to get called.
15+
// 4. We re-run compilation using our populated incremental cache, but without
16+
// making any changes. When we recompile the crate containing our generic function
17+
// (`invalid_span_helper_lib`), we load the span from the incremental cache, and
18+
// write it into the crate metadata.
19+
20+
extern crate invalid_span_helper_lib;
21+
22+
fn main() {
23+
invalid_span_helper_lib::foo::<u8>();
24+
}

0 commit comments

Comments
 (0)