Skip to content

Commit 5d5dcae

Browse files
committed
Auto merge of #44601 - alexcrichton:lower-attributes-in-hir, r=nrc
rustc: Forbid interpolated tokens in the HIR Right now the HIR contains raw `syntax::ast::Attribute` structure but nowadays these can contain arbitrary tokens. One variant of the `Token` enum is an "interpolated" token which basically means to shove all the tokens for a nonterminal in this position. A "nonterminal" in this case is roughly analogous to a macro argument: macro_rules! foo { ($a:expr) => { // $a is a nonterminal as an expression } } Currently nonterminals mainly contain items and expressions, and this poses a problem for incremental compilation! With incremental we want a stable hash of all HIR items, but this means we may transitively need a stable hash *of the entire AST*, which is certainly not stable w/ node ids and whatnot. Hence today there's a "bug" where the "stable hash" of an AST is just the raw hash value of the AST, and this only arises with interpolated nonterminals. The downside of this approach, however, is that a bunch of errors get spewed out during compilation about how this isn't a great idea. This PR is focused at fixing these warnings, basically deleting them from the compiler. The implementation here is to alter attributes as they're lowered from the AST to HIR, expanding all nonterminals in-place as we see them. This code for expanding a nonterminal to a token stream already exists for the `proc_macro` crate, so we basically just reuse the same implementation there. After this PR it's considered a bug to have an `Interpolated` token and hence the stable hash implementation simply uses `bug!` in this location. Closes #40946
2 parents 9a00f3c + 0694e4f commit 5d5dcae

File tree

4 files changed

+143
-91
lines changed

4 files changed

+143
-91
lines changed

src/libproc_macro/lib.rs

+5-71
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,7 @@ use std::str::FromStr;
5454

5555
use syntax::ast;
5656
use syntax::errors::DiagnosticBuilder;
57-
use syntax::parse::{self, token, parse_stream_from_source_str};
58-
use syntax::print::pprust;
57+
use syntax::parse::{self, token};
5958
use syntax::symbol::Symbol;
6059
use syntax::tokenstream;
6160
use syntax_pos::DUMMY_SP;
@@ -525,47 +524,10 @@ impl TokenTree {
525524
Ident(ident) | Lifetime(ident) => TokenNode::Term(Term(ident.name)),
526525
Literal(..) | DocComment(..) => TokenNode::Literal(self::Literal(token)),
527526

528-
Interpolated(ref nt) => {
529-
// An `Interpolated` token means that we have a `Nonterminal`
530-
// which is often a parsed AST item. At this point we now need
531-
// to convert the parsed AST to an actual token stream, e.g.
532-
// un-parse it basically.
533-
//
534-
// Unfortunately there's not really a great way to do that in a
535-
// guaranteed lossless fashion right now. The fallback here is
536-
// to just stringify the AST node and reparse it, but this loses
537-
// all span information.
538-
//
539-
// As a result, some AST nodes are annotated with the token
540-
// stream they came from. Attempt to extract these lossless
541-
// token streams before we fall back to the stringification.
542-
let mut tokens = None;
543-
544-
match nt.0 {
545-
Nonterminal::NtItem(ref item) => {
546-
tokens = prepend_attrs(&item.attrs, item.tokens.as_ref(), span);
547-
}
548-
Nonterminal::NtTraitItem(ref item) => {
549-
tokens = prepend_attrs(&item.attrs, item.tokens.as_ref(), span);
550-
}
551-
Nonterminal::NtImplItem(ref item) => {
552-
tokens = prepend_attrs(&item.attrs, item.tokens.as_ref(), span);
553-
}
554-
_ => {}
555-
}
556-
557-
tokens.map(|tokens| {
558-
TokenNode::Group(Delimiter::None,
559-
TokenStream(tokens.clone()))
560-
}).unwrap_or_else(|| {
561-
__internal::with_sess(|(sess, _)| {
562-
TokenNode::Group(Delimiter::None, TokenStream(nt.1.force(|| {
563-
// FIXME(jseyfried): Avoid this pretty-print + reparse hack
564-
let name = "<macro expansion>".to_owned();
565-
let source = pprust::token_to_string(&token);
566-
parse_stream_from_source_str(name, source, sess, Some(span))
567-
})))
568-
})
527+
Interpolated(_) => {
528+
__internal::with_sess(|(sess, _)| {
529+
let tts = token.interpolated_to_tokenstream(sess, span);
530+
TokenNode::Group(Delimiter::None, TokenStream(tts))
569531
})
570532
}
571533

@@ -631,34 +593,6 @@ impl TokenTree {
631593
}
632594
}
633595

634-
fn prepend_attrs(attrs: &[ast::Attribute],
635-
tokens: Option<&tokenstream::TokenStream>,
636-
span: syntax_pos::Span)
637-
-> Option<tokenstream::TokenStream>
638-
{
639-
let tokens = match tokens {
640-
Some(tokens) => tokens,
641-
None => return None,
642-
};
643-
if attrs.len() == 0 {
644-
return Some(tokens.clone())
645-
}
646-
let mut builder = tokenstream::TokenStreamBuilder::new();
647-
for attr in attrs {
648-
assert_eq!(attr.style, ast::AttrStyle::Outer,
649-
"inner attributes should prevent cached tokens from existing");
650-
let stream = __internal::with_sess(|(sess, _)| {
651-
// FIXME: Avoid this pretty-print + reparse hack as above
652-
let name = "<macro expansion>".to_owned();
653-
let source = pprust::attr_to_string(attr);
654-
parse_stream_from_source_str(name, source, sess, Some(span))
655-
});
656-
builder.push(stream);
657-
}
658-
builder.push(tokens.clone());
659-
Some(builder.build())
660-
}
661-
662596
/// Permanently unstable internal implementation details of this crate. This
663597
/// should not be used.
664598
///

src/librustc/hir/lowering.rs

+48-2
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ use syntax::ptr::P;
6464
use syntax::codemap::{self, respan, Spanned, CompilerDesugaringKind};
6565
use syntax::std_inject;
6666
use syntax::symbol::{Symbol, keywords};
67+
use syntax::tokenstream::{TokenStream, TokenTree, Delimited};
68+
use syntax::parse::token::{Token, DelimToken};
6769
use syntax::util::small_vector::SmallVector;
6870
use syntax::visit::{self, Visitor};
6971
use syntax_pos::Span;
@@ -589,7 +591,50 @@ impl<'a> LoweringContext<'a> {
589591
}
590592

591593
fn lower_attrs(&mut self, attrs: &Vec<Attribute>) -> hir::HirVec<Attribute> {
592-
attrs.clone().into()
594+
attrs.iter().map(|a| self.lower_attr(a)).collect::<Vec<_>>().into()
595+
}
596+
597+
fn lower_attr(&mut self, attr: &Attribute) -> Attribute {
598+
Attribute {
599+
id: attr.id,
600+
style: attr.style,
601+
path: attr.path.clone(),
602+
tokens: self.lower_token_stream(attr.tokens.clone()),
603+
is_sugared_doc: attr.is_sugared_doc,
604+
span: attr.span,
605+
}
606+
}
607+
608+
fn lower_token_stream(&mut self, tokens: TokenStream) -> TokenStream {
609+
tokens.into_trees().map(|tree| self.lower_token_tree(tree)).collect()
610+
}
611+
612+
fn lower_token_tree(&mut self, tree: TokenTree) -> TokenTree {
613+
match tree {
614+
TokenTree::Token(span, token) => {
615+
self.lower_token(token, span)
616+
}
617+
TokenTree::Delimited(span, delimited) => {
618+
TokenTree::Delimited(span, Delimited {
619+
delim: delimited.delim,
620+
tts: self.lower_token_stream(delimited.tts.into()).into(),
621+
})
622+
}
623+
}
624+
}
625+
626+
fn lower_token(&mut self, token: Token, span: Span) -> TokenTree {
627+
match token {
628+
Token::Interpolated(_) => {}
629+
other => return TokenTree::Token(span, other),
630+
}
631+
632+
let tts = token.interpolated_to_tokenstream(&self.sess.parse_sess, span);
633+
let tts = self.lower_token_stream(tts);
634+
TokenTree::Delimited(span, Delimited {
635+
delim: DelimToken::NoDelim,
636+
tts: tts.into(),
637+
})
593638
}
594639

595640
fn lower_arm(&mut self, arm: &Arm) -> hir::Arm {
@@ -1625,13 +1670,14 @@ impl<'a> LoweringContext<'a> {
16251670
let attrs = self.lower_attrs(&i.attrs);
16261671
if let ItemKind::MacroDef(ref def) = i.node {
16271672
if !def.legacy || i.attrs.iter().any(|attr| attr.path == "macro_export") {
1673+
let body = self.lower_token_stream(def.stream());
16281674
self.exported_macros.push(hir::MacroDef {
16291675
name,
16301676
vis,
16311677
attrs,
16321678
id: i.id,
16331679
span: i.span,
1634-
body: def.stream(),
1680+
body,
16351681
legacy: def.legacy,
16361682
});
16371683
}

src/librustc/ich/impls_syntax.rs

+5-18
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use syntax::ast;
2020
use syntax::parse::token;
2121
use syntax::symbol::InternedString;
2222
use syntax::tokenstream;
23-
use syntax_pos::{Span, FileMap};
23+
use syntax_pos::FileMap;
2424

2525
use hir::def_id::{DefId, CrateNum, CRATE_DEF_INDEX};
2626

@@ -228,7 +228,7 @@ for tokenstream::TokenTree {
228228
match *self {
229229
tokenstream::TokenTree::Token(span, ref token) => {
230230
span.hash_stable(hcx, hasher);
231-
hash_token(token, hcx, hasher, span);
231+
hash_token(token, hcx, hasher);
232232
}
233233
tokenstream::TokenTree::Delimited(span, ref delimited) => {
234234
span.hash_stable(hcx, hasher);
@@ -254,8 +254,7 @@ for tokenstream::TokenStream {
254254

255255
fn hash_token<'gcx, W: StableHasherResult>(token: &token::Token,
256256
hcx: &mut StableHashingContext<'gcx>,
257-
hasher: &mut StableHasher<W>,
258-
error_reporting_span: Span) {
257+
hasher: &mut StableHasher<W>) {
259258
mem::discriminant(token).hash_stable(hcx, hasher);
260259
match *token {
261260
token::Token::Eq |
@@ -318,20 +317,8 @@ fn hash_token<'gcx, W: StableHasherResult>(token: &token::Token,
318317
token::Token::Ident(ident) |
319318
token::Token::Lifetime(ident) => ident.name.hash_stable(hcx, hasher),
320319

321-
token::Token::Interpolated(ref non_terminal) => {
322-
// FIXME(mw): This could be implemented properly. It's just a
323-
// lot of work, since we would need to hash the AST
324-
// in a stable way, in addition to the HIR.
325-
// Since this is hardly used anywhere, just emit a
326-
// warning for now.
327-
if hcx.sess().opts.debugging_opts.incremental.is_some() {
328-
let msg = format!("Quasi-quoting might make incremental \
329-
compilation very inefficient: {:?}",
330-
non_terminal);
331-
hcx.sess().span_warn(error_reporting_span, &msg[..]);
332-
}
333-
334-
std_hash::Hash::hash(non_terminal, hasher);
320+
token::Token::Interpolated(_) => {
321+
bug!("interpolated tokens should not be present in the HIR")
335322
}
336323

337324
token::Token::DocComment(val) |

src/libsyntax/parse/token.rs

+85
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,15 @@ pub use self::Lit::*;
1515
pub use self::Token::*;
1616

1717
use ast::{self};
18+
use parse::ParseSess;
19+
use print::pprust;
1820
use ptr::P;
1921
use serialize::{Decodable, Decoder, Encodable, Encoder};
2022
use symbol::keywords;
23+
use syntax::parse::parse_stream_from_source_str;
24+
use syntax_pos::{self, Span};
2125
use tokenstream::{TokenStream, TokenTree};
26+
use tokenstream;
2227

2328
use std::cell::Cell;
2429
use std::{cmp, fmt};
@@ -421,6 +426,59 @@ impl Token {
421426
pub fn is_reserved_ident(&self) -> bool {
422427
self.is_special_ident() || self.is_used_keyword() || self.is_unused_keyword()
423428
}
429+
430+
pub fn interpolated_to_tokenstream(&self, sess: &ParseSess, span: Span)
431+
-> TokenStream
432+
{
433+
let nt = match *self {
434+
Token::Interpolated(ref nt) => nt,
435+
_ => panic!("only works on interpolated tokens"),
436+
};
437+
438+
// An `Interpolated` token means that we have a `Nonterminal`
439+
// which is often a parsed AST item. At this point we now need
440+
// to convert the parsed AST to an actual token stream, e.g.
441+
// un-parse it basically.
442+
//
443+
// Unfortunately there's not really a great way to do that in a
444+
// guaranteed lossless fashion right now. The fallback here is
445+
// to just stringify the AST node and reparse it, but this loses
446+
// all span information.
447+
//
448+
// As a result, some AST nodes are annotated with the token
449+
// stream they came from. Attempt to extract these lossless
450+
// token streams before we fall back to the stringification.
451+
let mut tokens = None;
452+
453+
match nt.0 {
454+
Nonterminal::NtItem(ref item) => {
455+
tokens = prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span);
456+
}
457+
Nonterminal::NtTraitItem(ref item) => {
458+
tokens = prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span);
459+
}
460+
Nonterminal::NtImplItem(ref item) => {
461+
tokens = prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span);
462+
}
463+
Nonterminal::NtIdent(ident) => {
464+
let token = Token::Ident(ident.node);
465+
tokens = Some(TokenTree::Token(ident.span, token).into());
466+
}
467+
Nonterminal::NtTT(ref tt) => {
468+
tokens = Some(tt.clone().into());
469+
}
470+
_ => {}
471+
}
472+
473+
tokens.unwrap_or_else(|| {
474+
nt.1.force(|| {
475+
// FIXME(jseyfried): Avoid this pretty-print + reparse hack
476+
let name = "<macro expansion>".to_owned();
477+
let source = pprust::token_to_string(self);
478+
parse_stream_from_source_str(name, source, sess, Some(span))
479+
})
480+
})
481+
}
424482
}
425483

426484
#[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash)]
@@ -533,3 +591,30 @@ impl Decodable for LazyTokenStream {
533591
impl ::std::hash::Hash for LazyTokenStream {
534592
fn hash<H: ::std::hash::Hasher>(&self, _hasher: &mut H) {}
535593
}
594+
595+
fn prepend_attrs(sess: &ParseSess,
596+
attrs: &[ast::Attribute],
597+
tokens: Option<&tokenstream::TokenStream>,
598+
span: syntax_pos::Span)
599+
-> Option<tokenstream::TokenStream>
600+
{
601+
let tokens = match tokens {
602+
Some(tokens) => tokens,
603+
None => return None,
604+
};
605+
if attrs.len() == 0 {
606+
return Some(tokens.clone())
607+
}
608+
let mut builder = tokenstream::TokenStreamBuilder::new();
609+
for attr in attrs {
610+
assert_eq!(attr.style, ast::AttrStyle::Outer,
611+
"inner attributes should prevent cached tokens from existing");
612+
// FIXME: Avoid this pretty-print + reparse hack as above
613+
let name = "<macro expansion>".to_owned();
614+
let source = pprust::attr_to_string(attr);
615+
let stream = parse_stream_from_source_str(name, source, sess, Some(span));
616+
builder.push(stream);
617+
}
618+
builder.push(tokens.clone());
619+
Some(builder.build())
620+
}

0 commit comments

Comments
 (0)