14 | 14 | //! ownership of the original.
15 | 15 |
16 | 16 | use std::borrow::Cow;
| 17 | +use std::ops::Range; |
17 | 18 | use std::sync::Arc;
18 | | -use std::{cmp, fmt, iter}; |
| 19 | +use std::{cmp, fmt, iter, mem}; |
19 | 20 |
20 | 21 | use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
21 | 22 | use rustc_data_structures::sync;
@@ -156,13 +157,238 @@ impl<CTX> HashStable<CTX> for LazyAttrTokenStream {
156 | 157 | }
157 | 158 | }
158 | 159 |
| 160 | +/// A token range within a `Parser`'s full token stream. |
| 161 | +#[derive(Clone, Debug)] |
| 162 | +pub struct ParserRange(pub Range<u32>); |
| 163 | + |
| 164 | +/// A token range within an individual AST node's (lazy) token stream, i.e. |
| 165 | +/// relative to that node's first token. Distinct from `ParserRange` so the two |
| 166 | +/// kinds of range can't be mixed up. |
| 167 | +#[derive(Clone, Debug)] |
| 168 | +pub struct NodeRange(pub Range<u32>); |
| 169 | + |
| 170 | +/// Indicates a range of tokens that should be replaced by an `AttrsTarget` |
| 171 | +/// (replacement) or be replaced by nothing (deletion). This is used in two |
| 172 | +/// places during token collection. |
| 173 | +/// |
| 174 | +/// 1. Replacement. During the parsing of an AST node that may have a |
| 175 | +/// `#[derive]` attribute, when we parse a nested AST node that has `#[cfg]` |
| 176 | +/// or `#[cfg_attr]`, we replace the entire inner AST node with |
| 177 | +/// `FlatToken::AttrsTarget`. This lets us perform eager cfg-expansion on an |
| 178 | +/// `AttrTokenStream`. |
| 179 | +/// |
| 180 | +/// 2. Deletion. We delete inner attributes from all collected token streams, |
| 181 | +/// and instead track them through the `attrs` field on the AST node. This |
| 182 | +/// lets us manipulate them similarly to outer attributes. When we create a |
| 183 | +/// `TokenStream`, the inner attributes are inserted into the proper place |
| 184 | +/// in the token stream. |
| 185 | +/// |
| 186 | +/// Each replacement starts off in `ParserReplacement` form but is converted to |
| 187 | +/// `NodeReplacement` form when it is attached to a single AST node, via |
| 188 | +/// `LazyAttrTokenStreamImpl`. |
| 189 | +pub type ParserReplacement = (ParserRange, Option<AttrsTarget>); |
| 190 | + |
| 191 | +/// See the comment on `ParserReplacement`. |
| 192 | +pub type NodeReplacement = (NodeRange, Option<AttrsTarget>); |
| 193 | + |
| 194 | +impl NodeRange { |
| 195 | + // Converts a range within a parser's tokens to a range within a |
| 196 | + // node's tokens beginning at `start_pos`. |
| 197 | + // |
| 198 | + // For example, imagine a parser with 50 tokens in its token stream, a |
| 199 | + // function that spans `ParserRange(20..40)` and an inner attribute within |
| 200 | + // that function that spans `ParserRange(30..35)`. We would find the inner |
| 201 | + // attribute's range within the function's tokens by subtracting 20, which |
| 202 | + // is the position of the function's start token. This gives |
| 203 | + // `NodeRange(10..15)`. |
| 204 | + pub fn new(ParserRange(parser_range): ParserRange, start_pos: u32) -> NodeRange { |
| 205 | + assert!(!parser_range.is_empty()); |
| 206 | + assert!(parser_range.start >= start_pos); |
| 207 | + NodeRange((parser_range.start - start_pos)..(parser_range.end - start_pos)) |
| 208 | + } |
| 209 | +} |
| 210 | + |
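To make the offset arithmetic concrete, here is a self-contained sketch of the conversion using the exact numbers from the doc comment above (the `main` harness is illustrative only, not part of the commit):

```rust
use std::ops::Range;

pub struct ParserRange(pub Range<u32>);
pub struct NodeRange(pub Range<u32>);

impl NodeRange {
    // Same logic as the commit: shift a parser-relative range so that it is
    // relative to the node's first token.
    pub fn new(ParserRange(parser_range): ParserRange, start_pos: u32) -> NodeRange {
        assert!(!parser_range.is_empty());
        assert!(parser_range.start >= start_pos);
        NodeRange((parser_range.start - start_pos)..(parser_range.end - start_pos))
    }
}

fn main() {
    // A function spans ParserRange(20..40); an inner attribute within it
    // spans ParserRange(30..35). Relative to the function's first token
    // (position 20), the attribute occupies NodeRange(10..15).
    let attr = NodeRange::new(ParserRange(30..35), 20);
    assert_eq!(attr.0, 10..15);
}
```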
| 211 | +// From a value of this type we can reconstruct the `TokenStream` seen by the |
| 212 | +// `f` callback passed to a call to `Parser::collect_tokens`, by |
| 213 | +// replaying how the tokens were fetched. This spares us building a `TokenStream` |
| 214 | +// if it is never needed, e.g. a captured `macro_rules!` argument that is never |
| 215 | +// passed to a proc macro. In practice, token stream creation happens rarely |
| 216 | +// compared to calls to `collect_tokens` (see some statistics in #78736) so we |
| 217 | +// are doing as little up-front work as possible. |
| 218 | +// |
| 219 | +// This also makes `Parser` very cheap to clone, since |
| 220 | +// there is no intermediate collection buffer to clone. |
| 221 | +pub struct LazyAttrTokenStreamImpl { |
| 222 | + pub start_token: (Token, Spacing), |
| 223 | + pub cursor_snapshot: TokenCursor, |
| 224 | + pub num_calls: u32, |
| 225 | + pub break_last_token: u32, |
| 226 | + pub node_replacements: Box<[NodeReplacement]>, |
| 227 | +} |
| 228 | + |
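The replay idea can be shown in miniature: store a cheaply clonable snapshot plus a call count, and materialize the sequence only on demand. A hedged sketch, with `char`s standing in for `Token`s and `Rc` for the shared buffer behind the real `TokenCursor` (the toy names here are not from the commit):

```rust
use std::iter;
use std::rc::Rc;

// Stand-in for `TokenCursor`: cloning is cheap because the buffer is shared,
// so taking a snapshot costs almost nothing.
#[derive(Clone)]
struct Snapshot {
    tokens: Rc<Vec<char>>,
    index: usize,
}

// Stand-in for `to_attr_token_stream`: chain the start token with the
// replayed cursor and `take` exactly as many tokens as the parser consumed.
fn replay(start: char, snap: &Snapshot, num_calls: usize) -> Vec<char> {
    iter::once(start)
        .chain(snap.tokens[snap.index..].iter().copied())
        .take(num_calls)
        .collect()
}

fn main() {
    let snap = Snapshot { tokens: Rc::new(vec!['b', 'c', 'd', 'e']), index: 0 };
    // Three calls were made during parsing, so the replay yields three
    // tokens; nothing at all is built unless `replay` is invoked.
    assert_eq!(replay('a', &snap, 3), vec!['a', 'b', 'c']);
}
```

This is also why the comment above can claim `Parser` stays cheap to clone: the snapshot shares the underlying stream rather than copying it.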
| 229 | +impl ToAttrTokenStream for LazyAttrTokenStreamImpl { |
| 230 | + fn to_attr_token_stream(&self) -> AttrTokenStream { |
| 231 | + // The token produced by the final call to `{,inlined_}next` was not |
| 232 | + // actually consumed by the callback. The combination of chaining the |
| 233 | + // initial token and using `take` produces the desired result - we |
| 234 | + // produce an empty `TokenStream` if no calls were made, and omit the |
| 235 | + // final token otherwise. |
| 236 | + let mut cursor_snapshot = self.cursor_snapshot.clone(); |
| 237 | + let tokens = iter::once(FlatToken::Token(self.start_token)) |
| 238 | + .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next()))) |
| 239 | + .take(self.num_calls as usize); |
| 240 | + |
| 241 | + if self.node_replacements.is_empty() { |
| 242 | + make_attr_token_stream(tokens, self.break_last_token) |
| 243 | + } else { |
| 244 | + let mut tokens: Vec<_> = tokens.collect(); |
| 245 | + let mut node_replacements = self.node_replacements.to_vec(); |
| 246 | + node_replacements.sort_by_key(|(range, _)| range.0.start); |
| 247 | + |
| 248 | + #[cfg(debug_assertions)] |
| 249 | + for [(node_range, tokens), (next_node_range, next_tokens)] in |
| 250 | + node_replacements.array_windows() |
| 251 | + { |
| 252 | + assert!( |
| 253 | + node_range.0.end <= next_node_range.0.start |
| 254 | + || node_range.0.end >= next_node_range.0.end, |
| 255 | + "Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})", |
| 256 | + node_range, |
| 257 | + tokens, |
| 258 | + next_node_range, |
| 259 | + next_tokens, |
| 260 | + ); |
| 261 | + } |
| 262 | + |
| 263 | + // Process the replace ranges, starting from the highest start |
| 264 | +            // position and working our way back. If we have tokens like: |
| 265 | + // |
| 266 | + // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }` |
| 267 | + // |
| 268 | + // Then we will generate replace ranges for both |
| 269 | + // the `#[cfg(FALSE)] field: bool` and the entire |
| 270 | + // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }` |
| 271 | + // |
| 272 | + // By starting processing from the replace range with the greatest |
| 273 | + // start position, we ensure that any (outer) replace range which |
| 274 | + // encloses another (inner) replace range will fully overwrite the |
| 275 | + // inner range's replacement. |
| 276 | + for (node_range, target) in node_replacements.into_iter().rev() { |
| 277 | + assert!( |
| 278 | + !node_range.0.is_empty(), |
| 279 | + "Cannot replace an empty node range: {:?}", |
| 280 | + node_range.0 |
| 281 | + ); |
| 282 | + |
| 283 | + // Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s, plus |
| 284 | + // enough `FlatToken::Empty`s to fill up the rest of the range. This keeps the |
| 285 | + // total length of `tokens` constant throughout the replacement process, allowing |
| 286 | + // us to do all replacements without adjusting indices. |
| 287 | + let target_len = target.is_some() as usize; |
| 288 | + tokens.splice( |
| 289 | + (node_range.0.start as usize)..(node_range.0.end as usize), |
| 290 | + target.into_iter().map(|target| FlatToken::AttrsTarget(target)).chain( |
| 291 | + iter::repeat(FlatToken::Empty).take(node_range.0.len() - target_len), |
| 292 | + ), |
| 293 | + ); |
| 294 | + } |
| 295 | + make_attr_token_stream(tokens.into_iter(), self.break_last_token) |
| 296 | + } |
| 297 | + } |
| 298 | +} |
| 299 | + |
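The subtle point in the loop above is that every splice preserves the overall length, so processing replacements from the highest start position backwards never invalidates earlier indices, and an enclosing range simply overwrites whatever an inner range wrote (the invariant the debug assertion checks). A minimal model, where `Flat` is a toy stand-in for `FlatToken` rather than a type from this commit:

```rust
use std::iter;
use std::ops::Range;

#[derive(Clone, Debug, PartialEq)]
enum Flat {
    Tok(char),            // stands in for FlatToken::Token
    Target(&'static str), // stands in for FlatToken::AttrsTarget
    Empty,                // stands in for FlatToken::Empty
}

// Overwrite `range` with at most one target plus enough `Empty` padding to
// keep `tokens.len()` unchanged, exactly like the splice in the commit.
fn replace(tokens: &mut Vec<Flat>, range: Range<usize>, target: Option<&'static str>) {
    assert!(!range.is_empty()); // the commit asserts node ranges are non-empty
    let pad = range.len() - target.is_some() as usize;
    tokens.splice(
        range,
        target.into_iter().map(Flat::Target).chain(iter::repeat(Flat::Empty).take(pad)),
    );
}

fn main() {
    let mut tokens: Vec<Flat> = "abcdefgh".chars().map(Flat::Tok).collect();
    // Nested ranges, processed from the highest start position backwards:
    // first the inner range 4..6, then the enclosing range 2..8, which
    // fully overwrites the inner replacement.
    replace(&mut tokens, 4..6, Some("inner"));
    replace(&mut tokens, 2..8, Some("outer"));
    assert_eq!(tokens.len(), 8); // the length never changed
    assert_eq!(tokens[2], Flat::Target("outer"));
    assert!(tokens[3..].iter().all(|t| *t == Flat::Empty));
}
```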
| 300 | +/// A helper struct used when building an `AttrTokenStream` from |
| 301 | +/// a `LazyAttrTokenStream`. Both delimiter and non-delimiter tokens |
| 302 | +/// are stored as `FlatToken::Token`. A vector of `FlatToken`s |
| 303 | +/// is then 'parsed' to build up an `AttrTokenStream` with nested |
| 304 | +/// `AttrTokenTree::Delimited` tokens. |
| 305 | +#[derive(Debug, Clone)] |
| 306 | +enum FlatToken { |
| 307 | +    /// A token. This holds both delimiter (e.g. '{' and '}') |
| 308 | +    /// and non-delimiter tokens. |
| 309 | + Token((Token, Spacing)), |
| 310 | + /// Holds the `AttrsTarget` for an AST node. The `AttrsTarget` is inserted |
| 311 | + /// directly into the constructed `AttrTokenStream` as an |
| 312 | + /// `AttrTokenTree::AttrsTarget`. |
| 313 | + AttrsTarget(AttrsTarget), |
| 314 | + /// A special 'empty' token that is ignored during the conversion |
| 315 | + /// to an `AttrTokenStream`. This is used to simplify the |
| 316 | + /// handling of replace ranges. |
| 317 | + Empty, |
| 318 | +} |
| 319 | + |
159 | 320 | /// An `AttrTokenStream` is similar to a `TokenStream`, but with extra
160 | 321 | /// information about the tokens for attribute targets. This is used
161 | 322 | /// during expansion to perform early cfg-expansion, and to process attributes
162 | 323 | /// during proc-macro invocations.
163 | 324 | #[derive(Clone, Debug, Default, Encodable, Decodable)]
164 | 325 | pub struct AttrTokenStream(pub Arc<Vec<AttrTokenTree>>);
165 | 326 |
| 327 | +/// Converts a flattened iterator of tokens (including open and close delimiter tokens) into an |
| 328 | +/// `AttrTokenStream`, creating an `AttrTokenTree::Delimited` for each matching pair of open and |
| 329 | +/// close delims. |
| 330 | +fn make_attr_token_stream( |
| 331 | + iter: impl Iterator<Item = FlatToken>, |
| 332 | + break_last_token: u32, |
| 333 | +) -> AttrTokenStream { |
| 334 | + #[derive(Debug)] |
| 335 | + struct FrameData { |
| 336 | + // This is `None` for the first frame, `Some` for all others. |
| 337 | + open_delim_sp: Option<(Delimiter, Span, Spacing)>, |
| 338 | + inner: Vec<AttrTokenTree>, |
| 339 | + } |
| 340 | + // The stack always has at least one element. Storing it separately makes for shorter code. |
| 341 | + let mut stack_top = FrameData { open_delim_sp: None, inner: vec![] }; |
| 342 | + let mut stack_rest = vec![]; |
| 343 | + for flat_token in iter { |
| 344 | + match flat_token { |
| 345 | + FlatToken::Token((token @ Token { kind, span }, spacing)) => { |
| 346 | + if let Some(delim) = kind.open_delim() { |
| 347 | + stack_rest.push(mem::replace( |
| 348 | + &mut stack_top, |
| 349 | + FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] }, |
| 350 | + )); |
| 351 | + } else if let Some(delim) = kind.close_delim() { |
| 352 | + let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap()); |
| 353 | + let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap(); |
| 354 | + assert!( |
| 355 | + open_delim.eq_ignoring_invisible_origin(&delim), |
| 356 | + "Mismatched open/close delims: open={open_delim:?} close={span:?}" |
| 357 | + ); |
| 358 | + let dspan = DelimSpan::from_pair(open_sp, span); |
| 359 | + let dspacing = DelimSpacing::new(open_spacing, spacing); |
| 360 | + let stream = AttrTokenStream::new(frame_data.inner); |
| 361 | + let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream); |
| 362 | + stack_top.inner.push(delimited); |
| 363 | + } else { |
| 364 | + stack_top.inner.push(AttrTokenTree::Token(token, spacing)) |
| 365 | + } |
| 366 | + } |
| 367 | + FlatToken::AttrsTarget(target) => { |
| 368 | + stack_top.inner.push(AttrTokenTree::AttrsTarget(target)) |
| 369 | + } |
| 370 | + FlatToken::Empty => {} |
| 371 | + } |
| 372 | + } |
| 373 | + |
| 374 | + if break_last_token > 0 { |
| 375 | + let last_token = stack_top.inner.pop().unwrap(); |
| 376 | + if let AttrTokenTree::Token(last_token, spacing) = last_token { |
| 377 | + let (unglued, _) = last_token.kind.break_two_token_op(break_last_token).unwrap(); |
| 378 | + |
| 379 | + // Tokens are always ASCII chars, so we can use byte arithmetic here. |
| 380 | + let mut first_span = last_token.span.shrink_to_lo(); |
| 381 | + first_span = |
| 382 | + first_span.with_hi(first_span.lo() + rustc_span::BytePos(break_last_token)); |
| 383 | + |
| 384 | + stack_top.inner.push(AttrTokenTree::Token(Token::new(unglued, first_span), spacing)); |
| 385 | + } else { |
| 386 | + panic!("Unexpected last token {last_token:?}") |
| 387 | + } |
| 388 | + } |
| 389 | + AttrTokenStream::new(stack_top.inner) |
| 390 | +} |
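The frame handling above is the classic stack-based reconstruction of a tree from a flat, delimiter-bracketed sequence. A toy version with `char` parentheses and a hypothetical `Tree` type captures the `mem::take`/`mem::replace` discipline:

```rust
use std::mem;

#[derive(Debug)]
enum Tree {
    Leaf(char),
    Delimited(Vec<Tree>),
}

fn build(flat: &str) -> Vec<Tree> {
    // Keep the always-present top frame separate from the rest of the
    // stack, mirroring `stack_top` / `stack_rest` in the commit.
    let mut top: Vec<Tree> = vec![];
    let mut rest: Vec<Vec<Tree>> = vec![];
    for c in flat.chars() {
        match c {
            // Open delimiter: save the current frame and start a fresh one.
            '(' => rest.push(mem::take(&mut top)),
            // Close delimiter: finish the current frame and attach it to its parent.
            ')' => {
                let inner = mem::replace(&mut top, rest.pop().expect("unbalanced close delim"));
                top.push(Tree::Delimited(inner));
            }
            _ => top.push(Tree::Leaf(c)),
        }
    }
    assert!(rest.is_empty(), "unbalanced open delim");
    top
}

fn main() {
    // Prints [Leaf('a'), Delimited([Leaf('b'), Leaf('c')]), Leaf('d')]
    println!("{:?}", build("a(bc)d"));
}
```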
| 391 | + |
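The `break_last_token` branch covers a parser quirk: only the first bytes of a compound operator may have been consumed, e.g. one `>` of the `>>` that closes nested generics such as `Vec<Vec<u8>>`, so the replayed stream must end with a token covering just those bytes. A toy version of the span arithmetic, with plain tuples standing in for `Span`/`BytePos`:

```rust
// Keep only the first `consumed` bytes of a compound operator's span.
// Tokens are always ASCII, so byte arithmetic is sound here.
fn break_last(op: &str, span: (u32, u32), consumed: u32) -> (&str, (u32, u32)) {
    assert!(op.is_ascii() && 0 < consumed && (consumed as usize) < op.len());
    (&op[..consumed as usize], (span.0, span.0 + consumed))
}

fn main() {
    // `>>` occupying bytes 10..12, with one byte actually consumed by the
    // parser: the replayed stream ends with `>` at bytes 10..11.
    assert_eq!(break_last(">>", (10, 12), 1), (">", (10, 11)));
}
```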
166 | 392 | /// Like `TokenTree`, but for `AttrTokenStream`.
167 | 393 | #[derive(Clone, Debug, Encodable, Decodable)]
168 | 394 | pub enum AttrTokenTree {
@@ -641,6 +867,104 @@ impl<'t> Iterator for TokenStreamIter<'t> {
641 | 867 | }
642 | 868 | }
643 | 869 |
| 870 | +#[derive(Clone, Debug)] |
| 871 | +pub struct TokenTreeCursor { |
| 872 | + stream: TokenStream, |
| 873 | + /// Points to the current token tree in the stream. In `TokenCursor::curr`, |
| 874 | + /// this can be any token tree. In `TokenCursor::stack`, this is always a |
| 875 | + /// `TokenTree::Delimited`. |
| 876 | + index: usize, |
| 877 | +} |
| 878 | + |
| 879 | +impl TokenTreeCursor { |
| 880 | + #[inline] |
| 881 | + pub fn new(stream: TokenStream) -> Self { |
| 882 | + TokenTreeCursor { stream, index: 0 } |
| 883 | + } |
| 884 | + |
| 885 | + #[inline] |
| 886 | + pub fn curr(&self) -> Option<&TokenTree> { |
| 887 | + self.stream.get(self.index) |
| 888 | + } |
| 889 | + |
| 890 | + pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> { |
| 891 | + self.stream.get(self.index + n) |
| 892 | + } |
| 893 | + |
| 894 | + #[inline] |
| 895 | + pub fn bump(&mut self) { |
| 896 | + self.index += 1; |
| 897 | + } |
| 898 | +} |
| 899 | + |
| 900 | +/// A `TokenStream` cursor that produces `Token`s. It's a bit odd that |
| 901 | +/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b) |
| 902 | +/// use this type to emit them as a linear sequence. But a linear sequence is |
| 903 | +/// what the parser expects, for the most part. |
| 904 | +#[derive(Clone, Debug)] |
| 905 | +pub struct TokenCursor { |
| 906 | + // Cursor for the current (innermost) token stream. The index within the |
| 907 | + // cursor can point to any token tree in the stream (or one past the end). |
| 908 | + // The delimiters for this token stream are found in `self.stack.last()`; |
| 909 | + // if that is `None` we are in the outermost token stream which never has |
| 910 | + // delimiters. |
| 911 | + pub curr: TokenTreeCursor, |
| 912 | + |
| 913 | + // Token streams surrounding the current one. The index within each cursor |
| 914 | + // always points to a `TokenTree::Delimited`. |
| 915 | + pub stack: Vec<TokenTreeCursor>, |
| 916 | +} |
| 917 | + |
| 918 | +impl TokenCursor { |
| 919 | + pub fn next(&mut self) -> (Token, Spacing) { |
| 920 | + self.inlined_next() |
| 921 | + } |
| 922 | + |
| 923 | + /// This always-inlined version should only be used on hot code paths. |
| 924 | + #[inline(always)] |
| 925 | + pub fn inlined_next(&mut self) -> (Token, Spacing) { |
| 926 | + loop { |
| 927 | + // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix |
| 928 | + // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions |
| 929 | + // below can be removed. |
| 930 | + if let Some(tree) = self.curr.curr() { |
| 931 | + match tree { |
| 932 | + &TokenTree::Token(token, spacing) => { |
| 933 | + debug_assert!(!token.kind.is_delim()); |
| 934 | + let res = (token, spacing); |
| 935 | + self.curr.bump(); |
| 936 | + return res; |
| 937 | + } |
| 938 | + &TokenTree::Delimited(sp, spacing, delim, ref tts) => { |
| 939 | + let trees = TokenTreeCursor::new(tts.clone()); |
| 940 | + self.stack.push(mem::replace(&mut self.curr, trees)); |
| 941 | + if !delim.skip() { |
| 942 | + return (Token::new(delim.as_open_token_kind(), sp.open), spacing.open); |
| 943 | + } |
| 944 | + // No open delimiter to return; continue on to the next iteration. |
| 945 | + } |
| 946 | + }; |
| 947 | + } else if let Some(parent) = self.stack.pop() { |
| 948 | + // We have exhausted this token stream. Move back to its parent token stream. |
| 949 | + let Some(&TokenTree::Delimited(span, spacing, delim, _)) = parent.curr() else { |
| 950 | + panic!("parent should be Delimited") |
| 951 | + }; |
| 952 | + self.curr = parent; |
| 953 | + self.curr.bump(); // move past the `Delimited` |
| 954 | + if !delim.skip() { |
| 955 | + return (Token::new(delim.as_close_token_kind(), span.close), spacing.close); |
| 956 | + } |
| 957 | + // No close delimiter to return; continue on to the next iteration. |
| 958 | + } else { |
| 959 | + // We have exhausted the outermost token stream. The use of |
| 960 | + // `Spacing::Alone` is arbitrary and immaterial, because the |
| 961 | + // `Eof` token's spacing is never used. |
| 962 | + return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone); |
| 963 | + } |
| 964 | + } |
| 965 | + } |
| 966 | +} |
| 967 | + |
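Taken together, `TokenTreeCursor` and `TokenCursor` linearize a token tree: a cursor into the innermost stream plus a stack of parent cursors, synthesizing an open token on descent and a close token on ascent. A self-contained sketch of that traversal (toy `Tree`, `Cursor`, and `Walker` types, not the commit's):

```rust
use std::mem;

#[derive(Clone)]
enum Tree {
    Leaf(char),
    Delimited(Vec<Tree>),
}

// Toy `TokenTreeCursor`: a stream plus an index into it.
struct Cursor {
    stream: Vec<Tree>,
    index: usize,
}

impl Cursor {
    fn curr(&self) -> Option<&Tree> {
        self.stream.get(self.index)
    }
    fn bump(&mut self) {
        self.index += 1;
    }
}

// Toy `TokenCursor`: the innermost cursor plus its stack of parents.
struct Walker {
    curr: Cursor,
    stack: Vec<Cursor>,
}

impl Walker {
    fn next(&mut self) -> Option<char> {
        if let Some(tree) = self.curr.curr() {
            match tree {
                Tree::Leaf(c) => {
                    let c = *c;
                    self.curr.bump();
                    Some(c)
                }
                Tree::Delimited(inner) => {
                    // Descend: save the parent cursor on the stack.
                    let inner = Cursor { stream: inner.clone(), index: 0 };
                    self.stack.push(mem::replace(&mut self.curr, inner));
                    Some('(') // synthesized open delimiter
                }
            }
        } else if let Some(parent) = self.stack.pop() {
            // Ascend: move past the `Delimited` we just finished.
            self.curr = parent;
            self.curr.bump();
            Some(')') // synthesized close delimiter
        } else {
            None // outermost stream exhausted, like `Eof`
        }
    }
}

fn main() {
    let tree = vec![Tree::Leaf('a'), Tree::Delimited(vec![Tree::Leaf('b')]), Tree::Leaf('c')];
    let mut w = Walker { curr: Cursor { stream: tree, index: 0 }, stack: vec![] };
    let mut out = String::new();
    while let Some(c) = w.next() {
        out.push(c);
    }
    assert_eq!(out, "a(b)c");
}
```

The real `TokenCursor` differs in one respect: invisible delimiters are skipped rather than emitted, which is why its version loops instead of returning immediately (see the FIXME about #67062 above).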
644 | 968 | #[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
645 | 969 | pub struct DelimSpan {
646 | 970 | pub open: Span,