Skip to content

Commit 92a6256

Browse files
committed
substitute_repetition: Correctly insert sub-fragments
1 parent 1f546e5 commit 92a6256

File tree

2 files changed

+119
-71
lines changed

2 files changed

+119
-71
lines changed

gcc/rust/expand/rust-macro-expand.cc

Lines changed: 72 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -3118,7 +3118,7 @@ MacroExpander::expand_decl_macro (Location invoc_locus,
31183118

31193119
// find matching arm
31203120
AST::MacroRule *matched_rule = nullptr;
3121-
std::map<std::string, MatchedFragment> matched_fragments;
3121+
std::map<std::string, std::vector<MatchedFragment>> matched_fragments;
31223122
for (auto &rule : rules_def.get_rules ())
31233123
{
31243124
sub_stack.push ();
@@ -3127,9 +3127,9 @@ MacroExpander::expand_decl_macro (Location invoc_locus,
31273127

31283128
if (did_match_rule)
31293129
{
3130-
for (auto &frag : matched_fragments)
3131-
rust_debug ("matched fragment: %s",
3132-
frag.second.as_string ().c_str ());
3130+
for (auto &kv : matched_fragments)
3131+
rust_debug ("[fragment]: %s (%ld)", kv.first.c_str (),
3132+
kv.second.size ());
31333133

31343134
matched_rule = &rule;
31353135
break;
@@ -3535,9 +3535,8 @@ MacroExpander::match_matcher (Parser<MacroInvocLexer> &parser,
35353535

35363536
// matched fragment get the offset in the token stream
35373537
size_t offs_end = source.get_offs ();
3538-
sub_stack.peek ().insert (
3539-
{fragment->get_ident (),
3540-
MatchedFragment (fragment->get_ident (), offs_begin, offs_end)});
3538+
sub_stack.insert_fragment (
3539+
MatchedFragment (fragment->get_ident (), offs_begin, offs_end));
35413540
}
35423541
break;
35433542

@@ -3632,10 +3631,9 @@ MacroExpander::match_n_matches (
36323631

36333632
// matched fragment get the offset in the token stream
36343633
size_t offs_end = source.get_offs ();
3635-
sub_stack.peek ().insert (
3636-
{fragment->get_ident (),
3637-
MatchedFragment (fragment->get_ident (), offs_begin,
3638-
offs_end)});
3634+
sub_stack.insert_fragment (
3635+
MatchedFragment (fragment->get_ident (), offs_begin,
3636+
offs_end));
36393637
}
36403638
break;
36413639

@@ -3729,17 +3727,19 @@ MacroExpander::match_repetition (Parser<MacroInvocLexer> &parser,
37293727
auto fragment = static_cast<AST::MacroMatchFragment *> (match.get ());
37303728
auto it = stack_map.find (fragment->get_ident ());
37313729

3732-
// If we can't find the fragment, but the result was valid, then it's
3733-
// a zero-matched fragment and we can insert it
3730+
// If we can't find the fragment, but the result was valid, then
3731+
// it's a zero-matched fragment and we can insert it
37343732
if (it == stack_map.end ())
37353733
{
3736-
stack_map.insert (
3737-
{fragment->get_ident (),
3738-
MatchedFragment::zero (fragment->get_ident ())});
3734+
sub_stack.insert_fragment (
3735+
MatchedFragment::zero (fragment->get_ident ()));
37393736
}
37403737
else
37413738
{
3742-
it->second.set_match_amount (match_amount);
3739+
// We can just set the repetition amount on the first match
3740+
// FIXME: Make this more ergonomic and similar to what we fetch
3741+
// in `substitute_repetition`
3742+
it->second[0].set_match_amount (match_amount);
37433743
}
37443744
}
37453745
}
@@ -3750,8 +3750,8 @@ MacroExpander::match_repetition (Parser<MacroInvocLexer> &parser,
37503750
AST::ASTFragment
37513751
MacroExpander::transcribe_rule (
37523752
AST::MacroRule &match_rule, AST::DelimTokenTree &invoc_token_tree,
3753-
std::map<std::string, MatchedFragment> &matched_fragments, bool semicolon,
3754-
ContextType ctx)
3753+
std::map<std::string, std::vector<MatchedFragment>> &matched_fragments,
3754+
bool semicolon, ContextType ctx)
37553755
{
37563756
// we can manipulate the token tree to substitute the dollar identifiers so
37573757
// that when we call parse its already substituted for us
@@ -3765,10 +3765,10 @@ MacroExpander::transcribe_rule (
37653765
= substitute_tokens (invoc_stream, macro_rule_tokens, matched_fragments);
37663766

37673767
// // handy for debugging
3768-
for (auto &tok : substituted_tokens)
3769-
{
3770-
rust_debug ("tok: [%s]", tok->as_string ().c_str ());
3771-
}
3768+
// for (auto &tok : substituted_tokens)
3769+
// {
3770+
// rust_debug ("tok: [%s]", tok->as_string ().c_str ());
3771+
// }
37723772

37733773
// parse it to an ASTFragment
37743774
MacroInvocLexer lex (std::move (substituted_tokens));
@@ -3888,7 +3888,7 @@ MacroExpander::transcribe_rule (
38883888
std::vector<std::unique_ptr<AST::Token>>
38893889
MacroExpander::substitute_metavar (
38903890
std::vector<std::unique_ptr<AST::Token>> &input,
3891-
std::map<std::string, MatchedFragment> &fragments,
3891+
std::map<std::string, std::vector<MatchedFragment>> &fragments,
38923892
std::unique_ptr<AST::Token> &metavar)
38933893
{
38943894
auto metavar_name = metavar->get_str ();
@@ -3903,7 +3903,10 @@ MacroExpander::substitute_metavar (
39033903
else
39043904
{
39053905
// Replace
3906-
MatchedFragment &frag = it->second;
3906+
// We only care about the vector when expanding repetitions. Just access
3907+
// the first element of the vector.
3908+
// FIXME: Clean this up so it makes more sense
3909+
auto &frag = it->second[0];
39073910
for (size_t offs = frag.token_offset_begin; offs < frag.token_offset_end;
39083911
offs++)
39093912
{
@@ -3919,8 +3922,8 @@ std::vector<std::unique_ptr<AST::Token>>
39193922
MacroExpander::substitute_repetition (
39203923
std::vector<std::unique_ptr<AST::Token>> &input,
39213924
std::vector<std::unique_ptr<AST::Token>> &macro,
3922-
std::map<std::string, MatchedFragment> &fragments, size_t pattern_start,
3923-
size_t pattern_end)
3925+
std::map<std::string, std::vector<MatchedFragment>> &fragments,
3926+
size_t pattern_start, size_t pattern_end)
39243927
{
39253928
rust_assert (pattern_end < macro.size ());
39263929

@@ -3929,10 +3932,6 @@ MacroExpander::substitute_repetition (
39293932

39303933
std::vector<std::unique_ptr<AST::Token>> expanded;
39313934

3932-
for (size_t i = pattern_start; i < pattern_end; i++)
3933-
rust_debug ("[repetition pattern]: %s",
3934-
macro.at (i)->as_string ().c_str ());
3935-
39363935
// Find the first fragment and get the amount of repetitions that we should
39373936
// perform
39383937
size_t repeat_amount = 0;
@@ -3951,29 +3950,56 @@ MacroExpander::substitute_repetition (
39513950
// fragment), we can just error out. No need to paste the
39523951
// tokens as if nothing had happened.
39533952
rust_error_at (frag_token->get_locus (),
3954-
"metavar used in repetition does not exist");
3953+
"metavar %s used in repetition does not exist",
3954+
frag_token->get_str ().c_str ());
3955+
// FIXME:
39553956
return expanded;
39563957
}
39573958

3958-
repeat_amount = it->second.match_amount;
3959+
// FIXME: Refactor, ugly
3960+
repeat_amount = it->second[0].match_amount;
39593961
}
39603962
}
39613963
}
39623964

3965+
rust_debug ("repetition amount to use: %lu", repeat_amount);
39633966
std::vector<std::unique_ptr<AST::Token>> new_macro;
3964-
for (size_t tok_idx = pattern_start; tok_idx < pattern_end; tok_idx++)
3965-
{
3966-
new_macro.emplace_back (macro.at (tok_idx)->clone_token ());
3967-
rust_debug ("new macro token: %s",
3968-
macro.at (tok_idx)->as_string ().c_str ());
3969-
}
39703967

3971-
// FIXME: We have to be careful and not push the repetition token
3972-
auto new_tokens = substitute_tokens (input, new_macro, fragments);
3968+
// We want to generate a "new macro" to substitute with. This new macro
3969+
// should contain only the tokens inside the pattern
3970+
for (size_t tok_idx = pattern_start; tok_idx < pattern_end; tok_idx++)
3971+
new_macro.emplace_back (macro.at (tok_idx)->clone_token ());
3972+
3973+
// Then, we want to create a subset of the matches so that
3974+
// `substitute_tokens()` can only see one fragment per metavar. Let's say we
3975+
// have the following user input: (1 145 'h')
3976+
// on the following match arm: ($($lit:literal)*)
3977+
// which causes the following matches: { "lit": [1, 145, 'h'] }
3978+
//
3979+
// The pattern (new_macro) is `$lit:literal`
3980+
// The first time we expand it, we want $lit to have the following token: 1
3981+
// The second time, 145
3982+
// The third and final time, 'h'
3983+
//
3984+
// In order to do so we must create "sub maps", which only contain parts of
3985+
// the original matches
3986+
// sub-maps: [ { "lit": 1 }, { "lit": 145 }, { "lit": 'h' } ]
3987+
//
3988+
// and give them to `substitute_tokens` one by one.
39733989

3974-
rust_debug ("repetition amount to use: %lu", repeat_amount);
39753990
for (size_t i = 0; i < repeat_amount; i++)
39763991
{
3992+
std::map<std::string, std::vector<MatchedFragment>> sub_map;
3993+
for (auto &kv_match : fragments)
3994+
{
3995+
std::vector<MatchedFragment> sub_vec;
3996+
sub_vec.emplace_back (kv_match.second[i]);
3997+
3998+
sub_map.insert ({kv_match.first, sub_vec});
3999+
}
4000+
4001+
auto new_tokens = substitute_tokens (input, new_macro, sub_map);
4002+
39774003
for (auto &new_token : new_tokens)
39784004
expanded.emplace_back (new_token->clone_token ());
39794005
}
@@ -3988,7 +4014,8 @@ std::pair<std::vector<std::unique_ptr<AST::Token>>, size_t>
39884014
MacroExpander::substitute_token (
39894015
std::vector<std::unique_ptr<AST::Token>> &input,
39904016
std::vector<std::unique_ptr<AST::Token>> &macro,
3991-
std::map<std::string, MatchedFragment> &fragments, size_t token_idx)
4017+
std::map<std::string, std::vector<MatchedFragment>> &fragments,
4018+
size_t token_idx)
39924019
{
39934020
auto &token = macro.at (token_idx);
39944021
switch (token->get_id ())
@@ -4020,7 +4047,7 @@ MacroExpander::substitute_token (
40204047
return {
40214048
substitute_repetition (input, macro, fragments, pattern_start,
40224049
pattern_end),
4023-
// + 2 for the opening and closing parenthesis which are mandatory
4050+
// + 2 for the opening and closing parentheses which are mandatory
40244051
// + 1 for the repetitor (+, *, ?)
40254052
pattern_end - pattern_start + 3};
40264053
}
@@ -4044,7 +4071,7 @@ std::vector<std::unique_ptr<AST::Token>>
40444071
MacroExpander::substitute_tokens (
40454072
std::vector<std::unique_ptr<AST::Token>> &input,
40464073
std::vector<std::unique_ptr<AST::Token>> &macro,
4047-
std::map<std::string, MatchedFragment> &fragments)
4074+
std::map<std::string, std::vector<MatchedFragment>> &fragments)
40484075
{
40494076
std::vector<std::unique_ptr<AST::Token>> replaced_tokens;
40504077

gcc/rust/expand/rust-macro-expand.h

Lines changed: 47 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -89,17 +89,38 @@ class SubstitutionScope
8989

9090
// Open a new substitution scope: append an empty fragment map to the stack.
void push () { stack.push_back ({}); }
9191

92-
std::map<std::string, MatchedFragment> pop ()
92+
// Remove the innermost scope from the stack and return it.
// The map (metavar name -> matched fragments) is copied out of the stack
// before the top entry is dropped, so the caller receives its own copy.
// No emptiness check is done here: the stack must hold at least one scope.
std::map<std::string, std::vector<MatchedFragment>> pop ()
9393
{
9494
auto top = stack.back ();
9595
stack.pop_back ();
9696
return top;
9797
}
9898

99-
std::map<std::string, MatchedFragment> &peek () { return stack.back (); }
99+
// Return a mutable reference to the innermost scope without removing it.
// No emptiness check is done here: the stack must hold at least one scope.
std::map<std::string, std::vector<MatchedFragment>> &peek ()
100+
{
101+
return stack.back ();
102+
}
103+
104+
// Record a matched fragment in the innermost scope.
// Fragments are grouped by `fragment.fragment_ident`: the first match for an
// identifier creates a one-element vector, and every later match for the same
// identifier is appended to that vector, so matches are kept in insertion
// order. The fragment is taken by value and copied into the vector.
// No emptiness check is done here: the stack must hold at least one scope.
void insert_fragment (MatchedFragment fragment)
105+
{
106+
auto &current_map = stack.back ();
107+
auto it = current_map.find (fragment.fragment_ident);
108+
109+
// First match for this identifier: start a new vector for it
if (it == current_map.end ())
110+
{
111+
auto new_frags = std::vector<MatchedFragment> ();
112+
new_frags.emplace_back (fragment);
113+
current_map.insert ({fragment.fragment_ident, new_frags});
114+
}
115+
// Identifier already seen in this scope: append to its existing matches
else
116+
{
117+
auto &frags = it->second;
118+
frags.emplace_back (fragment);
119+
}
120+
}
100121

101122
private:
102-
std::vector<std::map<std::string, MatchedFragment>> stack;
123+
std::vector<std::map<std::string, std::vector<MatchedFragment>>> stack;
103124
};
104125

105126
// Object used to store shared data (between functions) for macro expansion.
@@ -151,11 +172,10 @@ struct MacroExpander
151172
bool try_match_rule (AST::MacroRule &match_rule,
152173
AST::DelimTokenTree &invoc_token_tree);
153174

154-
AST::ASTFragment
155-
transcribe_rule (AST::MacroRule &match_rule,
156-
AST::DelimTokenTree &invoc_token_tree,
157-
std::map<std::string, MatchedFragment> &matched_fragments,
158-
bool semicolon, ContextType ctx);
175+
AST::ASTFragment transcribe_rule (
176+
AST::MacroRule &match_rule, AST::DelimTokenTree &invoc_token_tree,
177+
std::map<std::string, std::vector<MatchedFragment>> &matched_fragments,
178+
bool semicolon, ContextType ctx);
159179

160180
bool match_fragment (Parser<MacroInvocLexer> &parser,
161181
AST::MacroMatchFragment &fragment);
@@ -204,10 +224,10 @@ struct MacroExpander
204224
* @return A token containing the associated fragment expanded into tokens if
205225
* any, or the cloned token if no fragment was associated
206226
*/
207-
static std::vector<std::unique_ptr<AST::Token>>
208-
substitute_metavar (std::vector<std::unique_ptr<AST::Token>> &input,
209-
std::map<std::string, MatchedFragment> &fragments,
210-
std::unique_ptr<AST::Token> &metavar);
227+
static std::vector<std::unique_ptr<AST::Token>> substitute_metavar (
228+
std::vector<std::unique_ptr<AST::Token>> &input,
229+
std::map<std::string, std::vector<MatchedFragment>> &fragments,
230+
std::unique_ptr<AST::Token> &metavar);
211231

212232
/**
213233
* Substitute a macro repetition by its given fragments
@@ -219,11 +239,11 @@ struct MacroExpander
219239
*
220240
* @return A vector containing the repeated pattern
221241
*/
222-
static std::vector<std::unique_ptr<AST::Token>>
223-
substitute_repetition (std::vector<std::unique_ptr<AST::Token>> &input,
224-
std::vector<std::unique_ptr<AST::Token>> &macro,
225-
std::map<std::string, MatchedFragment> &fragments,
226-
size_t pattern_start, size_t pattern_end);
242+
static std::vector<std::unique_ptr<AST::Token>> substitute_repetition (
243+
std::vector<std::unique_ptr<AST::Token>> &input,
244+
std::vector<std::unique_ptr<AST::Token>> &macro,
245+
std::map<std::string, std::vector<MatchedFragment>> &fragments,
246+
size_t pattern_start, size_t pattern_end);
227247

228248
/**
229249
* Substitute a given token by its appropriate representation
@@ -240,15 +260,16 @@ struct MacroExpander
240260
* ahead of the input to avoid mis-substitutions
241261
*/
242262
static std::pair<std::vector<std::unique_ptr<AST::Token>>, size_t>
243-
substitute_token (std::vector<std::unique_ptr<AST::Token>> &input,
244-
std::vector<std::unique_ptr<AST::Token>> &macro,
245-
std::map<std::string, MatchedFragment> &fragments,
246-
size_t token_idx);
247-
248-
static std::vector<std::unique_ptr<AST::Token>>
249-
substitute_tokens (std::vector<std::unique_ptr<AST::Token>> &input,
250-
std::vector<std::unique_ptr<AST::Token>> &macro,
251-
std::map<std::string, MatchedFragment> &fragments);
263+
substitute_token (
264+
std::vector<std::unique_ptr<AST::Token>> &input,
265+
std::vector<std::unique_ptr<AST::Token>> &macro,
266+
std::map<std::string, std::vector<MatchedFragment>> &fragments,
267+
size_t token_idx);
268+
269+
static std::vector<std::unique_ptr<AST::Token>> substitute_tokens (
270+
std::vector<std::unique_ptr<AST::Token>> &input,
271+
std::vector<std::unique_ptr<AST::Token>> &macro,
272+
std::map<std::string, std::vector<MatchedFragment>> &fragments);
252273

253274
void push_context (ContextType t) { context.push_back (t); }
254275

0 commit comments

Comments
 (0)