Skip to content

Commit 10de9cf

Browse files
Merge #956
956: Substitute repetitions r=CohenArthur a=CohenArthur. Needs #955. This PR splits up the `substitute_tokens` function into multiple smaller functions. Still a draft until I can get repetitions working. Closes #960. Closes #961. Co-authored-by: Arthur Cohen <[email protected]>
2 parents e0f261f + 92a6256 commit 10de9cf

File tree

8 files changed

+432
-87
lines changed

8 files changed

+432
-87
lines changed

gcc/rust/ast/rust-ast.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,7 @@ class Token : public TokenTree, public MacroMatch
204204
std::vector<std::unique_ptr<Token> > to_token_stream () const override;
205205

206206
TokenId get_id () const { return tok_ref->get_id (); }
207+
const std::string &get_str () const { return tok_ref->get_str (); }
207208

208209
Location get_locus () const { return tok_ref->get_locus (); }
209210

gcc/rust/expand/rust-macro-expand.cc

Lines changed: 233 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -3118,7 +3118,7 @@ MacroExpander::expand_decl_macro (Location invoc_locus,
31183118

31193119
// find matching arm
31203120
AST::MacroRule *matched_rule = nullptr;
3121-
std::map<std::string, MatchedFragment> matched_fragments;
3121+
std::map<std::string, std::vector<MatchedFragment>> matched_fragments;
31223122
for (auto &rule : rules_def.get_rules ())
31233123
{
31243124
sub_stack.push ();
@@ -3127,9 +3127,9 @@ MacroExpander::expand_decl_macro (Location invoc_locus,
31273127

31283128
if (did_match_rule)
31293129
{
3130-
for (auto &frag : matched_fragments)
3131-
rust_debug ("matched fragment: %s",
3132-
frag.second.as_string ().c_str ());
3130+
for (auto &kv : matched_fragments)
3131+
rust_debug ("[fragment]: %s (%ld)", kv.first.c_str (),
3132+
kv.second.size ());
31333133

31343134
matched_rule = &rule;
31353135
break;
@@ -3535,9 +3535,8 @@ MacroExpander::match_matcher (Parser<MacroInvocLexer> &parser,
35353535

35363536
// matched fragment get the offset in the token stream
35373537
size_t offs_end = source.get_offs ();
3538-
sub_stack.peek ().insert (
3539-
{fragment->get_ident (),
3540-
MatchedFragment (fragment->get_ident (), offs_begin, offs_end)});
3538+
sub_stack.insert_fragment (
3539+
MatchedFragment (fragment->get_ident (), offs_begin, offs_end));
35413540
}
35423541
break;
35433542

@@ -3611,7 +3610,6 @@ MacroExpander::match_n_matches (
36113610
match_amount = 0;
36123611

36133612
const MacroInvocLexer &source = parser.get_token_source ();
3614-
std::vector<std::string> fragment_identifiers;
36153613
while (true)
36163614
{
36173615
// If the current token is a closing macro delimiter, break away.
@@ -3633,12 +3631,9 @@ MacroExpander::match_n_matches (
36333631

36343632
// matched fragment get the offset in the token stream
36353633
size_t offs_end = source.get_offs ();
3636-
sub_stack.peek ().insert (
3637-
{fragment->get_ident (),
3638-
MatchedFragment (fragment->get_ident (), offs_begin,
3639-
offs_end)});
3640-
3641-
fragment_identifiers.emplace_back (fragment->get_ident ());
3634+
sub_stack.insert_fragment (
3635+
MatchedFragment (fragment->get_ident (), offs_begin,
3636+
offs_end));
36423637
}
36433638
break;
36443639

@@ -3677,21 +3672,10 @@ MacroExpander::match_n_matches (
36773672

36783673
// Check if the amount of matches we got is valid: Is it more than the lower
36793674
// bound and less than the higher bound?
3680-
auto result = hi_bound ? match_amount >= lo_bound && match_amount <= hi_bound
3681-
: match_amount >= lo_bound;
3682-
3683-
// We can now set the amount to each fragment we matched in the substack
3684-
auto &stack_map = sub_stack.peek ();
3685-
for (auto &fragment_id : fragment_identifiers)
3686-
{
3687-
auto it = stack_map.find (fragment_id);
3688-
3689-
rust_assert (it != stack_map.end ());
3675+
bool did_meet_lo_bound = match_amount >= lo_bound;
3676+
bool did_meet_hi_bound = hi_bound ? match_amount <= hi_bound : true;
36903677

3691-
it->second.set_match_amount (match_amount);
3692-
}
3693-
3694-
return result;
3678+
return did_meet_lo_bound && did_meet_hi_bound;
36953679
}
36963680

36973681
bool
@@ -3733,14 +3717,41 @@ MacroExpander::match_repetition (Parser<MacroInvocLexer> &parser,
37333717
rust_debug_loc (rep.get_match_locus (), "%s matched %lu times",
37343718
res ? "successfully" : "unsuccessfully", match_amount);
37353719

3720+
// We can now set the amount to each fragment we matched in the substack
3721+
auto &stack_map = sub_stack.peek ();
3722+
for (auto &match : rep.get_matches ())
3723+
{
3724+
if (match->get_macro_match_type ()
3725+
== AST::MacroMatch::MacroMatchType::Fragment)
3726+
{
3727+
auto fragment = static_cast<AST::MacroMatchFragment *> (match.get ());
3728+
auto it = stack_map.find (fragment->get_ident ());
3729+
3730+
// If we can't find the fragment, but the result was valid, then
3731+
// it's a zero-matched fragment and we can insert it
3732+
if (it == stack_map.end ())
3733+
{
3734+
sub_stack.insert_fragment (
3735+
MatchedFragment::zero (fragment->get_ident ()));
3736+
}
3737+
else
3738+
{
3739+
// We can just set the repetition amount on the first match
3740+
// FIXME: Make this more ergonomic and similar to what we fetch
3741+
// in `substitute_repetition`
3742+
it->second[0].set_match_amount (match_amount);
3743+
}
3744+
}
3745+
}
3746+
37363747
return res;
37373748
}
37383749

37393750
AST::ASTFragment
37403751
MacroExpander::transcribe_rule (
37413752
AST::MacroRule &match_rule, AST::DelimTokenTree &invoc_token_tree,
3742-
std::map<std::string, MatchedFragment> &matched_fragments, bool semicolon,
3743-
ContextType ctx)
3753+
std::map<std::string, std::vector<MatchedFragment>> &matched_fragments,
3754+
bool semicolon, ContextType ctx)
37443755
{
37453756
// we can manipulate the token tree to substitute the dollar identifiers so
37463757
// that when we call parse its already substituted for us
@@ -3874,11 +3885,193 @@ MacroExpander::transcribe_rule (
38743885
return AST::ASTFragment (std::move (nodes));
38753886
}
38763887

3888+
// Expand a single metavariable reference.
//
// INPUT is the token stream the matched fragments index into, FRAGMENTS maps
// metavar names to their recorded matches, and METAVAR is the identifier
// token naming the metavariable.
//
// Returns freshly cloned tokens:
//  - when METAVAR names no entry in FRAGMENTS, a single clone of METAVAR
//    itself (only that token is cloned here);
//  - otherwise, clones of the INPUT tokens in the half-open range
//    [token_offset_begin, token_offset_end) of the first recorded match.
std::vector<std::unique_ptr<AST::Token>>
MacroExpander::substitute_metavar (
  std::vector<std::unique_ptr<AST::Token>> &input,
  std::map<std::string, std::vector<MatchedFragment>> &fragments,
  std::unique_ptr<AST::Token> &metavar)
{
  std::vector<std::unique_ptr<AST::Token>> result;

  auto found = fragments.find (metavar->get_str ());
  if (found == fragments.end ())
    {
      // Unknown metavar: hand back a copy of the original token
      result.push_back (metavar->clone_token ());
      return result;
    }

  // Only repetition expansion cares about the whole vector of matches; a
  // plain metavar is always substituted with the first recorded match.
  // FIXME: Clean this up so it makes more sense
  auto &first_match = found->second[0];

  size_t cursor = first_match.token_offset_begin;
  while (cursor < first_match.token_offset_end)
    {
      result.push_back (input.at (cursor)->clone_token ());
      cursor++;
    }

  return result;
}
3920+
3921+
// Expand one repetition of a macro transcriber.
//
// INPUT is the token stream the matched fragments index into, MACRO is the
// transcriber token stream, and FRAGMENTS maps each metavar name to the list
// of matches recorded for it.  The repeated pattern is the half-open range
// [PATTERN_START, PATTERN_END) of MACRO; PATTERN_END must be a valid index
// into MACRO (asserted).
//
// Returns the tokens obtained by running `substitute_tokens` on the pattern
// once per repetition, each time with a sub-map exposing a single match per
// metavar.  If the pattern references a metavar absent from FRAGMENTS, an
// error is emitted and the returned vector is empty.
std::vector<std::unique_ptr<AST::Token>>
MacroExpander::substitute_repetition (
  std::vector<std::unique_ptr<AST::Token>> &input,
  std::vector<std::unique_ptr<AST::Token>> &macro,
  std::map<std::string, std::vector<MatchedFragment>> &fragments,
  size_t pattern_start, size_t pattern_end)
{
  rust_assert (pattern_end < macro.size ());

  // `%lu` expects an unsigned long, which size_t is not on every host
  rust_debug ("pattern start: %lu",
              static_cast<unsigned long> (pattern_start));
  rust_debug ("pattern end: %lu", static_cast<unsigned long> (pattern_end));

  std::vector<std::unique_ptr<AST::Token>> expanded;

  // Walk the pattern looking for `$ident` pairs to find out how many times we
  // should repeat it.
  // NOTE: every metavar found overwrites the amount, so the last metavar of
  // the pattern currently decides the repetition count.
  size_t repeat_amount = 0;
  for (size_t i = pattern_start; i < pattern_end; i++)
    {
      if (macro.at (i)->get_id () != DOLLAR_SIGN)
        continue;

      // `i + 1 <= pattern_end < macro.size ()`, so this access is in bounds
      auto &frag_token = macro.at (i + 1);
      if (frag_token->get_id () != IDENTIFIER)
        continue;

      auto it = fragments.find (frag_token->get_str ());
      if (it == fragments.end ())
        {
          // If the repetition is not anything we know (ie no declared
          // metavars, or metavars which aren't present in the
          // fragment), we can just error out. No need to paste the
          // tokens as if nothing had happened.
          rust_error_at (frag_token->get_locus (),
                         "metavar %s used in repetition does not exist",
                         frag_token->get_str ().c_str ());
          // FIXME:
          return expanded;
        }

      // A metavar without any recorded match cannot tell us anything
      if (it->second.empty ())
        continue;

      // FIXME: Refactor, ugly
      repeat_amount = it->second[0].match_amount;
    }

  rust_debug ("repetition amount to use: %lu",
              static_cast<unsigned long> (repeat_amount));
  std::vector<std::unique_ptr<AST::Token>> new_macro;

  // We want to generate a "new macro" to substitute with. This new macro
  // should contain only the tokens inside the pattern
  for (size_t tok_idx = pattern_start; tok_idx < pattern_end; tok_idx++)
    new_macro.emplace_back (macro.at (tok_idx)->clone_token ());

  // Then, we want to create a subset of the matches so that
  // `substitute_tokens()` can only see one fragment per metavar. Let's say we
  // have the following user input: (1 145 'h')
  // on the following match arm: ($($lit:literal)*)
  // which causes the following matches: { "lit": [1, 145, 'h'] }
  //
  // The pattern (new_macro) is `$lit:literal`
  // The first time we expand it, we want $lit to have the following token: 1
  // The second time, 145
  // The third and final time, 'h'
  //
  // In order to do so we must create "sub maps", which only contain parts of
  // the original matches
  // sub-maps: [ { "lit": 1 }, { "lit": 145 }, { "lit": 'h' } ]
  //
  // and give them to `substitute_tokens` one by one.

  for (size_t i = 0; i < repeat_amount; i++)
    {
      std::map<std::string, std::vector<MatchedFragment>> sub_map;
      for (auto &kv_match : fragments)
        {
          auto &matches = kv_match.second;

          // FRAGMENTS holds every metavar of the rule, not only the ones
          // repeated by this pattern, so a metavar may well have fewer than
          // `repeat_amount` matches. Never index past the end of its vector.
          size_t match_idx = 0;
          if (i < matches.size ())
            match_idx = i;
          else if (matches.size () != 1)
            // Nothing sensible to expose for this iteration
            continue;
          // else: a metavar matched exactly once keeps its single match
          // (index 0) for every iteration

          std::vector<MatchedFragment> sub_vec;
          sub_vec.emplace_back (matches[match_idx]);

          sub_map.insert ({kv_match.first, sub_vec});
        }

      auto new_tokens = substitute_tokens (input, new_macro, sub_map);

      // `new_tokens` is ours: move the tokens instead of cloning them again
      for (auto &new_token : new_tokens)
        expanded.emplace_back (std::move (new_token));
    }

  // FIXME: We also need to make sure that all subsequent fragments
  // contain the same amount of repetitions as the first one

  return expanded;
}
4012+
4013+
// Substitute whatever follows a `$` in a macro transcriber.
//
// INPUT is the token stream the matched fragments index into, MACRO is the
// transcriber token stream, FRAGMENTS maps metavar names to their recorded
// matches and TOKEN_IDX is the index in MACRO of the token under
// consideration (the caller in this file passes the index right after the
// `$`).
//
// Returns the expanded tokens together with the number of MACRO tokens that
// were consumed, starting at TOKEN_IDX:
//  - identifier: the result of `substitute_metavar`, 1 token consumed;
//  - `(`: the result of `substitute_repetition` on the tokens up to the
//    matching `)`, consuming the pattern plus 3 tokens (both parentheses and
//    the repetitor);
//  - anything else, or a `(` without a matching `)`: an error is emitted and
//    an empty vector with 0 consumed tokens is returned.
std::pair<std::vector<std::unique_ptr<AST::Token>>, size_t>
MacroExpander::substitute_token (
  std::vector<std::unique_ptr<AST::Token>> &input,
  std::vector<std::unique_ptr<AST::Token>> &macro,
  std::map<std::string, std::vector<MatchedFragment>> &fragments,
  size_t token_idx)
{
  auto &token = macro.at (token_idx);
  switch (token->get_id ())
    {
    case IDENTIFIER:
      rust_debug ("expanding metavar: %s", token->get_str ().c_str ());
      return {substitute_metavar (input, fragments, token), 1};
      case LEFT_PAREN: {
        // We need to parse up until the closing delimiter and expand this
        // fragment->n times.
        rust_debug ("expanding repetition");
        size_t pattern_start = token_idx + 1;
        size_t pattern_end = pattern_start;

        // Look for the parenthesis closing the one at `token_idx`. Nested
        // parentheses inside of the pattern (e.g. `$(f($x);)*`) must not end
        // the pattern early, so keep track of the nesting depth.
        size_t depth = 1;
        for (; pattern_end < macro.size (); pattern_end++)
          {
            auto id = macro.at (pattern_end)->get_id ();
            if (id == LEFT_PAREN)
              depth++;
            else if (id == RIGHT_PAREN && --depth == 0)
              break;
          }

        // No matching closing delimiter: report it instead of tripping the
        // bounds assertion in `substitute_repetition`
        if (pattern_end >= macro.size ())
          {
            rust_error_at (token->get_locus (),
                           "unexpected end of macro transcribe: expected "
                           "closing %<)%> for repetition");
            break;
          }

        // FIXME: This skips whitespaces... Is that okay??
        // FIXME: Is there any existing parsing function that allows us to parse
        // a macro pattern?

        // FIXME: We need to parse the repetition token now

        return {
          substitute_repetition (input, macro, fragments, pattern_start,
                                 pattern_end),
          // + 2 for the opening and closing parentheses which are mandatory
          // + 1 for the repetitor (+, *, ?)
          pattern_end - pattern_start + 3};
      }
      // TODO: We need to check if the $ was alone. In that case, do
      // not error out: Simply act as if there was an empty identifier
      // with no associated fragment and paste the dollar sign in the
      // transcription. Unsure how to do that since we always have at
      // least the closing curly brace after an empty $...
    default:
      rust_error_at (token->get_locus (),
                     "unexpected token in macro transcribe: expected "
                     "%<(%> or identifier after %<$%>, got %<%s%>",
                     get_token_description (token->get_id ()));
    }

  // Error path: nothing was expanded and no token was consumed
  // FIXME: gcc_unreachable() error case?
  return {std::vector<std::unique_ptr<AST::Token>> (), 0};
}
4069+
38774070
std::vector<std::unique_ptr<AST::Token>>
38784071
MacroExpander::substitute_tokens (
38794072
std::vector<std::unique_ptr<AST::Token>> &input,
38804073
std::vector<std::unique_ptr<AST::Token>> &macro,
3881-
std::map<std::string, MatchedFragment> &fragments)
4074+
std::map<std::string, std::vector<MatchedFragment>> &fragments)
38824075
{
38834076
std::vector<std::unique_ptr<AST::Token>> replaced_tokens;
38844077

@@ -3887,54 +4080,20 @@ MacroExpander::substitute_tokens (
38874080
auto &tok = macro.at (i);
38884081
if (tok->get_id () == DOLLAR_SIGN)
38894082
{
3890-
std::vector<std::unique_ptr<AST::Token>> parsed_toks;
4083+
// Aaaaah, if only we had C++17 :)
4084+
// auto [expanded, tok_to_skip] = ...
4085+
auto p = substitute_token (input, macro, fragments, i + 1);
4086+
auto expanded = std::move (p.first);
4087+
auto tok_to_skip = p.second;
38914088

3892-
std::string ident;
3893-
for (size_t offs = i; i < macro.size (); offs++)
3894-
{
3895-
auto &tok = macro.at (offs);
3896-
if (tok->get_id () == DOLLAR_SIGN && offs == i)
3897-
{
3898-
parsed_toks.push_back (tok->clone_token ());
3899-
}
3900-
else if (tok->get_id () == IDENTIFIER)
3901-
{
3902-
rust_assert (tok->as_string ().size () == 1);
3903-
ident.push_back (tok->as_string ().at (0));
3904-
parsed_toks.push_back (tok->clone_token ());
3905-
}
3906-
else
3907-
{
3908-
break;
3909-
}
3910-
}
4089+
i += tok_to_skip;
39114090

3912-
// lookup the ident
3913-
auto it = fragments.find (ident);
3914-
if (it == fragments.end ())
3915-
{
3916-
// just leave the tokens in
3917-
for (auto &tok : parsed_toks)
3918-
{
3919-
replaced_tokens.push_back (tok->clone_token ());
3920-
}
3921-
}
3922-
else
3923-
{
3924-
// replace
3925-
MatchedFragment &frag = it->second;
3926-
for (size_t offs = frag.token_offset_begin;
3927-
offs < frag.token_offset_end; offs++)
3928-
{
3929-
auto &tok = input.at (offs);
3930-
replaced_tokens.push_back (tok->clone_token ());
3931-
}
3932-
}
3933-
i += parsed_toks.size () - 1;
4091+
for (auto &token : expanded)
4092+
replaced_tokens.emplace_back (token->clone_token ());
39344093
}
39354094
else
39364095
{
3937-
replaced_tokens.push_back (tok->clone_token ());
4096+
replaced_tokens.emplace_back (tok->clone_token ());
39384097
}
39394098
}
39404099

0 commit comments

Comments
 (0)