@@ -3118,7 +3118,7 @@ MacroExpander::expand_decl_macro (Location invoc_locus,

  // find matching arm
  AST::MacroRule *matched_rule = nullptr;
- std::map<std::string, MatchedFragment> matched_fragments;
+ std::map<std::string, std::vector<MatchedFragment>> matched_fragments;
  for (auto &rule : rules_def.get_rules ())
    {
      sub_stack.push ();
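Editor's note: the switch from one MatchedFragment per metavariable to a std::vector<MatchedFragment> is what lets a metavariable inside a repetition record several matches under a single key. A minimal, self-contained sketch of the idea, using a simplified stand-in struct rather than the real gccrs MatchedFragment:

// --- editorial sketch, not part of the patch ---
#include <cstdio>
#include <map>
#include <string>
#include <vector>

// Simplified stand-in for MatchedFragment: it only records which token
// offsets of the invocation the fragment covers.
struct Fragment
{
  std::string ident;
  size_t offset_begin;
  size_t offset_end;
};

int
main ()
{
  // Matching `(1 145 'h')` against `($($lit:literal)*)` should record three
  // fragments under the single key "lit" -- hence a vector per metavariable.
  std::map<std::string, std::vector<Fragment>> matched_fragments;

  matched_fragments["lit"].push_back ({"lit", 0, 1});
  matched_fragments["lit"].push_back ({"lit", 1, 2});
  matched_fragments["lit"].push_back ({"lit", 2, 3});

  for (auto &kv : matched_fragments)
    printf ("[fragment]: %s (%zu)\n", kv.first.c_str (), kv.second.size ());

  return 0;
}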
@@ -3127,9 +3127,9 @@ MacroExpander::expand_decl_macro (Location invoc_locus,

      if (did_match_rule)
        {
-         for (auto &frag : matched_fragments)
-           rust_debug ("matched fragment: %s",
-                       frag.second.as_string ().c_str ());
+         for (auto &kv : matched_fragments)
+           rust_debug ("[fragment]: %s (%ld)", kv.first.c_str (),
+                       kv.second.size ());

          matched_rule = &rule;
          break;
@@ -3535,9 +3535,8 @@ MacroExpander::match_matcher (Parser<MacroInvocLexer> &parser,

            // matched fragment get the offset in the token stream
            size_t offs_end = source.get_offs ();
-           sub_stack.peek ().insert (
-             {fragment->get_ident (),
-              MatchedFragment (fragment->get_ident (), offs_begin, offs_end)});
+           sub_stack.insert_fragment (
+             MatchedFragment (fragment->get_ident (), offs_begin, offs_end));
          }
          break;

@@ -3611,7 +3610,6 @@ MacroExpander::match_n_matches (
  match_amount = 0;

  const MacroInvocLexer &source = parser.get_token_source ();
- std::vector<std::string> fragment_identifiers;
  while (true)
    {
      // If the current token is a closing macro delimiter, break away.
@@ -3633,12 +3631,9 @@ MacroExpander::match_n_matches (

              // matched fragment get the offset in the token stream
              size_t offs_end = source.get_offs ();
-             sub_stack.peek ().insert (
-               {fragment->get_ident (),
-                MatchedFragment (fragment->get_ident (), offs_begin,
-                                 offs_end)});
-
-             fragment_identifiers.emplace_back (fragment->get_ident ());
+             sub_stack.insert_fragment (
+               MatchedFragment (fragment->get_ident (), offs_begin,
+                                offs_end));
            }
            break;

@@ -3677,21 +3672,10 @@ MacroExpander::match_n_matches (

  // Check if the amount of matches we got is valid: Is it more than the lower
  // bound and less than the higher bound?
- auto result = hi_bound ? match_amount >= lo_bound && match_amount <= hi_bound
-                        : match_amount >= lo_bound;
-
- // We can now set the amount to each fragment we matched in the substack
- auto &stack_map = sub_stack.peek ();
- for (auto &fragment_id : fragment_identifiers)
-   {
-     auto it = stack_map.find (fragment_id);
-
-     rust_assert (it != stack_map.end ());
+ bool did_meet_lo_bound = match_amount >= lo_bound;
+ bool did_meet_hi_bound = hi_bound ? match_amount <= hi_bound : true;

-     it->second.set_match_amount (match_amount);
-   }
-
- return result;
+ return did_meet_lo_bound && did_meet_hi_bound;
}

bool
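Editor's note: the hunk does not show how lo_bound and hi_bound encode the three repetition operators. A plausible reading of the ternary above is that a zero hi_bound means "no upper bound" (an assumption, not confirmed by this diff); under that reading the check behaves as sketched below, with `*` as 0..unbounded, `+` as 1..unbounded and `?` as 0..1:

// --- editorial sketch, not part of the patch ---
#include <cassert>
#include <cstddef>

// Hedged model of the bound check in match_n_matches: lo_bound is the
// minimum repetition count, hi_bound the maximum, with 0 standing in for
// "no upper bound" (assumed encoding).
static bool
check_bounds (size_t match_amount, size_t lo_bound, size_t hi_bound)
{
  bool did_meet_lo_bound = match_amount >= lo_bound;
  bool did_meet_hi_bound = hi_bound ? match_amount <= hi_bound : true;

  return did_meet_lo_bound && did_meet_hi_bound;
}

int
main ()
{
  assert (check_bounds (0, 0, 0));  // `*`: zero or more, zero matches is fine
  assert (!check_bounds (0, 1, 0)); // `+`: one or more, zero matches fails
  assert (check_bounds (1, 0, 1));  // `?`: zero or one, one match is fine
  assert (!check_bounds (2, 0, 1)); // `?`: two matches is too many

  return 0;
}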
@@ -3733,14 +3717,41 @@ MacroExpander::match_repetition (Parser<MacroInvocLexer> &parser,
  rust_debug_loc (rep.get_match_locus (), "%s matched %lu times",
                  res ? "successfully" : "unsuccessfully", match_amount);

+ // We can now set the amount to each fragment we matched in the substack
+ auto &stack_map = sub_stack.peek ();
+ for (auto &match : rep.get_matches ())
+   {
+     if (match->get_macro_match_type ()
+         == AST::MacroMatch::MacroMatchType::Fragment)
+       {
+         auto fragment = static_cast<AST::MacroMatchFragment *> (match.get ());
+         auto it = stack_map.find (fragment->get_ident ());
+
+         // If we can't find the fragment, but the result was valid, then
+         // it's a zero-matched fragment and we can insert it
+         if (it == stack_map.end ())
+           {
+             sub_stack.insert_fragment (
+               MatchedFragment::zero (fragment->get_ident ()));
+           }
+         else
+           {
+             // We can just set the repetition amount on the first match
+             // FIXME: Make this more ergonomic and similar to what we fetch
+             // in `substitute_repetition`
+             it->second[0].set_match_amount (match_amount);
+           }
+       }
+   }
+
  return res;
}

AST::ASTFragment
MacroExpander::transcribe_rule (
  AST::MacroRule &match_rule, AST::DelimTokenTree &invoc_token_tree,
- std::map<std::string, MatchedFragment> &matched_fragments, bool semicolon,
- ContextType ctx)
+ std::map<std::string, std::vector<MatchedFragment>> &matched_fragments,
+ bool semicolon, ContextType ctx)
{
  // we can manipulate the token tree to substitute the dollar identifiers so
  // that when we call parse its already substituted for us
@@ -3874,11 +3885,193 @@ MacroExpander::transcribe_rule (
  return AST::ASTFragment (std::move (nodes));
}

+std::vector<std::unique_ptr<AST::Token>>
+MacroExpander::substitute_metavar (
+  std::vector<std::unique_ptr<AST::Token>> &input,
+  std::map<std::string, std::vector<MatchedFragment>> &fragments,
+  std::unique_ptr<AST::Token> &metavar)
+{
+  auto metavar_name = metavar->get_str ();
+
+  std::vector<std::unique_ptr<AST::Token>> expanded;
+  auto it = fragments.find (metavar_name);
+  if (it == fragments.end ())
+    {
+      // Return a copy of the original token
+      expanded.push_back (metavar->clone_token ());
+    }
+  else
+    {
+      // Replace
+      // We only care about the vector when expanding repetitions. Just access
+      // the first element of the vector.
+      // FIXME: Clean this up so it makes more sense
+      auto &frag = it->second[0];
+      for (size_t offs = frag.token_offset_begin; offs < frag.token_offset_end;
+           offs++)
+        {
+          auto &tok = input.at (offs);
+          expanded.push_back (tok->clone_token ());
+        }
+    }
+
+  return expanded;
+}
+
+std::vector<std::unique_ptr<AST::Token>>
+MacroExpander::substitute_repetition (
+  std::vector<std::unique_ptr<AST::Token>> &input,
+  std::vector<std::unique_ptr<AST::Token>> &macro,
+  std::map<std::string, std::vector<MatchedFragment>> &fragments,
+  size_t pattern_start, size_t pattern_end)
+{
+  rust_assert (pattern_end < macro.size ());
+
+  rust_debug ("pattern start: %lu", pattern_start);
+  rust_debug ("pattern end: %lu", pattern_end);
+
+  std::vector<std::unique_ptr<AST::Token>> expanded;
+
+  // Find the first fragment and get the amount of repetitions that we should
+  // perform
+  size_t repeat_amount = 0;
+  for (size_t i = pattern_start; i < pattern_end; i++)
+    {
+      if (macro.at (i)->get_id () == DOLLAR_SIGN)
+        {
+          auto &frag_token = macro.at (i + 1);
+          if (frag_token->get_id () == IDENTIFIER)
+            {
+              auto it = fragments.find (frag_token->get_str ());
+              if (it == fragments.end ())
+                {
+                  // If the repetition is not anything we know (ie no declared
+                  // metavars, or metavars which aren't present in the
+                  // fragment), we can just error out. No need to paste the
+                  // tokens as if nothing had happened.
+                  rust_error_at (frag_token->get_locus (),
+                                 "metavar %s used in repetition does not exist",
+                                 frag_token->get_str ().c_str ());
+                  // FIXME:
+                  return expanded;
+                }
+
+              // FIXME: Refactor, ugly
+              repeat_amount = it->second[0].match_amount;
+            }
+        }
+    }
+
+  rust_debug ("repetition amount to use: %lu", repeat_amount);
+  std::vector<std::unique_ptr<AST::Token>> new_macro;
+
+  // We want to generate a "new macro" to substitute with. This new macro
+  // should contain only the tokens inside the pattern
+  for (size_t tok_idx = pattern_start; tok_idx < pattern_end; tok_idx++)
+    new_macro.emplace_back (macro.at (tok_idx)->clone_token ());
+
+  // Then, we want to create a subset of the matches so that
+  // `substitute_tokens()` can only see one fragment per metavar. Let's say we
+  // have the following user input: (1 145 'h')
+  // on the following match arm: ($($lit:literal)*)
+  // which causes the following matches: { "lit": [1, 145, 'h'] }
+  //
+  // The pattern (new_macro) is `$lit:literal`
+  // The first time we expand it, we want $lit to have the following token: 1
+  // The second time, 145
+  // The third and final time, 'h'
+  //
+  // In order to do so we must create "sub maps", which only contain parts of
+  // the original matches
+  // sub-maps: [ { "lit": 1 }, { "lit": 145 }, { "lit": 'h' } ]
+  //
+  // and give them to `substitute_tokens` one by one.
+
+  for (size_t i = 0; i < repeat_amount; i++)
+    {
+      std::map<std::string, std::vector<MatchedFragment>> sub_map;
+      for (auto &kv_match : fragments)
+        {
+          std::vector<MatchedFragment> sub_vec;
+          sub_vec.emplace_back (kv_match.second[i]);
+
+          sub_map.insert ({kv_match.first, sub_vec});
+        }
+
+      auto new_tokens = substitute_tokens (input, new_macro, sub_map);
+
+      for (auto &new_token : new_tokens)
+        expanded.emplace_back (new_token->clone_token ());
+    }
+
+  // FIXME: We also need to make sure that all subsequent fragments
+  // contain the same amount of repetitions as the first one
+
+  return expanded;
+}
+
+std::pair<std::vector<std::unique_ptr<AST::Token>>, size_t>
+MacroExpander::substitute_token (
+  std::vector<std::unique_ptr<AST::Token>> &input,
+  std::vector<std::unique_ptr<AST::Token>> &macro,
+  std::map<std::string, std::vector<MatchedFragment>> &fragments,
+  size_t token_idx)
+{
+  auto &token = macro.at (token_idx);
+  switch (token->get_id ())
+    {
+    case IDENTIFIER:
+      rust_debug ("expanding metavar: %s", token->get_str ().c_str ());
+      return {substitute_metavar (input, fragments, token), 1};
+    case LEFT_PAREN: {
+      // We need to parse up until the closing delimiter and expand this
+      // fragment->n times.
+      rust_debug ("expanding repetition");
+      std::vector<std::unique_ptr<AST::Token>> repetition_pattern;
+      size_t pattern_start = token_idx + 1;
+      size_t pattern_end = pattern_start;
+      for (; pattern_end < macro.size ()
+             && macro.at (pattern_end)->get_id () != RIGHT_PAREN;
+           pattern_end++)
+        ;
+
+      // FIXME: This skips whitespaces... Is that okay??
+      // FIXME: Is there any existing parsing function that allows us to parse
+      // a macro pattern?
+
+      // FIXME: Add error handling in the case we haven't found a matching
+      // closing delimiter
+
+      // FIXME: We need to parse the repetition token now
+
+      return {
+        substitute_repetition (input, macro, fragments, pattern_start,
+                               pattern_end),
+        // + 2 for the opening and closing parentheses which are mandatory
+        // + 1 for the repetitor (+, *, ?)
+        pattern_end - pattern_start + 3};
+    }
+      // TODO: We need to check if the $ was alone. In that case, do
+      // not error out: Simply act as if there was an empty identifier
+      // with no associated fragment and paste the dollar sign in the
+      // transcription. Unsure how to do that since we always have at
+      // least the closing curly brace after an empty $...
+    default:
+      rust_error_at (token->get_locus (),
+                     "unexpected token in macro transcribe: expected "
+                     "%<(%> or identifier after %<$%>, got %<%s%>",
+                     get_token_description (token->get_id ()));
+    }
+
+  // FIXME: gcc_unreachable() error case?
+  return {std::vector<std::unique_ptr<AST::Token>> (), 0};
+}
+
std::vector<std::unique_ptr<AST::Token>>
MacroExpander::substitute_tokens (
  std::vector<std::unique_ptr<AST::Token>> &input,
  std::vector<std::unique_ptr<AST::Token>> &macro,
- std::map<std::string, MatchedFragment> &fragments)
+ std::map<std::string, std::vector<MatchedFragment>> &fragments)
{
  std::vector<std::unique_ptr<AST::Token>> replaced_tokens;

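Editor's note: the "sub map" idea described in the comments of substitute_repetition above is the heart of repetition expansion: for each of the n repetitions, the transcriber is handed a map containing only the i-th match of every metavariable. A small, self-contained sketch of that slicing step, with plain strings standing in for tokens (illustrative types, not the gccrs API):

// --- editorial sketch, not part of the patch ---
#include <cstdio>
#include <map>
#include <string>
#include <vector>

// Toy model: each matched fragment is just the text of the token it covers.
using Matches = std::map<std::string, std::vector<std::string>>;

// Build the i-th "sub map": every metavariable keeps only its i-th match, so
// one expansion of the repetition pattern sees exactly one fragment per key.
static Matches
make_sub_map (const Matches &fragments, size_t i)
{
  Matches sub_map;
  for (const auto &kv : fragments)
    sub_map[kv.first] = {kv.second.at (i)};
  return sub_map;
}

int
main ()
{
  // Matching `(1 145 'h')` against `($($lit:literal)*)`:
  Matches fragments = {{"lit", {"1", "145", "'h'"}}};

  size_t repeat_amount = fragments["lit"].size ();
  for (size_t i = 0; i < repeat_amount; i++)
    {
      auto sub_map = make_sub_map (fragments, i);
      printf ("expansion %zu: $lit -> %s\n", i, sub_map["lit"][0].c_str ());
    }

  return 0;
}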
@@ -3887,54 +4080,20 @@ MacroExpander::substitute_tokens (
      auto &tok = macro.at (i);
      if (tok->get_id () == DOLLAR_SIGN)
        {
-         std::vector<std::unique_ptr<AST::Token>> parsed_toks;
+         // Aaaaah, if only we had C++17 :)
+         // auto [expanded, tok_to_skip] = ...
+         auto p = substitute_token (input, macro, fragments, i + 1);
+         auto expanded = std::move (p.first);
+         auto tok_to_skip = p.second;

-         std::string ident;
-         for (size_t offs = i; i < macro.size (); offs++)
-           {
-             auto &tok = macro.at (offs);
-             if (tok->get_id () == DOLLAR_SIGN && offs == i)
-               {
-                 parsed_toks.push_back (tok->clone_token ());
-               }
-             else if (tok->get_id () == IDENTIFIER)
-               {
-                 rust_assert (tok->as_string ().size () == 1);
-                 ident.push_back (tok->as_string ().at (0));
-                 parsed_toks.push_back (tok->clone_token ());
-               }
-             else
-               {
-                 break;
-               }
-           }
+         i += tok_to_skip;

-         // lookup the ident
-         auto it = fragments.find (ident);
-         if (it == fragments.end ())
-           {
-             // just leave the tokens in
-             for (auto &tok : parsed_toks)
-               {
-                 replaced_tokens.push_back (tok->clone_token ());
-               }
-           }
-         else
-           {
-             // replace
-             MatchedFragment &frag = it->second;
-             for (size_t offs = frag.token_offset_begin;
-                  offs < frag.token_offset_end; offs++)
-               {
-                 auto &tok = input.at (offs);
-                 replaced_tokens.push_back (tok->clone_token ());
-               }
-           }
-         i += parsed_toks.size () - 1;
+         for (auto &token : expanded)
+           replaced_tokens.emplace_back (token->clone_token ());
        }
      else
        {
-         replaced_tokens.push_back (tok->clone_token ());
+         replaced_tokens.emplace_back (tok->clone_token ());
        }
    }

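Editor's note: putting the pieces together, the transcription loop above walks the rule's token stream and, on every `$`, hands off to substitute_token, which returns the expanded tokens plus how many macro tokens to skip. The replacement itself is purely offset-based: each matched fragment remembers a [token_offset_begin, token_offset_end) range into the invocation's token stream, and expansion copies that range. A compact imitation of that mechanism with plain strings as tokens (hypothetical names, not the gccrs API):

// --- editorial sketch, not part of the patch ---
#include <cstdio>
#include <map>
#include <string>
#include <vector>

// Toy fragment: a half-open range of offsets into the invocation's tokens.
struct Fragment
{
  size_t begin;
  size_t end;
};

// Substitute `$name` occurrences in `rule` with the tokens the fragment
// covers in `input`; unknown metavariables are left untouched, mirroring the
// fallback behaviour of the expander.
static std::vector<std::string>
substitute (const std::vector<std::string> &input,
            const std::vector<std::string> &rule,
            const std::map<std::string, Fragment> &fragments)
{
  std::vector<std::string> out;
  for (size_t i = 0; i < rule.size (); i++)
    {
      if (rule[i] == "$" && i + 1 < rule.size ())
        {
          auto it = fragments.find (rule[i + 1]);
          if (it != fragments.end ())
            {
              for (size_t offs = it->second.begin; offs < it->second.end;
                   offs++)
                out.push_back (input[offs]);
              i++; // skip the metavariable name
              continue;
            }
        }
      out.push_back (rule[i]);
    }
  return out;
}

int
main ()
{
  // add!(15 2) with a rule transcribing `$a + $b`
  std::vector<std::string> input = {"15", "2"};
  std::vector<std::string> rule = {"$", "a", "+", "$", "b"};
  std::map<std::string, Fragment> fragments = {{"a", {0, 1}}, {"b", {1, 2}}};

  for (const auto &tok : substitute (input, rule, fragments))
    printf ("%s ", tok.c_str ());
  printf ("\n"); // prints: 15 + 2

  return 0;
}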