@@ -306,8 +306,7 @@ Lexer::build_token ()
306306 Location loc = get_current_location ();
307307
308308 current_char = peek_input ();
309- current_char32 = peek_codepoint_input ();
310- skip_codepoint_input ();
309+ skip_input ();
311310
312311 // detect shebang
313312 // Must be the first thing on the first line, starting with #!
@@ -1089,7 +1088,7 @@ Lexer::build_token ()
10891088 }
10901089
10911090 // find identifiers and keywords.
1092- if (is_identifier_start (current_char32 .value ))
1091+ if (is_identifier_start (current_char .value ))
10931092 return parse_identifier_or_keyword (loc);
10941093
10951094 // int and float literals
@@ -1998,59 +1997,56 @@ Lexer::skip_broken_string_input (Codepoint current_char)
19981997 current_column);
19991998}
20001999
2001- // Parses a unicode string.
2000+ // Parses a string.
20022001TokenPtr
20032002Lexer::parse_string (Location loc)
20042003{
2005- Codepoint current_char32;
2006-
20072004 std::string str;
20082005 str.reserve (16 ); // some sensible default
20092006
20102007 int length = 1 ;
2011- current_char32 = peek_codepoint_input ();
2008+ current_char = peek_input ();
20122009
20132010 // FIXME: This fails if the input ends. How do we check for EOF?
2014- while (current_char32 .value != ' "' && !current_char32 .is_eof ())
2011+ while (current_char .value != ' "' && !current_char .is_eof ())
20152012 {
2016- if (current_char32 .value == ' \\ ' )
2013+ if (current_char .value == ' \\ ' )
20172014 {
20182015 // parse escape
20192016 auto utf8_escape_pair = parse_utf8_escape ();
2020- current_char32 = std::get<0 > (utf8_escape_pair);
2017+ current_char = std::get<0 > (utf8_escape_pair);
20212018
2022- if (current_char32 == Codepoint (0 ) && std::get<2 > (utf8_escape_pair))
2019+ if (current_char == Codepoint (0 ) && std::get<2 > (utf8_escape_pair))
20232020 length = std::get<1 > (utf8_escape_pair) - 1 ;
20242021 else
20252022 length += std::get<1 > (utf8_escape_pair);
20262023
2027- if (current_char32 != Codepoint (0 )
2028- || !std::get<2 > (utf8_escape_pair))
2029- str += current_char32;
2030-
2031- // required as parsing utf8 escape only changes current_char
2032- current_char32 = peek_codepoint_input ();
2024+ if (current_char != Codepoint (0 ) || !std::get<2 > (utf8_escape_pair))
2025+ str += current_char.as_string ();
20332026
2027+ // FIXME: should remove this but can't.
2028+ // `parse_utf8_escape` does not update `current_char` correctly.
2029+ current_char = peek_input ();
20342030 continue ;
20352031 }
20362032
2037- length += get_input_codepoint_length () ;
2033+ length++ ;
20382034
2039- str += current_char32 ;
2040- skip_codepoint_input ();
2041- current_char32 = peek_codepoint_input ();
2035+ str += current_char ;
2036+ skip_input ();
2037+ current_char = peek_input ();
20422038 }
20432039
20442040 current_column += length;
20452041
2046- if (current_char32 .value == ' "' )
2042+ if (current_char .value == ' "' )
20472043 {
20482044 current_column++;
20492045
20502046 skip_input ();
20512047 current_char = peek_input ();
20522048 }
2053- else if (current_char32 .is_eof ())
2049+ else if (current_char .is_eof ())
20542050 {
20552051 rust_error_at (get_current_location (), " unended string literal" );
20562052 return Token::make (END_OF_FILE, get_current_location ());
@@ -2072,22 +2068,22 @@ Lexer::parse_identifier_or_keyword (Location loc)
20722068{
20732069 std::string str;
20742070 str.reserve (16 ); // default
2075- str += current_char32 .as_string ();
2071+ str += current_char .as_string ();
20762072
20772073 bool first_is_underscore = current_char == ' _' ;
20782074
20792075 int length = 1 ;
2080- current_char32 = peek_codepoint_input ();
2076+ current_char = peek_input ();
20812077
20822078 // loop through entire name
2083- while (is_identifier_continue (current_char32 .value ))
2079+ while (is_identifier_continue (current_char .value ))
20842080 {
2085- auto s = current_char32 .as_string ();
2081+ auto s = current_char .as_string ();
20862082 length++;
20872083
2088- str += current_char32 .as_string ();
2089- skip_codepoint_input ();
2090- current_char32 = peek_codepoint_input ();
2084+ str += current_char .as_string ();
2085+ skip_input ();
2086+ current_char = peek_input ();
20912087 }
20922088
20932089 current_column += length;
@@ -2141,11 +2137,11 @@ Lexer::parse_raw_string (Location loc, int initial_hash_count)
21412137
21422138 length++;
21432139 skip_input ();
2144- Codepoint current_char32 = peek_codepoint_input ();
2140+ current_char = peek_input ();
21452141
2146- while (!current_char32 .is_eof ())
2142+ while (!current_char .is_eof ())
21472143 {
2148- if (current_char32 .value == ' "' )
2144+ if (current_char .value == ' "' )
21492145 {
21502146 bool enough_hashes = true ;
21512147
@@ -2170,9 +2166,9 @@ Lexer::parse_raw_string (Location loc, int initial_hash_count)
21702166
21712167 length++;
21722168
2173- str += current_char32 ;
2174- skip_codepoint_input ();
2175- current_char32 = peek_codepoint_input ();
2169+ str += current_char. as_string () ;
2170+ skip_input ();
2171+ current_char = peek_input ();
21762172 }
21772173
21782174 current_column += length;
@@ -2424,29 +2420,27 @@ Lexer::parse_decimal_int_or_float (Location loc)
24242420TokenPtr
24252421Lexer::parse_char_or_lifetime (Location loc)
24262422{
2427- Codepoint current_char32;
2428-
24292423 int length = 1 ;
24302424
2431- current_char32 = peek_codepoint_input ();
2432- if (current_char32 .is_eof ())
2425+ current_char = peek_input ();
2426+ if (current_char .is_eof ())
24332427 return nullptr ;
24342428
24352429 // parse escaped char literal
2436- if (current_char32 .value == ' \\ ' )
2430+ if (current_char .value == ' \\ ' )
24372431 {
24382432 // parse escape
24392433 auto utf8_escape_pair = parse_utf8_escape ();
2440- current_char32 = std::get<0 > (utf8_escape_pair);
2434+ Codepoint escaped_char = std::get<0 > (utf8_escape_pair);
24412435 length += std::get<1 > (utf8_escape_pair);
24422436
2443- if (peek_codepoint_input ().value != ' \' ' )
2437+ if (peek_input ().value != ' \' ' )
24442438 {
24452439 rust_error_at (get_current_location (), " unended character literal" );
24462440 }
24472441 else
24482442 {
2449- skip_codepoint_input ();
2443+ skip_input ();
24502444 current_char = peek_input ();
24512445 length++;
24522446 }
@@ -2455,15 +2449,16 @@ Lexer::parse_char_or_lifetime (Location loc)
24552449
24562450 loc += length - 1 ;
24572451
2458- return Token::make_char (loc, current_char32 );
2452+ return Token::make_char (loc, escaped_char );
24592453 }
24602454 else
24612455 {
2462- skip_codepoint_input ();
2456+ skip_input ();
24632457
2464- if (peek_codepoint_input ().value == ' \' ' )
2458+ if (peek_input ().value == ' \' ' )
24652459 {
24662460 // parse non-escaped char literal
2461+ Codepoint non_escaped_char = current_char;
24672462
24682463 // skip the ' character
24692464 skip_input ();
@@ -2474,21 +2469,21 @@ Lexer::parse_char_or_lifetime (Location loc)
24742469
24752470 loc += 2 ;
24762471
2477- return Token::make_char (loc, current_char32 );
2472+ return Token::make_char (loc, non_escaped_char );
24782473 }
2479- else if (is_identifier_start (current_char32 .value ))
2474+ else if (is_identifier_start (current_char .value ))
24802475 {
24812476 // parse lifetime name
24822477 std::string str;
2483- str += current_char32 ;
2478+ str += current_char. as_string () ;
24842479 length++;
24852480
2486- current_char32 = peek_codepoint_input ();
2487- while (is_identifier_continue (current_char32 .value ))
2481+ current_char = peek_input ();
2482+ while (is_identifier_continue (current_char .value ))
24882483 {
2489- str += current_char32 ;
2490- skip_codepoint_input ();
2491- current_char32 = peek_codepoint_input ();
2484+ str += current_char. as_string () ;
2485+ skip_input ();
2486+ current_char = peek_input ();
24922487 length++;
24932488 }
24942489
@@ -2512,29 +2507,6 @@ Lexer::parse_char_or_lifetime (Location loc)
25122507 }
25132508}
25142509
2515- // TODO remove this function
2516- // Returns the length of the codepoint at the current position.
2517- int
2518- Lexer::get_input_codepoint_length ()
2519- {
2520- return 1 ;
2521- }
2522-
2523- // TODO remove this function
2524- // Returns the codepoint at the current position.
2525- Codepoint
2526- Lexer::peek_codepoint_input ()
2527- {
2528- return peek_input ();
2529- }
2530-
2531- // TODO remove this function
2532- void
2533- Lexer::skip_codepoint_input ()
2534- {
2535- skip_input ();
2536- }
2537-
25382510void
25392511Lexer::split_current_token (TokenId new_left, TokenId new_right)
25402512{
0 commit comments