Skip to content

Commit 5d33b88

Browse files
tamaroning authored and CohenArthur committed
gccrs: Remove unnecessary methods of Rust::Lexer
gcc/rust/ChangeLog:

	* lex/rust-lex.cc (Lexer::build_token): Replace function call.
	(Lexer::parse_string): Likewise.
	(Lexer::parse_identifier_or_keyword): Likewise.
	(Lexer::parse_raw_string): Likewise.
	(Lexer::parse_char_or_lifetime): Likewise.
	(Lexer::get_input_codepoint_length): Deleted.
	(Lexer::peek_codepoint_input): Deleted.
	(Lexer::skip_codepoint_input): Deleted.
	* lex/rust-lex.h: Remove some methods and fields.

Signed-off-by: Raiki Tamura <[email protected]>
1 parent b49a958 commit 5d33b88

File tree

2 files changed

+50
-83
lines changed

2 files changed

+50
-83
lines changed

gcc/rust/lex/rust-lex.cc

Lines changed: 50 additions & 78 deletions
Original file line number | Diff line number | Diff line change
@@ -306,8 +306,7 @@ Lexer::build_token ()
306306
Location loc = get_current_location ();
307307

308308
current_char = peek_input ();
309-
current_char32 = peek_codepoint_input ();
310-
skip_codepoint_input ();
309+
skip_input ();
311310

312311
// detect shebang
313312
// Must be the first thing on the first line, starting with #!
@@ -1089,7 +1088,7 @@ Lexer::build_token ()
10891088
}
10901089

10911090
// find identifiers and keywords.
1092-
if (is_identifier_start (current_char32.value))
1091+
if (is_identifier_start (current_char.value))
10931092
return parse_identifier_or_keyword (loc);
10941093

10951094
// int and float literals
@@ -1998,59 +1997,56 @@ Lexer::skip_broken_string_input (Codepoint current_char)
19981997
current_column);
19991998
}
20001999

2001-
// Parses a unicode string.
2000+
// Parses a string.
20022001
TokenPtr
20032002
Lexer::parse_string (Location loc)
20042003
{
2005-
Codepoint current_char32;
2006-
20072004
std::string str;
20082005
str.reserve (16); // some sensible default
20092006

20102007
int length = 1;
2011-
current_char32 = peek_codepoint_input ();
2008+
current_char = peek_input ();
20122009

20132010
// FIXME: This fails if the input ends. How do we check for EOF?
2014-
while (current_char32.value != '"' && !current_char32.is_eof ())
2011+
while (current_char.value != '"' && !current_char.is_eof ())
20152012
{
2016-
if (current_char32.value == '\\')
2013+
if (current_char.value == '\\')
20172014
{
20182015
// parse escape
20192016
auto utf8_escape_pair = parse_utf8_escape ();
2020-
current_char32 = std::get<0> (utf8_escape_pair);
2017+
current_char = std::get<0> (utf8_escape_pair);
20212018

2022-
if (current_char32 == Codepoint (0) && std::get<2> (utf8_escape_pair))
2019+
if (current_char == Codepoint (0) && std::get<2> (utf8_escape_pair))
20232020
length = std::get<1> (utf8_escape_pair) - 1;
20242021
else
20252022
length += std::get<1> (utf8_escape_pair);
20262023

2027-
if (current_char32 != Codepoint (0)
2028-
|| !std::get<2> (utf8_escape_pair))
2029-
str += current_char32;
2030-
2031-
// required as parsing utf8 escape only changes current_char
2032-
current_char32 = peek_codepoint_input ();
2024+
if (current_char != Codepoint (0) || !std::get<2> (utf8_escape_pair))
2025+
str += current_char.as_string ();
20332026

2027+
// FIXME: should remove this but can't.
2028+
// `parse_utf8_escape` does not update `current_char` correctly.
2029+
current_char = peek_input ();
20342030
continue;
20352031
}
20362032

2037-
length += get_input_codepoint_length ();
2033+
length++;
20382034

2039-
str += current_char32;
2040-
skip_codepoint_input ();
2041-
current_char32 = peek_codepoint_input ();
2035+
str += current_char;
2036+
skip_input ();
2037+
current_char = peek_input ();
20422038
}
20432039

20442040
current_column += length;
20452041

2046-
if (current_char32.value == '"')
2042+
if (current_char.value == '"')
20472043
{
20482044
current_column++;
20492045

20502046
skip_input ();
20512047
current_char = peek_input ();
20522048
}
2053-
else if (current_char32.is_eof ())
2049+
else if (current_char.is_eof ())
20542050
{
20552051
rust_error_at (get_current_location (), "unended string literal");
20562052
return Token::make (END_OF_FILE, get_current_location ());
@@ -2072,22 +2068,22 @@ Lexer::parse_identifier_or_keyword (Location loc)
20722068
{
20732069
std::string str;
20742070
str.reserve (16); // default
2075-
str += current_char32.as_string ();
2071+
str += current_char.as_string ();
20762072

20772073
bool first_is_underscore = current_char == '_';
20782074

20792075
int length = 1;
2080-
current_char32 = peek_codepoint_input ();
2076+
current_char = peek_input ();
20812077

20822078
// loop through entire name
2083-
while (is_identifier_continue (current_char32.value))
2079+
while (is_identifier_continue (current_char.value))
20842080
{
2085-
auto s = current_char32.as_string ();
2081+
auto s = current_char.as_string ();
20862082
length++;
20872083

2088-
str += current_char32.as_string ();
2089-
skip_codepoint_input ();
2090-
current_char32 = peek_codepoint_input ();
2084+
str += current_char.as_string ();
2085+
skip_input ();
2086+
current_char = peek_input ();
20912087
}
20922088

20932089
current_column += length;
@@ -2141,11 +2137,11 @@ Lexer::parse_raw_string (Location loc, int initial_hash_count)
21412137

21422138
length++;
21432139
skip_input ();
2144-
Codepoint current_char32 = peek_codepoint_input ();
2140+
current_char = peek_input ();
21452141

2146-
while (!current_char32.is_eof ())
2142+
while (!current_char.is_eof ())
21472143
{
2148-
if (current_char32.value == '"')
2144+
if (current_char.value == '"')
21492145
{
21502146
bool enough_hashes = true;
21512147

@@ -2170,9 +2166,9 @@ Lexer::parse_raw_string (Location loc, int initial_hash_count)
21702166

21712167
length++;
21722168

2173-
str += current_char32;
2174-
skip_codepoint_input ();
2175-
current_char32 = peek_codepoint_input ();
2169+
str += current_char.as_string ();
2170+
skip_input ();
2171+
current_char = peek_input ();
21762172
}
21772173

21782174
current_column += length;
@@ -2424,29 +2420,27 @@ Lexer::parse_decimal_int_or_float (Location loc)
24242420
TokenPtr
24252421
Lexer::parse_char_or_lifetime (Location loc)
24262422
{
2427-
Codepoint current_char32;
2428-
24292423
int length = 1;
24302424

2431-
current_char32 = peek_codepoint_input ();
2432-
if (current_char32.is_eof ())
2425+
current_char = peek_input ();
2426+
if (current_char.is_eof ())
24332427
return nullptr;
24342428

24352429
// parse escaped char literal
2436-
if (current_char32.value == '\\')
2430+
if (current_char.value == '\\')
24372431
{
24382432
// parse escape
24392433
auto utf8_escape_pair = parse_utf8_escape ();
2440-
current_char32 = std::get<0> (utf8_escape_pair);
2434+
Codepoint escaped_char = std::get<0> (utf8_escape_pair);
24412435
length += std::get<1> (utf8_escape_pair);
24422436

2443-
if (peek_codepoint_input ().value != '\'')
2437+
if (peek_input ().value != '\'')
24442438
{
24452439
rust_error_at (get_current_location (), "unended character literal");
24462440
}
24472441
else
24482442
{
2449-
skip_codepoint_input ();
2443+
skip_input ();
24502444
current_char = peek_input ();
24512445
length++;
24522446
}
@@ -2455,15 +2449,16 @@ Lexer::parse_char_or_lifetime (Location loc)
24552449

24562450
loc += length - 1;
24572451

2458-
return Token::make_char (loc, current_char32);
2452+
return Token::make_char (loc, escaped_char);
24592453
}
24602454
else
24612455
{
2462-
skip_codepoint_input ();
2456+
skip_input ();
24632457

2464-
if (peek_codepoint_input ().value == '\'')
2458+
if (peek_input ().value == '\'')
24652459
{
24662460
// parse non-escaped char literal
2461+
Codepoint non_escaped_char = current_char;
24672462

24682463
// skip the ' character
24692464
skip_input ();
@@ -2474,21 +2469,21 @@ Lexer::parse_char_or_lifetime (Location loc)
24742469

24752470
loc += 2;
24762471

2477-
return Token::make_char (loc, current_char32);
2472+
return Token::make_char (loc, non_escaped_char);
24782473
}
2479-
else if (is_identifier_start (current_char32.value))
2474+
else if (is_identifier_start (current_char.value))
24802475
{
24812476
// parse lifetime name
24822477
std::string str;
2483-
str += current_char32;
2478+
str += current_char.as_string ();
24842479
length++;
24852480

2486-
current_char32 = peek_codepoint_input ();
2487-
while (is_identifier_continue (current_char32.value))
2481+
current_char = peek_input ();
2482+
while (is_identifier_continue (current_char.value))
24882483
{
2489-
str += current_char32;
2490-
skip_codepoint_input ();
2491-
current_char32 = peek_codepoint_input ();
2484+
str += current_char.as_string ();
2485+
skip_input ();
2486+
current_char = peek_input ();
24922487
length++;
24932488
}
24942489

@@ -2512,29 +2507,6 @@ Lexer::parse_char_or_lifetime (Location loc)
25122507
}
25132508
}
25142509

2515-
// TODO remove this function
2516-
// Returns the length of the codepoint at the current position.
2517-
int
2518-
Lexer::get_input_codepoint_length ()
2519-
{
2520-
return 1;
2521-
}
2522-
2523-
// TODO remove this function
2524-
// Returns the codepoint at the current position.
2525-
Codepoint
2526-
Lexer::peek_codepoint_input ()
2527-
{
2528-
return peek_input ();
2529-
}
2530-
2531-
// TODO remove this function
2532-
void
2533-
Lexer::skip_codepoint_input ()
2534-
{
2535-
skip_input ();
2536-
}
2537-
25382510
void
25392511
Lexer::split_current_token (TokenId new_left, TokenId new_right)
25402512
{

gcc/rust/lex/rust-lex.h

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -136,10 +136,6 @@ class Lexer
136136
std::pair<long, int> parse_partial_hex_escape ();
137137
std::pair<Codepoint, int> parse_partial_unicode_escape ();
138138

139-
int get_input_codepoint_length ();
140-
// Peeks the current utf-8 char
141-
Codepoint peek_codepoint_input ();
142-
void skip_codepoint_input ();
143139
void skip_broken_string_input (Codepoint current_char);
144140

145141
TokenPtr parse_byte_char (Location loc);
@@ -393,7 +389,6 @@ class Lexer
393389
int current_column;
394390
// Current character.
395391
Codepoint current_char;
396-
Codepoint current_char32;
397392
// Line map.
398393
Linemap *line_map;
399394

0 commit comments

Comments
 (0)