Skip to content

Commit 5fffd96

Browse files
committed
Feat: make error message for quoted elements less ambiguous
1 parent 0c17243 commit 5fffd96

File tree

1 file changed

+55
-16
lines changed

1 file changed

+55
-16
lines changed

vhdl_lang/src/syntax/tokens/tokenizer.rs

+55 -16
Original file line number | Diff line number | Diff line change
@@ -1170,17 +1170,39 @@ fn parse_quoted(
11701170
buffer.bytes.push(quote)
11711171
}
11721172

1173-
while let Some(chr) = reader.pop()? {
1174-
is_multiline |= chr == b'\n';
1175-
if chr == quote {
1176-
if reader.peek()? == Some(quote) {
1177-
reader.skip();
1178-
} else {
1179-
found_end = true;
1180-
break;
1173+
// Closure that allows usage of the `?` operator
1174+
let mut quoted_inner = || {
1175+
while let Some(chr) = reader.pop()? {
1176+
is_multiline |= chr == b'\n';
1177+
if chr == quote {
1178+
if reader.peek()? == Some(quote) {
1179+
reader.skip();
1180+
} else {
1181+
found_end = true;
1182+
break;
1183+
}
1184+
}
1185+
buffer.bytes.push(chr);
1186+
}
1187+
Ok(())
1188+
};
1189+
1190+
match quoted_inner() {
1191+
Ok(_) => {}
1192+
// When we discover a token error, consume all remaining
1193+
// characters respecting quote rules.
1194+
Err(token_err) => {
1195+
while let Some(char) = reader.pop_char() {
1196+
if char == quote as char {
1197+
if reader.peek_char() == Some(quote as char) {
1198+
reader.skip();
1199+
} else {
1200+
break;
1201+
}
1202+
}
11811203
}
1204+
return Err(token_err);
11821205
}
1183-
buffer.bytes.push(chr);
11841206
}
11851207

11861208
if include_quote {
@@ -1493,13 +1515,7 @@ fn parse_bit_string(
14931515
bit_string_length: Option<u32>,
14941516
start: usize,
14951517
) -> Result<(Kind, Value), TokenError> {
1496-
let value = match parse_string(buffer, reader) {
1497-
Ok(value) => value,
1498-
Err(mut err) => {
1499-
err.message = "Invalid bit string literal".to_string();
1500-
return Err(err);
1501-
}
1502-
};
1518+
let value = parse_string(buffer, reader)?;
15031519

15041520
let end_pos = reader.state().pos();
15051521
let actual_value = reader
@@ -2612,6 +2628,29 @@ my_other_ident",
26122628
);
26132629
}
26142630

2631+
#[test]
2632+
fn non_ascii_in_bit_string() {
2633+
let code = Code::new("X\"Ä087€\"");
2634+
let (tokens, _) = code.tokenize_result();
2635+
assert_eq!(
2636+
tokens,
2637+
vec![Err(Diagnostic::syntax_error(
2638+
code.s1("€"),
2639+
"Found invalid latin-1 character '€'",
2640+
))]
2641+
);
2642+
2643+
let code = Code::new("X\"Ä087€\"\"A\"");
2644+
let (tokens, _) = code.tokenize_result();
2645+
assert_eq!(
2646+
tokens,
2647+
vec![Err(Diagnostic::syntax_error(
2648+
code.s1("€"),
2649+
"Found invalid latin-1 character '€'",
2650+
))]
2651+
);
2652+
}
2653+
26152654
#[test]
26162655
fn tokenize_based_integer() {
26172656
assert_eq!(

0 commit comments

Comments
 (0)