
Commit fbf83cf

Upgrade chumsky to 0.10.1

Chumsky 0.10 now tracks positions using byte offsets instead of char offsets. This implicitly fixes a bug in `split_token`, where we were accidentally indexing a string at an invalid offset, pointing between Unicode characters. However, there are still more Unicode-related issues: e.g., `lsp_pos_to_offset` and `offset_to_lsp_pos` still treat positions inconsistently.

1 parent 430db80 · commit fbf83cf
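
To illustrate the byte-vs-char offset distinction behind the `split_token` fix, here is a minimal standalone sketch (not part of this commit; the string is borrowed from the new `test_unicode` test below):

    fn main() {
        let s = "build:🔥 --❄️=🔥";

        // Byte and char offsets diverge as soon as multi-byte characters appear.
        assert_eq!(s.len(), 24);           // UTF-8 bytes
        assert_eq!(s.chars().count(), 14); // Unicode scalar values

        // The space after "🔥" is the 8th char (char offset 7) but sits at
        // byte offset 10. Treating the char offset as a byte index, as the
        // old code effectively did, lands inside the 4-byte encoding of 🔥:
        let byte_offset = s.char_indices().nth(7).unwrap().0;
        assert_eq!(byte_offset, 10);
        assert_eq!(&s[byte_offset..byte_offset + 1], " "); // fine
        // &s[7..8] would panic: byte 7 is not a char boundary.
    }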

File tree

6 files changed, +183 -109 lines

Cargo.lock

Lines changed: 55 additions & 47 deletions
Some generated files are not rendered by default.

Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ regex = "1.11.1"
 tokio = { version = "1.43.0", features = ["full"] }
 tower-lsp = { version = "0.20.0", features = ["proposed"] }
 serde = { version = "1.0", features = ["derive"] }
-chumsky = "0.9.3"
+chumsky = "0.10.1"
 dashmap = "6.1.0"
 ropey = "1.6.1"
 prost = "0.13.4"

src/diagnostic.rs

Lines changed: 37 additions & 30 deletions
@@ -1,10 +1,12 @@
-use std::path::Path;
+use std::fmt::Write as _;
+use std::{ops::Deref, path::Path};
 
-use chumsky::error::Simple;
+use chumsky::error::Rich;
 use regex::Regex;
 use ropey::Rope;
 use tower_lsp::lsp_types::{Diagnostic, DiagnosticSeverity, DiagnosticTag};
 
+use crate::tokenizer::Span;
 use crate::{
     bazel_flags::{combine_key_value_flags, BazelFlags, FlagLookupType},
     file_utils::resolve_bazelrc_path,
@@ -14,40 +16,45 @@ use crate::{
 
 pub fn diagnostics_from_parser<'a>(
     rope: &'a Rope,
-    errors: &'a [Simple<char>],
+    errors: &'a [Rich<'a, char>],
 ) -> impl Iterator<Item = Diagnostic> + 'a {
     errors.iter().filter_map(move |item| {
-        let (message, span) = match item.reason() {
-            chumsky::error::SimpleReason::Unclosed { span, delimiter } => {
-                (format!("Unclosed delimiter {}", delimiter), span.clone())
-            }
-            chumsky::error::SimpleReason::Unexpected => (
-                format!(
-                    "{}, expected {}",
-                    if item.found().is_some() {
-                        "Unexpected token in input"
-                    } else {
-                        "Unexpected end of input"
-                    },
-                    if item.expected().len() == 0 {
-                        "something else".to_string()
-                    } else {
-                        item.expected()
-                            .map(|expected| match expected {
-                                Some(expected) => expected.to_string(),
-                                None => "end of input".to_string(),
-                            })
-                            .collect::<Vec<_>>()
-                            .join(", ")
+        let (message, err_span) = match item.reason() {
+            chumsky::error::RichReason::ExpectedFound { expected, found } => {
+                let mut s = String::new();
+                if let Some(found) = found {
+                    write!(s, "Found {}", found.deref()).unwrap();
+                } else {
+                    write!(&mut s, "Unexpected end of input").unwrap();
+                }
+                write!(&mut s, ", expected ").unwrap();
+                match &expected[..] {
+                    [] => {
+                        write!(s, "something else").unwrap();
                     }
-                ),
-                item.span(),
-            ),
-            chumsky::error::SimpleReason::Custom(msg) => (msg.to_string(), item.span()),
+                    [expected] => {
+                        write!(s, "{}", expected).unwrap();
+                    }
+                    _ => {
+                        for expected in &expected[..expected.len() - 1] {
+                            write!(s, "{}", expected).unwrap();
+                            write!(s, ", ").unwrap();
+                        }
+                        write!(s, "or ").unwrap();
+                        write!(s, "{}", expected.last().unwrap()).unwrap();
+                    }
+                }
+                (s, item.span())
+            }
+            chumsky::error::RichReason::Custom(msg) => (msg.to_string(), item.span()),
         };
 
+        let span = &Span {
+            start: err_span.start,
+            end: err_span.end,
+        };
         || -> Option<Diagnostic> {
-            Some(Diagnostic::new_simple(range_to_lsp(rope, &span)?, message))
+            Some(Diagnostic::new_simple(range_to_lsp(rope, span)?, message))
        }()
     })
 }
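
For context, here is a minimal sketch of how a chumsky 0.10 parser produces the `Rich` errors consumed above. The parser and input are made up for illustration; `parse`, `into_errors`, `reason`, and `span` are the same calls this commit uses:

    use chumsky::{error::Rich, prelude::*};

    fn main() {
        // Request Rich errors via the `extra` type parameter.
        let parser = just::<_, _, extra::Err<Rich<char>>>("build");

        // chumsky 0.10 returns a ParseResult; errors are extracted
        // explicitly, mirroring `into_errors()` in src/parser.rs below.
        for err in parser.parse("bazel").into_errors() {
            println!("{} at {:?}", err, err.span());
        }
    }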

src/language_server.rs

Lines changed: 4 additions & 4 deletions
@@ -28,7 +28,7 @@ pub struct AnalyzedDocument {
     rope: Rope,
     semantic_tokens: Vec<RCSemanticToken>,
     indexed_lines: IndexedLines,
-    parser_errors: Vec<chumsky::prelude::Simple<char>>,
+    has_parser_errors: bool,
 }
 
 #[derive(Deserialize, Serialize, Debug)]
@@ -78,9 +78,9 @@ impl Backend {
             params.uri.to_string(),
             AnalyzedDocument {
                 rope,
-                parser_errors: errors,
                 semantic_tokens,
                 indexed_lines,
+                has_parser_errors: !errors.is_empty(),
             },
         );
 
@@ -319,7 +319,7 @@ impl LanguageServer for Backend {
             .ok_or(Error::invalid_params("Unknown document!"))?;
         let rope = &doc.rope;
 
-        if !doc.parser_errors.is_empty() {
+        if doc.has_parser_errors {
             return Err(Error::invalid_params(
                 "Formatting can only be applied if there are no parsing errors",
             ));
@@ -346,7 +346,7 @@
             .ok_or(Error::invalid_params("Unknown document!"))?;
         let rope = &doc.rope;
 
-        if !doc.parser_errors.is_empty() {
+        if doc.has_parser_errors {
             return Err(Error::invalid_params(
                 "Formatting can only be applied if there are no parsing errors",
             ));
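
The switch from storing the errors to a `has_parser_errors` flag is presumably forced by lifetimes: `Rich<'a, char>` borrows from the parsed text, so the errors cannot outlive it in the long-lived document map. A small sketch of the constraint (names hypothetical, not from the commit):

    use chumsky::error::Rich;

    // A long-lived document can't hold `Rich<'a, char>` values: they borrow
    // from the parsed text. Reducing to an owned summary sidesteps this.
    struct Document {
        has_parser_errors: bool,
    }

    fn analyze(_text: &str) -> Document {
        // Hypothetical stand-in for the real tokenizer's error list.
        let errors: Vec<Rich<char>> = Vec::new();
        Document { has_parser_errors: !errors.is_empty() }
    }

    fn main() {
        assert!(!analyze("build:opt").has_parser_errors);
    }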

src/parser.rs

Lines changed: 25 additions & 6 deletions
@@ -1,4 +1,4 @@
-use chumsky::{error::Simple, Parser};
+use chumsky::{error::Rich, Parser};
 
 use crate::tokenizer::{tokenizer, Span, Spanned, Token};
 
@@ -18,10 +18,10 @@ pub struct Line {
     pub span: Span,
 }
 
-pub struct ParserResult {
+pub struct ParserResult<'a> {
     pub tokens: Vec<Spanned<Token>>,
     pub lines: Vec<Line>,
-    pub errors: Vec<Simple<char>>,
+    pub errors: Vec<Rich<'a, char>>,
 }
 
 // Splits a token at a given separator, keeping the position tracking
@@ -110,7 +110,7 @@ fn parse(tokens: &[(Token, Span)], orig: &str) -> Vec<Line> {
         };
     }
     if let Some(mut l) = current_line.take() {
-        let implicit_final_newline = orig.chars().count();
+        let implicit_final_newline = orig.len();
         l.span = current_line_start..implicit_final_newline;
         result_lines.push(l);
     }
@@ -121,8 +121,9 @@
 // Parser for bazelrc files.
 pub fn parse_from_str(str: &str) -> ParserResult {
     // Tokenize
-    let (tokens_opt, errors) = tokenizer().parse_recovery(str);
-    let tokens = tokens_opt.unwrap_or(Vec::new());
+    let tokenizer_result = tokenizer().parse(str);
+    let tokens = tokenizer_result.output().unwrap_or(&Vec::new()).clone();
+    let errors = tokenizer_result.into_errors();
 
     // Parse
     let lines = parse(&tokens, str);
@@ -359,3 +360,21 @@
         )
     );
 }
+
+#[test]
+fn test_unicode() {
+    // Check that spans are tracked as UTF-8 byte offsets, also across
+    // multi-byte Unicode characters.
+    assert_eq!(
+        parse_from_str("build:🔥 --❄️=🔥").lines,
+        vec!(Line {
+            command: Some(("build".to_string(), 0..5)),
+            config: Some(("🔥".to_string(), 5..10)),
+            flags: vec!(Flag {
+                name: Some(("--❄️".to_string(), 11..19)),
+                value: Some(("🔥".to_string(), 19..24))
+            }),
+            comment: None,
+            span: 0..24
+        })
+    );
+}
