@@ -2564,22 +2564,13 @@ fn num_decimal_digits(num: usize) -> usize {
2564
2564
2565
2565
// We replace some characters so the CLI output is always consistent and underlines aligned.
2566
2566
// Keep the following list in sync with `rustc_span::char_width`.
2567
+ // ATTENTION: keep lexicographically sorted so that the binary search will work
2567
2568
const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
2568
- ('\t', " "), // We do our own tab replacement
2569
- ('\u{200D}', ""), // Replace ZWJ with nothing for consistent terminal output of grapheme clusters.
2570
- ('\u{202A}', "�"), // The following unicode text flow control characters are inconsistently
2571
- ('\u{202B}', "�"), // supported across CLIs and can cause confusion due to the bytes on disk
2572
- ('\u{202D}', "�"), // not corresponding to the visible source code, so we replace them always.
2573
- ('\u{202E}', "�"),
2574
- ('\u{2066}', "�"),
2575
- ('\u{2067}', "�"),
2576
- ('\u{2068}', "�"),
2577
- ('\u{202C}', "�"),
2578
- ('\u{2069}', "�"),
2569
+ // tidy-alphabetical-start
2579
2570
// In terminals without Unicode support the following will be garbled, but in *all* terminals
2580
2571
// the underlying codepoint will be as well. We could gate this replacement behind a "unicode
2581
2572
// support" gate.
2582
- ('\u{0000} ', "␀"),
2573
+ ('\0 ', "␀"),
2583
2574
('\u{0001}', "␁"),
2584
2575
('\u{0002}', "␂"),
2585
2576
('\u{0003}', "␃"),
@@ -2588,11 +2579,12 @@ const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
2588
2579
('\u{0006}', "␆"),
2589
2580
('\u{0007}', "␇"),
2590
2581
('\u{0008}', "␈"),
2591
- ('\u{000B}', "␋"),
2592
- ('\u{000C}', "␌"),
2593
- ('\u{000D}', "␍"),
2594
- ('\u{000E}', "␎"),
2595
- ('\u{000F}', "␏"),
2582
+ ('\u{0009}', " "), // We do our own tab replacement
2583
+ ('\u{000b}', "␋"),
2584
+ ('\u{000c}', "␌"),
2585
+ ('\u{000d}', "␍"),
2586
+ ('\u{000e}', "␎"),
2587
+ ('\u{000f}', "␏"),
2596
2588
('\u{0010}', "␐"),
2597
2589
('\u{0011}', "␑"),
2598
2590
('\u{0012}', "␒"),
@@ -2603,21 +2595,37 @@ const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
2603
2595
('\u{0017}', "␗"),
2604
2596
('\u{0018}', "␘"),
2605
2597
('\u{0019}', "␙"),
2606
- ('\u{001A}', "␚"),
2607
- ('\u{001B}', "␛"),
2608
- ('\u{001C}', "␜"),
2609
- ('\u{001D}', "␝"),
2610
- ('\u{001E}', "␞"),
2611
- ('\u{001F}', "␟"),
2612
- ('\u{007F}', "␡"),
2598
+ ('\u{001a}', "␚"),
2599
+ ('\u{001b}', "␛"),
2600
+ ('\u{001c}', "␜"),
2601
+ ('\u{001d}', "␝"),
2602
+ ('\u{001e}', "␞"),
2603
+ ('\u{001f}', "␟"),
2604
+ ('\u{007f}', "␡"),
2605
+ ('\u{200d}', ""), // Replace ZWJ for consistent terminal output of grapheme clusters.
2606
+ ('\u{202a}', "�"), // The following unicode text flow control characters are inconsistently
2607
+ ('\u{202b}', "�"), // supported across CLIs and can cause confusion due to the bytes on disk
2608
+ ('\u{202c}', "�"), // not corresponding to the visible source code, so we replace them always.
2609
+ ('\u{202d}', "�"),
2610
+ ('\u{202e}', "�"),
2611
+ ('\u{2066}', "�"),
2612
+ ('\u{2067}', "�"),
2613
+ ('\u{2068}', "�"),
2614
+ ('\u{2069}', "�"),
2615
+ // tidy-alphabetical-end
2613
2616
];
2614
2617
2615
- fn normalize_whitespace(str: &str) -> String {
2616
- let mut s = str.to_string();
2617
- for (c, replacement) in OUTPUT_REPLACEMENTS {
2618
- s = s.replace(*c, replacement);
2619
- }
2620
- s
2618
+ fn normalize_whitespace(s: &str) -> String {
2619
+ // Scan the input string for a character in the ordered table above. If it's present, replace
2620
+ // it with its alternative string (it can be more than 1 char!). Otherwise, retain the input
2621
+ // char. The output string is pre-allocated with the input's byte length as its initial capacity.
2622
+ s.chars().fold(String::with_capacity(s.len()), |mut s, c| {
2623
+ match OUTPUT_REPLACEMENTS.binary_search_by_key(&c, |(k, _)| *k) {
2624
+ Ok(i) => s.push_str(OUTPUT_REPLACEMENTS[i].1),
2625
+ _ => s.push(c),
2626
+ }
2627
+ s
2628
+ })
2621
2629
}
2622
2630
2623
2631
fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) {
0 commit comments