Skip to content

Commit

Permalink
Fix raw string handling of invalid replacement codepoints
Browse files (browse the repository at this point in the history)
Fix raw string tag scanning to retry rather than advancing, so that replacement
codepoints are checked for validity.

Add tests for valid replacement codepoints. Invalid replacement codepoints are
troublesome to embed in tests, and remain untested.
  • Loading branch information
Jason Evans committed Sep 20, 2024
1 parent 495d60f commit 65d1c61
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 3 deletions.
5 changes: 2 additions & 3 deletions bootstrap/src/hmc/scan.ml
Original file line number Diff line number Diff line change
Expand Up @@ -5832,15 +5832,14 @@ module Dfa = struct
end
);
];
- default1=(fun ({mals; ltag; body; ltag_cursor} as state) (View.{pcursor; _} as view)
- t ->
+ default1=(fun ({mals; ltag; body; ltag_cursor} as state) (View.{pcursor; _} as view) t ->
let lcp = Source.Cursor.rget ltag_cursor in
let rcp = Source.Cursor.rget pcursor in
match Codepoint.(lcp = rcp) with
| false -> begin
let state' =
State.Rstring_body.init ~mals ~ltag ~body_base:(Source.Slice.base body) in
- advance (State_rstring_body state') view t
+ retry (State_rstring_body state') t
end
| true -> advance (State_rstring_rtag (State.Rstring_rtag.next state)) view t
);
Expand Down
3 changes: 3 additions & 0 deletions bootstrap/test/hmc/scan/test_codepoint.expected
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
``'\u{000_ff_fd}'``
(Tok_codepoint {source=[1:0..1:15); codepoint=(Constant '�')})
(Tok_end_of_input {source=[1:15..1:15)})
``'�'``
(Tok_codepoint {source=[1:0..1:3); codepoint=(Constant '�')})
(Tok_end_of_input {source=[1:3..1:3)})
``'``
(Tok_tick {source=[1:0..1:1)})
(Tok_end_of_input {source=[1:1..1:1)})
Expand Down
1 change: 1 addition & 0 deletions bootstrap/test/hmc/scan/test_codepoint.ml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ let test () =
scan_str "'\\t' '\\n' '\\r' '\\'' '\\\\'";
scan_str "'\\u{41}'";
scan_str "'\\u{000_ff_fd}'";
scan_str "'�'";

(* Type parameter sigils. *)
scan_str "'";
Expand Down
6 changes: 6 additions & 0 deletions bootstrap/test/hmc/scan/test_rstring.expected
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ b```_`
`_c9_``_xx_`...``...`_`...`__`...`__`_0_`_1_`_2_`_3_`_4_`_5_`_6_`_7_`_8_`_9_`_a_`_b_`_c_`_d_`_e_`_f_...`_xx_``_c9_`
(Tok_rstring {source=[1:0..1:103); rstring=(Constant "...``...`_`...`__`...`__`_0_`_1_`_2_`_3_`_4_`_5_`_6_`_7_`_8_`_9_`_a_`_b_`_c_`_d_`_e_`_f_...")})
(Tok_end_of_input {source=[1:103..1:103)})
`_```�```_`
(Tok_rstring {source=[1:0..1:5); rstring=(Constant "�")})
(Tok_end_of_input {source=[1:5..1:5)})
`_``__`...`�_`_�...`__``_`
(Tok_rstring {source=[1:0..1:20); rstring=(Constant "...`�_`_�...")})
(Tok_end_of_input {source=[1:20..1:20)})
`_`` `aoeu` ``_`
(Tok_rstring {source=[1:0..1:10); rstring=(Malformed ["[1:1..1:2): Invalid codepoint in raw string tag"])})
(Tok_end_of_input {source=[1:10..1:10)})
Expand Down
3 changes: 3 additions & 0 deletions bootstrap/test/hmc/scan/test_rstring.ml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ b``|};
scan_str
{|`_xx_`...``...`_`...`__`...`__`_0_`_1_`_2_`_3_`_4_`_5_`_6_`_7_`_8_`_9_`_a_`_b_`_c_`_d_`_e_`_f_...`_xx_`|};
scan_str {|``�``|};
scan_str {|`__`...`�_`_�...`__`|};
(* Errors. *)
scan_str {|` `aoeu` `|};
scan_str {|`*`aoeu`*`|};
Expand Down

0 comments on commit 65d1c61

Please sign in to comment.