From cdea62b39a8a55cac666410b245ee1a089dc5a73 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 16 Sep 2024 20:02:06 -0700 Subject: [PATCH] Implement "long" integer literal support This is OCaml-specific syntax. --- bootstrap/src/hmc/scan.ml | 42 ++++++++++++++++++- bootstrap/src/hmc/scan.mli | 1 + bootstrap/src/mlc/scan.ml | 5 +-- bootstrap/test/hmc/scan/test_integer.expected | 15 +++++++ bootstrap/test/hmc/scan/test_integer.ml | 5 +++ 5 files changed, 62 insertions(+), 6 deletions(-) diff --git a/bootstrap/src/hmc/scan.ml b/bootstrap/src/hmc/scan.ml index 9d415bc43..ee8a1f685 100644 --- a/bootstrap/src/hmc/scan.ml +++ b/bootstrap/src/hmc/scan.ml @@ -375,6 +375,7 @@ module Token = struct | Tok_i16 of {source: Source.Slice.t; i16: i16 Rendition.t} | Tok_u32 of {source: Source.Slice.t; u32: u32 Rendition.t} | Tok_i32 of {source: Source.Slice.t; i32: i32 Rendition.t} + | Tok_long of {source: Source.Slice.t; long: u64 Rendition.t} | Tok_u64 of {source: Source.Slice.t; u64: u64 Rendition.t} | Tok_i64 of {source: Source.Slice.t; i64: i64 Rendition.t} | Tok_u128 of {source: Source.Slice.t; u128: u128 Rendition.t} @@ -939,6 +940,14 @@ module Token = struct |> Rendition.pp I32.pp i32 |> Fmt.fmt "}" end + | Tok_long {source; long} -> begin + formatter + |> Fmt.fmt "Tok_long {source=" + |> Source.Slice.pp source + |> Fmt.fmt "; long=" + |> Rendition.pp U64.pp long + |> Fmt.fmt "}" + end | Tok_u64 {source; u64} -> begin formatter |> Fmt.fmt "Tok_u64 {source=" @@ -1156,6 +1165,7 @@ module Token = struct | Tok_i16 {source; _} | Tok_u32 {source; _} | Tok_i32 {source; _} + | Tok_long {source; _} | Tok_u64 {source; _} | Tok_i64 {source; _} | Tok_u128 {source; _} @@ -1224,6 +1234,7 @@ module Token = struct | Tok_i16 {i16=(Constant _); _} | Tok_u32 {u32=(Constant _); _} | Tok_i32 {i32=(Constant _); _} + | Tok_long {long=(Constant _); _} | Tok_u64 {u64=(Constant _); _} | Tok_i64 {i64=(Constant _); _} | Tok_u128 {u128=(Constant _); _} @@ -1259,6 +1270,7 @@ module Token = struct | 
Tok_i16 {i16=(Malformed mals); _} | Tok_u32 {u32=(Malformed mals); _} | Tok_i32 {i32=(Malformed mals); _} + | Tok_long {long=(Malformed mals); _} | Tok_u64 {u64=(Malformed mals); _} | Tok_i64 {i64=(Malformed mals); _} | Tok_u128 {u128=(Malformed mals); _} @@ -2071,6 +2083,8 @@ module State = struct {n; radix} end + module Integer_l = Integer_u + module Integer_i = Integer_u module Integer_n = Integer_u @@ -2682,6 +2696,7 @@ module State = struct | State_integer_oct_dot of Integer_oct_dot.t | State_integer_dec_dot of Integer_dec_dot.t | State_integer_hex_dot of Integer_hex_dot.t + | State_integer_l of Integer_l.t | State_integer_u of Integer_u.t | State_integer_i of Integer_i.t | State_integer_n of Integer_n.t @@ -2827,6 +2842,7 @@ module State = struct formatter |> Fmt.fmt "State_integer_dec_dot " |> Integer_dec_dot.pp v | State_integer_hex_dot v -> formatter |> Fmt.fmt "State_integer_hex_dot " |> Integer_hex_dot.pp v + | State_integer_l v -> formatter |> Fmt.fmt "State_integer_l " |> Integer_l.pp v | State_integer_u v -> formatter |> Fmt.fmt "State_integer_u " |> Integer_u.pp v | State_integer_i v -> formatter |> Fmt.fmt "State_integer_i " |> Integer_i.pp v | State_integer_n v -> formatter |> Fmt.fmt "State_integer_n " |> Integer_n.pp v @@ -3994,6 +4010,7 @@ module Dfa = struct | Subtype_i16 | Subtype_u32 | Subtype_i32 + | Subtype_long | Subtype_u64 | Subtype_i64 | Subtype_u128 @@ -4024,6 +4041,7 @@ module Dfa = struct | Subtype_i16 -> Some Sint.(widen_to_nat_hlt (abs (I16.(extend_to_sint min_value)))) | Subtype_u32 -> Some U32.(extend_to_nat max_value) | Subtype_i32 -> Some Sint.(widen_to_nat_hlt (abs (I32.(extend_to_sint min_value)))) + | Subtype_long -> Some U64.(extend_to_nat max_value) | Subtype_u64 -> Some U64.(extend_to_nat max_value) | Subtype_i64 -> Some (Nat.like_of_zint_hlt (Zint.abs I64.(extend_to_zint min_value))) | Subtype_u128 -> Some U128.(extend_to_nat max_value) @@ -4057,6 +4075,7 @@ module Dfa = struct | Subtype_i16 -> Tok_i16 {source; 
i16=malformed} | Subtype_u32 -> Tok_u32 {source; u32=malformed} | Subtype_i32 -> Tok_i32 {source; i32=malformed} + | Subtype_long -> Tok_long {source; long=malformed} | Subtype_u64 -> Tok_u64 {source; u64=malformed} | Subtype_i64 -> Tok_i64 {source; i64=malformed} | Subtype_u128 -> Tok_u128 {source; u128=malformed} @@ -4079,6 +4098,7 @@ module Dfa = struct | Subtype_i16 -> Tok_i16 {source; i16=Constant (I16.trunc_of_nat n)} | Subtype_u32 -> Tok_u32 {source; u32=Constant (U32.trunc_of_nat n)} | Subtype_i32 -> Tok_i32 {source; i32=Constant (I32.trunc_of_nat n)} + | Subtype_long -> Tok_long {source; long=Constant (U64.trunc_of_nat n)} | Subtype_u64 -> Tok_u64 {source; u64=Constant (U64.trunc_of_nat n)} | Subtype_i64 -> Tok_i64 {source; i64=Constant (I64.trunc_of_nat n)} | Subtype_u128 -> Tok_u128 {source; u128=Constant (U128.trunc_of_nat n)} @@ -4166,6 +4186,7 @@ module Dfa = struct (cpset_of_cps "b", advance State_integer_0b); (cpset_of_cps "o", advance State_integer_0o); (cpset_of_cps "x", advance State_integer_0x); + (cpset_of_cps "L", advance (State_integer_l (State.Integer_l.init ~n:Nat.k_0 ~radix:Dec))); (cpset_of_cps "u", advance (State_integer_u (State.Integer_u.init ~n:Nat.k_0 ~radix:Dec))); (cpset_of_cps "i", advance (State_integer_i (State.Integer_i.init ~n:Nat.k_0 ~radix:Dec))); (cpset_of_cps "n", advance (State_integer_n (State.Integer_n.init ~n:Nat.k_0 ~radix:Dec))); @@ -4174,7 +4195,7 @@ module Dfa = struct (cpset_of_cps ".", advance State_integer_0_dot); (cpset_of_cps "e", advance (State_real_e (State.Real_e.init ~m:0.))); (Set.diff (cpset_of_cps ident_cps) - (Set.union (cpset_of_cps dec_cps) (cpset_of_cps "_beinoruxz")), + (Set.union (cpset_of_cps dec_cps) (cpset_of_cps "_Lbeinoruxz")), advance State_integer_mal_ident); ]; default0=accept_zero_excl; @@ -4236,6 +4257,10 @@ module Dfa = struct advance (state_init (state |> accum_digit digit)) view t); (cpset_of_cps ".", fun state view t -> advance (state_dot_init state) view t); (cpset_of_cps ep_cp, 
ep_advance); + (cpset_of_cps "L", fun state view t -> + let n = n_of_state state in + advance (State_integer_l (State.Integer_l.init ~n ~radix)) view t + ); (cpset_of_cps "u", fun state view t -> let n = n_of_state state in advance (State_integer_u (State.Integer_u.init ~n ~radix)) view t @@ -4254,7 +4279,7 @@ module Dfa = struct ); (cpset_of_cps "r", r_advance); (Set.diff (cpset_of_cps ident_cps) - (Set.union (cpset_of_cps base_cps) (cpset_of_cps (String.join ["_inruz"; ep_cp]))), + (Set.union (cpset_of_cps base_cps) (cpset_of_cps (String.join ["_Linruz"; ep_cp]))), fun _state view t -> advance State_integer_mal_ident view t); ]; default1=(fun state view t -> @@ -4367,6 +4392,18 @@ module Dfa = struct retry (State_real_hex_dot (State.Real_hex_dot.init ~m)) t ) + let node1_l = (* Node for State_integer_l, the state entered once the L suffix of an integer literal has been scanned. *) + let open State.Integer_l in + { + edges1=map_of_cps_alist [ + (dec_cps, fun _state view t -> advance State_integer_mal_ident view t); (* A decimal digit directly after the suffix (e.g. 0L1) turns the whole literal into a malformed constant. *) + ]; + default1=(fun {n; radix} view t -> (* Any other codepoint ends the literal as Subtype_long. NOTE(review): presumably the _excl variant leaves that codepoint for the next token, so input such as 42La would scan as a long followed by an identifier - confirm this is intended. *) + accept_integer_excl ~subtype:Subtype_long n radix view t + ); + eoi1=(fun {n; radix} view t -> accept_integer_incl ~subtype:Subtype_long n radix view t); (* End of input directly after the suffix also accepts the literal as Subtype_long. *) + } + let node1_u = let open State.Integer_u in { @@ -6960,6 +6997,7 @@ module Dfa = struct | State_integer_oct_dot v -> act1 trace Integer.node1_oct_dot v view t | State_integer_dec_dot v -> act1 trace Integer.node1_dec_dot v view t | State_integer_hex_dot v -> act1 trace Integer.node1_hex_dot v view t + | State_integer_l v -> act1 trace Integer.node1_l v view t | State_integer_u v -> act1 trace Integer.node1_u v view t | State_integer_i v -> act1 trace Integer.node1_i v view t | State_integer_n v -> act1 trace Integer.node1_n v view t diff --git a/bootstrap/src/hmc/scan.mli b/bootstrap/src/hmc/scan.mli index 8017f90df..70c8f8d3b 100644 --- a/bootstrap/src/hmc/scan.mli +++ b/bootstrap/src/hmc/scan.mli @@ -228,6 +228,7 @@ module Token : sig | Tok_i16 of {source: Source.Slice.t; i16: i16 Rendition.t} | Tok_u32 of {source: Source.Slice.t; u32: u32 Rendition.t} | 
Tok_i32 of {source: Source.Slice.t; i32: i32 Rendition.t} + | Tok_long of {source: Source.Slice.t; long: u64 Rendition.t} | Tok_u64 of {source: Source.Slice.t; u64: u64 Rendition.t} | Tok_i64 of {source: Source.Slice.t; i64: i64 Rendition.t} | Tok_u128 of {source: Source.Slice.t; u128: u128 Rendition.t} diff --git a/bootstrap/src/mlc/scan.ml b/bootstrap/src/mlc/scan.ml index 03c52f3d1..54605a860 100644 --- a/bootstrap/src/mlc/scan.ml +++ b/bootstrap/src/mlc/scan.ml @@ -793,15 +793,12 @@ let rec next t = -> malformation ~source "Hemlock-specific formatted string syntax" | Tok_r32 {source; _} -> malformation ~source "Hemlock-specific real syntax" | Tok_r64 {source; r64} -> Tok_r64 {source; r64} - | Tok_u8 {source; _} | Tok_i8 {source; _} | Tok_u16 {source; _} | Tok_i16 {source; _} | Tok_u32 {source; _} | Tok_i32 {source; _} | Tok_u64 {source; _} | Tok_i64 {source; _} | Tok_u128 {source; _} | Tok_i128 {source; _} | Tok_u256 {source; _} | Tok_i256 {source; _} | Tok_u512 {source; _} | Tok_i512 {source; _} | Tok_nat {source; _} | Tok_zint {source; _} -> malformation ~source "Hemlock-specific integer syntax" -(* - | Tok_long {source; long} -> Tok_long {source; long} -*) + | Tok_long {source; long} -> Tok_long {source; long} | Tok_end_of_input {source} -> Tok_end_of_input {source} | Tok_misaligned _ -> not_reached () (* Handled by outer match. 
*) | Tok_error {source; error} -> Tok_error {source; error} diff --git a/bootstrap/test/hmc/scan/test_integer.expected b/bootstrap/test/hmc/scan/test_integer.expected index c52c71cc9..d2d8312c2 100644 --- a/bootstrap/test/hmc/scan/test_integer.expected +++ b/bootstrap/test/hmc/scan/test_integer.expected @@ -203,6 +203,9 @@ (Tok_whitespace {source=[1:63..1:64)}) (Tok_zint {source=[1:64..1:69); zint=(Constant 254z)}) (Tok_end_of_input {source=[1:69..1:69)}) +``0L`` + (Tok_long {source=[1:0..1:2); long=(Constant 0)}) + (Tok_end_of_input {source=[1:2..1:2)}) ``0u`` (Tok_u64 {source=[1:0..1:2); u64=(Constant 0)}) (Tok_end_of_input {source=[1:2..1:2)}) @@ -221,6 +224,9 @@ ``0z`` (Tok_zint {source=[1:0..1:2); zint=(Constant 0z)}) (Tok_end_of_input {source=[1:2..1:2)}) +``42L`` + (Tok_long {source=[1:0..1:3); long=(Constant 42)}) + (Tok_end_of_input {source=[1:3..1:3)}) ``42u`` (Tok_u64 {source=[1:0..1:3); u64=(Constant 42)}) (Tok_end_of_input {source=[1:3..1:3)}) @@ -257,6 +263,9 @@ ``0xg`` (Tok_u64 {source=[1:0..1:3); u64=(Malformed ["[1:0..1:3): Invalid numerical constant"])}) (Tok_end_of_input {source=[1:3..1:3)}) +``0L1`` + (Tok_u64 {source=[1:0..1:3); u64=(Malformed ["[1:0..1:3): Invalid numerical constant"])}) + (Tok_end_of_input {source=[1:3..1:3)}) ``0u7`` (Tok_u64 {source=[1:0..1:3); u64=(Malformed ["[1:0..1:3): Unsupported bitwidth"])}) (Tok_end_of_input {source=[1:3..1:3)}) @@ -303,6 +312,12 @@ ``0xabu8`` (Tok_u8 {source=[1:0..1:6); u8=(Constant 171u8)}) (Tok_end_of_input {source=[1:6..1:6)}) +``0x15L`` + (Tok_long {source=[1:0..1:5); long=(Constant 21)}) + (Tok_end_of_input {source=[1:5..1:5)}) +``15L`` + (Tok_long {source=[1:0..1:3); long=(Constant 15)}) + (Tok_end_of_input {source=[1:3..1:3)}) ``15u`` (Tok_u64 {source=[1:0..1:3); u64=(Constant 15)}) (Tok_end_of_input {source=[1:3..1:3)}) diff --git a/bootstrap/test/hmc/scan/test_integer.ml b/bootstrap/test/hmc/scan/test_integer.ml index 6c6187c47..3815b364e 100644 --- 
a/bootstrap/test/hmc/scan/test_integer.ml +++ b/bootstrap/test/hmc/scan/test_integer.ml @@ -34,6 +34,7 @@ let test () = scan_str "0xfeu8 0xfeu16 0xfeu32 0xfe 0xfeu 0xfeu64 0xfeu128 0xfeu256 0xfeu512 0xfen"; scan_str "0xfei8 0xfei16 0xfei32 0xfei 0xfei64 0xfei128 0xfei256 0xfei512 0xfez"; + scan_str "0L"; scan_str "0u"; scan_str "0i"; scan_str "0u8"; @@ -41,6 +42,7 @@ let test () = scan_str "0n"; scan_str "0z"; + scan_str "42L"; scan_str "42u"; scan_str "42i"; scan_str "42u8"; @@ -57,6 +59,7 @@ let test () = scan_str "0o8"; scan_str "0xg"; + scan_str "0L1"; scan_str "0u7"; scan_str "0n1"; scan_str "0z1"; @@ -78,6 +81,8 @@ let test () = scan_str "0o253u8"; scan_str "0xabu8"; + scan_str "0x15L"; + scan_str "15L"; scan_str "15u"; scan_str "17u64"; scan_str "0x0123_4567_89ab_cdef";