From 1121f1d69dc21aad856336c2dc05f453fffc92f6 Mon Sep 17 00:00:00 2001 From: Marc Lasson Date: Tue, 8 Nov 2022 16:42:59 +0100 Subject: [PATCH] Runtime: Implemement set_parser_trace (#1308) --- CHANGES.md | 1 + compiler/tests-jsoo/gh_1307.ml | 7 +- compiler/tests-jsoo/test_parsing.ml | 101 ++++++++++++++++++++++++---- runtime/parsing.js | 74 +++++++++++++++++--- 4 files changed, 159 insertions(+), 24 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index b9799739f1..042f99644e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -20,6 +20,7 @@ * Runtime: Implement buffer for in_channels * Runtime: add support for unix_opendir, unix_readdir, unix_closedir, win_findfirst, win_findnext, win_findclose * Runtime: Dont use require when target-env is browser +* Runtime: Implements Parsing.set_trace (#1308) * Test: track external used in the stdlib and unix ## Bug fixes diff --git a/compiler/tests-jsoo/gh_1307.ml b/compiler/tests-jsoo/gh_1307.ml index a1c148553b..6ac526d6e0 100644 --- a/compiler/tests-jsoo/gh_1307.ml +++ b/compiler/tests-jsoo/gh_1307.ml @@ -10,7 +10,8 @@ let test content = print_endline "failure" let%expect_test "parsing" = - let (_ : bool) = Parsing.set_trace false in + (* use [Parsing.set_trace true] once https://github.com/janestreet/ppx_expect/issues/43 is fixed *) + let (old : bool) = Parsing.set_trace false in test "a"; [%expect {| input: "a" @@ -25,4 +26,6 @@ let%expect_test "parsing" = [%expect {| input: "aaa" Stdlib.Parsing.Parse_error - failure |}] + failure |}]; + let (_ : bool) = Parsing.set_trace old in + () diff --git a/compiler/tests-jsoo/test_parsing.ml b/compiler/tests-jsoo/test_parsing.ml index 65fb29a99c..2c97cefb39 100644 --- a/compiler/tests-jsoo/test_parsing.ml +++ b/compiler/tests-jsoo/test_parsing.ml @@ -29,20 +29,93 @@ let parse s = with Calc_lexer.Eof -> print_endline "EOF" let%expect_test "parsing" = - let (old : bool) = Parsing.set_trace false in - parse "1+2*3"; - [%expect {| - EOF |}]; - parse "(1+2)*3"; - [%expect {| - EOF |}]; - parse "-10-1"; - [%expect {| - EOF |}]; - parse "63/2*-3"; - [%expect {| - EOF |}]; - let (_ : bool) = Parsing.set_trace old in + (* Uncomment once https://github.com/janestreet/ppx_expect/issues/43 is fixed. + {[ + let (old : bool) = Parsing.set_trace true in + parse "1+2*3"; + [%expect + {| + State 0: shift to state 1 + State 1: read token INT(1) + State 1: shift to state 3 + State 3: reduce by rule 2 + State 7: read token PLUS + State 7: shift to state 10 + State 10: read token INT(2) + State 10: shift to state 3 + State 3: reduce by rule 2 + State 16: read token TIMES + State 16: shift to state 12 + State 12: read token INT(3) + State 12: shift to state 3 + State 3: reduce by rule 2 + State 18: reduce by rule 6 + EOF |}]; + parse "(1+2)*3"; + [%expect + {| + State 0: shift to state 1 + State 1: read token LPAREN + State 1: shift to state 5 + State 5: read token INT(1) + State 5: shift to state 3 + State 3: reduce by rule 2 + State 9: read token PLUS + State 9: shift to state 10 + State 10: read token INT(2) + State 10: shift to state 3 + State 3: reduce by rule 2 + State 16: read token RPAREN + State 16: reduce by rule 4 + State 9: shift to state 15 + State 15: reduce by rule 3 + State 7: read token TIMES + State 7: shift to state 12 + State 12: read token INT(3) + State 12: shift to state 3 + State 3: reduce by rule 2 + State 18: reduce by rule 6 + EOF |}]; + parse "-10-1"; + [%expect + {| + State 0: shift to state 1 + State 1: read token MINUS + State 1: shift to state 4 + State 4: read token INT(10) + State 4: shift to state 3 + State 3: reduce by rule 2 + State 8: reduce by rule 8 + State 7: read token MINUS + State 7: shift to state 11 + State 11: read token INT(1) + State 11: shift to state 3 + State 3: reduce by rule 2 + EOF |}]; + parse "63/2*-3"; + [%expect + {| + State 0: shift to state 1 + State 1: read token INT(63) + State 1: shift to state 3 + State 3: reduce by rule 2 + State 7: read token DIV + State 7: shift to state 13 + State 13: read token INT(2) + State 13: shift to state 3 + State 3: reduce by rule 2 + State 19: reduce by rule 7 + State 7: read token TIMES + State 7: shift to state 12 + State 12: read token MINUS + State 12: shift to state 4 + State 4: read token INT(3) + State 4: shift to state 3 + State 3: reduce by rule 2 + State 8: reduce by rule 8 + State 18: reduce by rule 6 + EOF |}]; + let (_ : bool) = Parsing.set_trace old in ]} *) parse "1+2*3"; [%expect {| EOF |}]; parse "(1+2)*3"; diff --git a/runtime/parsing.js b/runtime/parsing.js index 6a3ce1b948..ca88663c22 100644 --- a/runtime/parsing.js +++ b/runtime/parsing.js @@ -17,8 +17,13 @@ /* The pushdown automata */ +//Provides: caml_parser_trace +var caml_parser_trace = 0; + //Provides: caml_parse_engine -//Requires: caml_lex_array +//Requires: caml_lex_array, caml_parser_trace,caml_jsstring_of_string +//Requires: caml_ml_output, caml_ml_string_length, caml_string_of_jsbytes +//Requires: caml_jsbytes_of_string, MlBytes function caml_parse_engine(tables, env, cmd, arg) { var ERRCODE = 256; @@ -73,8 +78,42 @@ function caml_parse_engine(tables, env, cmd, arg) var tbl_table = 12; var tbl_check = 13; // var _tbl_error_function = 14; - // var _tbl_names_const = 15; - // var _tbl_names_block = 16; + var tbl_names_const = 15; + var tbl_names_block = 16; + + + function log(x) { + var s = caml_string_of_jsbytes(x + "\n"); + caml_ml_output(2, s, 0, caml_ml_string_length(s)); + } + + function token_name(names, number) + { + var str = caml_jsstring_of_string(names); + if (str[0] == '\x00') + return ""; + return str.split('\x00')[number]; + } + + function print_token(state, tok) + { + var token, kind; + if (tok instanceof Array) { + token = token_name(tables[tbl_names_block], tok[0]); + if (typeof tok[1] == "number") + kind = "" + tok[1]; + else if (typeof tok[1] == "string") + kind = tok[1] + else if (tok[1] instanceof MlBytes) + kind = caml_jsbytes_of_string(tok[1]) + else + kind = "_" + log("State " + state + ": read token " + token + "(" + kind + ")"); + } else { + token = token_name(tables[tbl_names_const], tok); + log("State " + state + ": read token " + token); + } + } if (!tables.dgoto) { tables.defred = caml_lex_array (tables[tbl_defred]); @@ -118,6 +157,7 @@ function caml_parse_engine(tables, env, cmd, arg) env[env_curr_char] = tables[tbl_transl_const][arg + 1]; env[env_lval] = 0; } + if (caml_parser_trace) print_token (state, arg); // Fall through case 7://testshift: @@ -149,16 +189,26 @@ function caml_parse_engine(tables, env, cmd, arg) n2 = n1 + ERRCODE; if (n1 != 0 && n2 >= 0 && n2 <= tables[tbl_tablesize] && tables.check[n2] == ERRCODE) { + if (caml_parser_trace) + log("Recovering in state " + state1); cmd = shift_recover; break next; } else { - if (sp <= env[env_stackbase]) return RAISE_PARSE_ERROR; + if (caml_parser_trace) + log("Discarding state " + state1); + if (sp <= env[env_stackbase]) { + if (caml_parser_trace) + log("No more states to discard"); + return RAISE_PARSE_ERROR; + } /* The ML code raises Parse_error */ sp--; } } } else { - if (env[env_curr_char] == 0) return RAISE_PARSE_ERROR; - /* The ML code raises Parse_error */ + if (env[env_curr_char] == 0) + return RAISE_PARSE_ERROR; /* The ML code raises Parse_error */ + if (caml_parser_trace) + log("Discarding last token read"); env[env_curr_char] = -1; cmd = loop; break; } @@ -168,6 +218,8 @@ function caml_parse_engine(tables, env, cmd, arg) if (errflag > 0) errflag--; // Fall through case 9://shift_recover: + if (caml_parser_trace) + log("State " + state + ": shift to state " + tables.table[n2]); state = tables.table[n2]; sp++; if (sp >= env[env_stacksize]) { @@ -185,6 +237,8 @@ function caml_parse_engine(tables, env, cmd, arg) break; case 10://reduce: + if (caml_parser_trace) + log("State " + state + ": reduce by rule " + n); var m = tables.len[n]; env[env_asp] = sp; env[env_rule_number] = n; @@ -232,5 +286,9 @@ function caml_parse_engine(tables, env, cmd, arg) } //Provides: caml_set_parser_trace const -//Dummy function! -function caml_set_parser_trace() { return 0; } +//Requires: caml_parser_trace +function caml_set_parser_trace(bool) { + var oldflag = caml_parser_trace; + caml_parser_trace = bool; + return oldflag; +}