From 8772fa6f6db996ae3a2b4c090356e390f57f3e9a Mon Sep 17 00:00:00 2001 From: zhongwencool Date: Wed, 7 Sep 2022 16:01:33 +0800 Subject: [PATCH] chore: make scan invalid utf8 error more clear --- src/hocon_token.erl | 7 ++++++- test/data/invalid-utf8.conf | 28 ++++++++++++++++++++++++++++ test/data/unicode-utf8.conf | 6 ++++++ test/hocon_tests.erl | 19 +++++++++++++++++++ 4 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 test/data/invalid-utf8.conf create mode 100644 test/data/unicode-utf8.conf diff --git a/src/hocon_token.erl b/src/hocon_token.erl index 98a5596..9a034da 100644 --- a/src/hocon_token.erl +++ b/src/hocon_token.erl @@ -58,7 +58,12 @@ read(Filename) -> -spec scan(binary() | string(), hocon:ctx()) -> list(). scan(Input, Ctx) when is_binary(Input) -> - scan(unicode_list(Input), Ctx); + case unicode_list(Input) of + {error, _Ok, Invalid} -> + throw({scan_invalid_utf8, Invalid, Ctx}); + InputList -> + scan(InputList, Ctx) + end; scan(Input, Ctx) when is_list(Input) -> case hocon_scanner:string(Input) of {ok, Tokens, _EndLine} -> diff --git a/test/data/invalid-utf8.conf b/test/data/invalid-utf8.conf new file mode 100644 index 0000000..b755099 --- /dev/null +++ b/test/data/invalid-utf8.conf @@ -0,0 +1,28 @@ +bridges { + webhook { + test { + body = " \n \nTove \nJani \nReminder \nDon't forget me this weekend! \n" + connect_timeout = "11s" + direction = "egress" + enable = true + enable_pipelining = 100 + headers {"content-type" = "application/json"} + max_retries = 3 + method = "post" + pool_size = 9 + pool_type = "random" + request_timeout = "5s" + ssl { + ciphers = "" + depth = 10 + enable = false + reuse_sessions = true + secure_renegotiate = true + user_lookup_fun = "emqx_tls_psk:lookup" + verify = "verify_peer" + versions = ["tlsv1.3", "tlsv1.2", "tlsv1.1", "tlsv1"] + } + url = "http://127.0.0.1:18083" + } + } +} \ No newline at end of file diff --git a/test/data/unicode-utf8.conf b/test/data/unicode-utf8.conf new file mode 100644 index 0000000..047188a --- /dev/null +++ b/test/data/unicode-utf8.conf @@ -0,0 +1,6 @@ +# unicode:characters_to_list(<<"®">>, utf8) return {error, _, _} +# but unicode:characters_to_list(<<"®"/utf8>>, utf8) is ok. +test { + body = "\n" + text = "你我他" +} diff --git a/test/hocon_tests.erl b/test/hocon_tests.erl index dcf083a..0b94171 100644 --- a/test/hocon_tests.erl +++ b/test/hocon_tests.erl @@ -881,3 +881,22 @@ unescape_test() -> }, Conf ). + +unicode_utf8_test() -> + {ok, Conf} = hocon:load("./test/data/unicode-utf8.conf"), + ?assertEqual( + #{ + <<"test">> => + #{ + <<"body">> => <<"\n"/utf8>>, + <<"text">> => <<"你我他"/utf8>> + } + }, + Conf + ). + +invalid_utf8_test() -> + ?assertMatch( + {error, {scan_invalid_utf8, _, _}}, + hocon:load("./test/data/invalid-utf8.conf") + ).