diff --git a/src/hocon_token.erl b/src/hocon_token.erl
index 98a5596..9a034da 100644
--- a/src/hocon_token.erl
+++ b/src/hocon_token.erl
@@ -58,7 +58,20 @@ read(Filename) ->
-spec scan(binary() | string(), hocon:ctx()) -> list().
scan(Input, Ctx) when is_binary(Input) ->
- scan(unicode_list(Input), Ctx);
+    %% Decode the binary into a list of code points before tokenizing.
+    %% NOTE(review): unicode_list/1 is assumed to wrap
+    %% unicode:characters_to_list/2 (TODO confirm). That function reports bad
+    %% input with a 3-tuple tagged `error` (invalid byte sequence) or
+    %% `incomplete` (input cut off inside a multi-byte character). Both are
+    %% rejected here so a non-list term never reaches the list clause of scan/2.
+    case unicode_list(Input) of
+        {error, _Ok, Invalid} ->
+            throw({scan_invalid_utf8, Invalid, Ctx});
+        {incomplete, _Ok, Truncated} ->
+            throw({scan_invalid_utf8, Truncated, Ctx});
+        InputList ->
+            scan(InputList, Ctx)
+    end;
scan(Input, Ctx) when is_list(Input) ->
case hocon_scanner:string(Input) of
{ok, Tokens, _EndLine} ->
diff --git a/test/data/invalid-utf8.conf b/test/data/invalid-utf8.conf
new file mode 100644
index 0000000..b755099
--- /dev/null
+++ b/test/data/invalid-utf8.conf
@@ -0,0 +1,28 @@
+bridges {
+ webhook {
+ test {
+ body = " \n \nTove \nJani \nReminder \nDon't forget me this weekend! \n"
+ connect_timeout = "11s"
+ direction = "egress"
+ enable = true
+ enable_pipelining = 100
+ headers {"content-type" = "application/json"}
+ max_retries = 3
+ method = "post"
+ pool_size = 9
+ pool_type = "random"
+ request_timeout = "5s"
+ ssl {
+ ciphers = ""
+ depth = 10
+ enable = false
+ reuse_sessions = true
+ secure_renegotiate = true
+ user_lookup_fun = "emqx_tls_psk:lookup"
+ verify = "verify_peer"
+ versions = ["tlsv1.3", "tlsv1.2", "tlsv1.1", "tlsv1"]
+ }
+ url = "http://127.0.0.1:18083"
+ }
+ }
+}
\ No newline at end of file
diff --git a/test/data/unicode-utf8.conf b/test/data/unicode-utf8.conf
new file mode 100644
index 0000000..047188a
--- /dev/null
+++ b/test/data/unicode-utf8.conf
@@ -0,0 +1,6 @@
+# unicode:characters_to_list(<<"®">>, utf8) returns {error, _, _},
+# but unicode:characters_to_list(<<"®"/utf8>>, utf8) succeeds.
+test {
+ body = "\n"
+ text = "ä½ æˆ‘ä»–"
+}
diff --git a/test/hocon_tests.erl b/test/hocon_tests.erl
index dcf083a..0b94171 100644
--- a/test/hocon_tests.erl
+++ b/test/hocon_tests.erl
@@ -881,3 +881,22 @@ unescape_test() ->
},
Conf
).
+
+%% Loads the unicode-utf8.conf fixture and checks that both string values
+%% come back as binaries equal to the UTF-8 literals below.
+unicode_utf8_test() ->
+    Expected = #{
+        <<"test">> => #{
+            <<"body">> => <<"\n"/utf8>>,
+            <<"text">> => <<"ä½ æˆ‘ä»–"/utf8>>
+        }
+    },
+    {ok, Loaded} = hocon:load("./test/data/unicode-utf8.conf"),
+    %% Expected goes first, matching ?assertEqual(Expected, Actual).
+    ?assertEqual(Expected, Loaded).
+
+%% Loading the invalid-utf8.conf fixture must fail with scan_invalid_utf8.
+invalid_utf8_test() ->
+    Path = "./test/data/invalid-utf8.conf",
+    Outcome = hocon:load(Path),
+    ?assertMatch({error, {scan_invalid_utf8, _, _}}, Outcome).