Skip to content

Commit

Permalink
Merge pull request #290 from zmstone/no-escape-for-multiline-str
Browse files Browse the repository at this point in the history
breaking: no escape at all in triple quotes
  • Loading branch information
zhongwencool authored Mar 4, 2024
2 parents 97905ee + c4c540c commit 5e5721b
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 43 deletions.
29 changes: 20 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,31 @@ HOCON spec for reference: https://lightbend.github.io/config/
* `key={a: 1}\n{b: 2}`
* `key={a=1, b=2}`
- `url()/file()/classpath()` includes are not supported
- Quotes next to triple-quotes need to be escaped, otherwise they are discarded.
Meaning `"""a""""` is parsed as `a` but not `a"`, to correctly express `a"`, it must be one of below:
* Escape the last `"`: `"""a\""""`;
* Or add `~` around the string value: `"""~a"~"""` (see below).
- Immediate quote before triple-quote is invalid syntax.
* `""""a""""` is invalid because there are 4 closing quotes instead of three.
* As a workaround, `"""~"a"~"""` is valid, see below for more details.
- Multiline strings allow indentation (spaces, not tabs).
If `~\n` (or `~\r\n`) are the only characters following the opening triple-quote, then it's a multiline string with indentation:
* The first line `~\n` is discarded;
* The first line `~\n` is discarded.
* The closing triple-quote can be either `"""` or `~"""` (`~` allows the string to end with `"` without escaping).
* Indentation is allowed but not required for empty lines;
* Indentation level is determined by the least number of leading spaces among the non-empty lines;
* Indentation is allowed but not required for empty lines.
* Indentation level is determined by the least number of leading spaces among the non-empty lines.
* If the closing triple-quote takes the whole line, it's allowed to be indented less than other lines,
but if it's indented more than other lines, the spaces are treated as part of the string.
* Backslashes are treated as escape characters, i.e. should be escaped with another backslash;
* There is no need to escape quotes in multiline strings, but it's allowed.
* Backslash is NOT an escape character.
* If a string has three consecutive quotes, there are two workarounds:
- Make use of string concatenation, and only escape the triple-quotes. e.g.
```
a = """~
line1
~"""
"line2\"\"\"\n"
"""~
line3
~"""
```
- Use normal string with escape sequence.
For example: `a = "line1\nline2\"\"\"\nline3\n"`
## Schema
Expand Down
20 changes: 20 additions & 0 deletions etc/triple-quotes.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
## this file is to verify that 'a', 'b' and 'c' should all have the same value

# normal quotes with escape sequence
a = "line1\nline2\"\"\"\nline3\n"

# triple quotes for line1 and line3, but normal quotes for line2"""
b = """line1
"""
"line2\"\"\"\n"
"""line3
"""

# triple quotes with indentation for line1 and line3, but normal quotes for line2"""
c = """~
line1
~"""
"line2\"\"\"\n"
"""~
line3
~"""
8 changes: 1 addition & 7 deletions src/hocon_pp.erl
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ gen_single_quote_str(S, unicode) ->
gen_triple_quote_str(Str, Opts) ->
[
?TRIPLE_QUOTE,
maybe_indent(esc_backslashes(Str), Opts),
maybe_indent(Str, Opts),
?TRIPLE_QUOTE
].

Expand Down Expand Up @@ -464,9 +464,3 @@ esc($\") -> "\\\"";
% \
esc($\\) -> "\\\\";
esc(Char) -> Char.

%% Walk the string and replace every backslash character with the
%% two-character sequence `\\`; all other characters are kept as they are.
%% The result is a list whose elements are either a character or a
%% two-character string.
esc_backslashes(Str) ->
    [esc_backslash(C) || C <- Str].

%% A single backslash becomes an escaped backslash; anything else is unchanged.
esc_backslash(C) when C =:= $\\ -> "\\\\";
esc_backslash(C) -> C.
27 changes: 20 additions & 7 deletions src/hocon_scanner.xrl
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ Rules.
{Integer} : {token, {integer, TokenLine, list_to_integer(TokenChars)}}.
{Float} : {token, {float, TokenLine, to_float(TokenChars)}}.
{String} : {token, {string, TokenLine, unquote(TokenChars, force_escape)}}.
{MultilineString} : {token, {string, TokenLine, unindent(unquote(TokenChars, allow_unescaped))}}.
{MultilineString} : {token, {string, TokenLine, unindent(strip_quotes(TokenChars, 3, TokenLine))}}.
{Bytesize} : {token, {string, TokenLine, TokenChars}}.
{Percent} : {token, {string, TokenLine, TokenChars}}.
{Duration} : {token, {string, TokenLine, TokenChars}}.
Expand All @@ -92,6 +92,8 @@ Rules.
Erlang code.
-export([unindent/1]).
%% Classify an unquoted token: the exact text "include" yields an
%% `include` token, any other text yields an `unqstr` token that
%% carries the original characters.
maybe_include(TokenChars, TokenLine) ->
    case TokenChars of
        "include" -> {include, TokenLine};
        _Other -> {unqstr, TokenLine, TokenChars}
    end.
Expand All @@ -102,13 +104,24 @@ get_filename_from_required("required(" ++ Filename) ->
%% Map the literal text "true" / "false" to the matching boolean atom.
%% Any other input is not handled and fails with function_clause.
bool(Text) when Text =:= "true" -> true;
bool(Text) when Text =:= "false" -> false.
unquote(Str, Allow) ->
Str1 = strip_surrounded_quotes(Str),
unescape(Str1, Allow).
unquote([$\" | Str0], Allow) ->
[$\" | StrR] = lists:reverse(Str0),
unescape(lists:reverse(StrR), Allow).
%% Remove up to N double quotes from the front and up to N from the back of Str.
%% If the text left after stripping ends with a backslash followed by a
%% double quote, a `scan_error` with reason `four_closing_quotes` is thrown
%% for the given Line; otherwise the stripped text is returned.
strip_quotes(Str, N, Line) ->
    NoLeading = strip_quotes_loop(Str, N),
    %% Work on the reversed text so the trailing quotes become leading ones.
    RevBody = strip_quotes_loop(lists:reverse(NoLeading), N),
    reject_escaped_closing_quote(RevBody, Line).

%% RevBody is the stripped text in reverse order, so a leading `"` followed
%% by `\` means the original text ended with `\"`.
reject_escaped_closing_quote([$\", $\\ | _], Line) ->
    throw({scan_error, #{reason => four_closing_quotes, line => Line}});
reject_escaped_closing_quote(RevBody, _Line) ->
    lists:reverse(RevBody).
strip_surrounded_quotes([$" | Rem]) ->
lists:reverse(strip_surrounded_quotes(lists:reverse(Rem)));
strip_surrounded_quotes(Str) ->
%% Remove at most N double-quote characters from the front of the
%% string and return whatever follows them.
strip_quotes_loop(Chars, N) when N =< 0 ->
    Chars;
strip_quotes_loop([$" | Tail], N) ->
    strip_quotes_loop(Tail, N - 1);
strip_quotes_loop(Chars, _N) ->
    Chars.

unindent([$~, $\r, $\n | Chars]) ->
Expand Down
8 changes: 4 additions & 4 deletions test/hocon_pp_tests.erl
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,10 @@ pp_quote_test() ->
),
Fun(#{<<"$d_dfdk2f">> => <<"12">>}, <<"\"$d_dfdk2f\" = \"12\"\n">>),

%% backslash
Fun(#{<<"a">> => <<"\\emqx">>}, <<"a = \"\"\"\\\\emqx\"\"\"\n">>),
Fun(#{<<"b">> => <<"emqx\\emqx">>}, <<"b = \"\"\"emqx\\\\emqx\"\"\"\n">>),
Fun(#{<<"c">> => <<"emqx\\">>}, <<"c = \"\"\"emqx\\\\\"\"\"\n">>),
%% backslash is value
Fun(#{<<"a">> => <<"\\emqx">>}, <<"a = \"\"\"\\emqx\"\"\"\n">>),
Fun(#{<<"b">> => <<"emqx\\emqx">>}, <<"b = \"\"\"emqx\\emqx\"\"\"\n">>),
Fun(#{<<"c">> => <<"emqx\\">>}, <<"c = \"\"\"emqx\\\"\"\"\n">>),

%% quote
Fun(#{<<"A">> => <<"\"emqx">>}, <<"A = \"\"\"~\n \"emqx~\"\"\"\n">>),
Expand Down
49 changes: 33 additions & 16 deletions test/hocon_tests.erl
Original file line number Diff line number Diff line change
Expand Up @@ -231,20 +231,24 @@ escape_test_() ->
#{<<"k">> => <<"a\"cd\"x">>},
binary(<<"k=\"\"\"a\"cd\"x\"\"\"">>)
),
%% " is also allowed between """...""" with escaping
%% " immediately after opening """ is a value
?_assertEqual(
#{<<"k">> => <<"a\"cd\"x">>},
binary(<<"k=\"\"\"a\\\"cd\\\"x\"\"\"">>)
#{<<"k">> => <<"\"1">>},
binary(<<"k=\"\"\"\"1\"\"\"">>)
),
%% " immediately before """ should be escaped, otherwise scan_error
?_assertMatch(
{error, {scan_error, _}},
hocon:binary(<<"k=\"\"\"a\"cd\"\"\"\"">>)
),
%% " immediately before """ should be escaped
?_assertEqual(
#{<<"k">> => <<"a\"cd\"">>},
binary(<<"k=\"\"\"a\"cd\\\"\"\"\"">>)
%% " immediately before closing """ is scan_error
?_assertMatch(
{error, {scan_error, _}},
hocon:binary(<<"k=\"\"\"a\"cd\"\"\"\"">>)
),
%% \" immediately before closing """ is also scan_error
?_assertMatch(
{error, {scan_error, _}},
hocon:binary(<<"k=\"\"\"a\"cd\\\"\"\"\"">>)
),
%% \n is parsed as \n between """..."""
?_assertEqual(
Expand All @@ -256,9 +260,9 @@ escape_test_() ->
{error, {scan_error, _}},
hocon:binary(<<"k=\"a\nd\"">>)
),
%% \\n parsed as \n between """..."""
%% \\n parsed as \\n between """..."""
?_assertEqual(
#{<<"k">> => <<"a\nd">>},
#{<<"k">> => <<"a\\nd">>},
binary(<<"k=\"\"\"a\\nd\"\"\"">>)
),
%% \\n parsed as \n between "..."
Expand All @@ -284,10 +288,14 @@ triple_quote_string_test_() ->
?_assertEqual(<<" 1\n\n2\n">>, Parse(<<"~\n 1\n \n 2\n ~">>)),
?_assertEqual(<<" 1\n\n2\n ">>, Parse(<<"~\n 1\n \n 2\n ~">>)),
?_assertEqual(<<"1\"\"\n2">>, Parse(<<"~\n 1\"\"\n 2">>)),
%% must escape quotes if it's next to """
?_assertEqual(<<"1\"">>, Parse(<<"1\\\"">>)),
%% must escape quotes if it's next to """
?_assertEqual(<<"\"1">>, Parse(<<"\\\"1">>)),
%% leading """" is OK, the last quote is considered value
?_assertEqual(<<"\"1">>, Parse(<<"\"1">>)),
%% " as value
?_assertEqual(<<"\"">>, Parse(<<"~\n\"~">>)),
%% ending with """" is NOT OK, it's a syntax error
?_assertError({scan_error, _}, Parse(<<"1\"">>)),
%% ending with \"""" tricks the scanner, but caught by hand crafted Erlang code
?_assertError({scan_error, _}, Parse(<<"1\\\"">>)),
%% no need to escape quotes unless it's next to """
?_assertEqual(<<"1\"2">>, Parse(<<"1\"2">>)),
%% empty string with closing quote in the next line
Expand All @@ -300,6 +308,11 @@ triple_quote_string_test_() ->
?_assertEqual(<<"a\n">>, Parse(<<"~\n a\n ~">>))
].
%% Loads etc/triple-quotes.conf and checks that keys `b` and `c` both
%% resolve to the same value as key `a`.
triple_quote_inside_tripe_quote_test() ->
    {ok, Conf} = hocon:load("etc/triple-quotes.conf"),
    Expected = maps:get(<<"a">>, Conf),
    ?assertEqual(Expected, maps:get(<<"b">>, Conf)),
    ?assertEqual(Expected, maps:get(<<"c">>, Conf)).

obj_inside_array_test_() ->
[
?_assertEqual(#{<<"a">> => [#{<<"b">> => #{<<"c">> => 1}}]}, binary("a:[{b.c = 1}]")),
Expand Down Expand Up @@ -887,8 +900,12 @@ re_error(Filename0) ->
lists:map(fun([V, L, F]) -> [V, L, filename:basename(F)] end, VLFs).

binary(B) when is_binary(B) ->
{ok, R} = hocon:binary(B),
R;
case hocon:binary(B) of
{ok, R} ->
R;
{error, Reason} ->
error(Reason)
end;
binary(IO) ->
binary(iolist_to_binary(IO)).

Expand Down

0 comments on commit 5e5721b

Please sign in to comment.