From 6f14aa66f9ced00de4c8bed8abebe8ea0541fbed Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 21 Aug 2023 16:17:23 -0400 Subject: [PATCH 1/4] using a table-based convert_hex_to_binary function for better speed --- include/ada/unicode.h | 3 ++- src/unicode.cpp | 11 +++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/ada/unicode.h b/include/ada/unicode.h index 97c8fbb67..2c35480db 100644 --- a/include/ada/unicode.h +++ b/include/ada/unicode.h @@ -154,7 +154,8 @@ ada_really_inline constexpr bool is_single_dot_path_segment( ada_really_inline constexpr bool is_lowercase_hex(const char c) noexcept; /** - * @details Convert hex to binary. + * @details Convert hex to binary. Caller is responsible to ensure that + * the parameter is an hexadecimal digit (0-9, A-F, a-f). */ unsigned constexpr convert_hex_to_binary(char c) noexcept; diff --git a/src/unicode.cpp b/src/unicode.cpp index c6100dd0f..035d4df5f 100644 --- a/src/unicode.cpp +++ b/src/unicode.cpp @@ -368,13 +368,12 @@ ada_really_inline constexpr bool is_lowercase_hex(const char c) noexcept { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'); } +constexpr static char hex_to_binary_table[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 10, 11, + 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 11, 12, 13, 14, 15}; unsigned constexpr convert_hex_to_binary(const char c) noexcept { - // this code can be optimized. - if (c <= '9') { - return c - '0'; - } - char del = c >= 'a' ? 'a' : 'A'; - return 10 + (c - del); + return hex_to_binary_table[c - '0']; } std::string percent_decode(const std::string_view input, size_t first_percent) { From 5050bdc4967531309b7cb7fb22c219a1e3f7d1ae Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 22 Aug 2023 12:16:16 -0400 Subject: [PATCH 2/4] Using table throughout. --- src/unicode.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/unicode.cpp b/src/unicode.cpp index 035d4df5f..f3513efff 100644 --- a/src/unicode.cpp +++ b/src/unicode.cpp @@ -293,11 +293,6 @@ static_assert(unicode::is_alnum_plus('1')); static_assert(unicode::is_alnum_plus('a')); static_assert(unicode::is_alnum_plus('b')); -ada_really_inline constexpr bool is_ascii_hex_digit(const char c) noexcept { - return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || - (c >= 'a' && c <= 'f'); -} - ada_really_inline constexpr bool is_c0_control_or_space(const char c) noexcept { return (unsigned char)c <= ' '; } @@ -369,9 +364,17 @@ ada_really_inline constexpr bool is_lowercase_hex(const char c) noexcept { } constexpr static char hex_to_binary_table[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 10, 11, - 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 11, 12, 13, 14, 15}; + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, + 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15}; + +ada_really_inline constexpr bool is_ascii_hex_digit(const char c) noexcept { + if (c > 'f' || c < '0') { + return false; + } + return hex_to_binary_table[c - '0'] != -1; +} + unsigned constexpr convert_hex_to_binary(const char c) noexcept { return hex_to_binary_table[c - '0']; } From 78da8ba2fc57b0b7908668ff244cb94cba304745 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 22 Aug 2023 12:31:12 -0400 Subject: [PATCH 3/4] Improving is_ascii_hex_digit --- include/ada/unicode.h | 2 +- src/unicode.cpp | 29 +++++++++++++++++++---------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/include/ada/unicode.h b/include/ada/unicode.h index 2c35480db..81a79517d 100644 --- a/include/ada/unicode.h +++ b/include/ada/unicode.h @@ -157,7 +157,7 @@ ada_really_inline constexpr bool is_lowercase_hex(const char c) noexcept; * @details Convert hex to binary. Caller is responsible to ensure that * the parameter is an hexadecimal digit (0-9, A-F, a-f). */ -unsigned constexpr convert_hex_to_binary(char c) noexcept; +ada_really_inline unsigned constexpr convert_hex_to_binary(char c) noexcept; /** * first_percent should be = input.find('%') diff --git a/src/unicode.cpp b/src/unicode.cpp index f3513efff..26fe0900f 100644 --- a/src/unicode.cpp +++ b/src/unicode.cpp @@ -363,20 +363,29 @@ ada_really_inline constexpr bool is_lowercase_hex(const char c) noexcept { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'); } -constexpr static char hex_to_binary_table[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, - 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15}; +constexpr static char hex_to_binary_table[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, + 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1}; ada_really_inline constexpr bool is_ascii_hex_digit(const char c) noexcept { - if (c > 'f' || c < '0') { - return false; - } - return hex_to_binary_table[c - '0'] != -1; + return hex_to_binary_table[(unsigned char)c] != -1; } -unsigned constexpr convert_hex_to_binary(const char c) noexcept { - return hex_to_binary_table[c - '0']; +ada_really_inline unsigned constexpr convert_hex_to_binary( + const char c) noexcept { + return hex_to_binary_table[(unsigned char)c]; } std::string percent_decode(const std::string_view input, size_t first_percent) { From 643830e2962380fac6b417e4d528c71016fd9c05 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 22 Aug 2023 12:39:13 -0400 Subject: [PATCH 4/4] Revert "Using table throughout." This reverts commit 5050bdc4967531309b7cb7fb22c219a1e3f7d1ae. --- src/unicode.cpp | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/src/unicode.cpp b/src/unicode.cpp index 26fe0900f..035d4df5f 100644 --- a/src/unicode.cpp +++ b/src/unicode.cpp @@ -293,6 +293,11 @@ static_assert(unicode::is_alnum_plus('1')); static_assert(unicode::is_alnum_plus('a')); static_assert(unicode::is_alnum_plus('b')); +ada_really_inline constexpr bool is_ascii_hex_digit(const char c) noexcept { + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || + (c >= 'a' && c <= 'f'); +} + ada_really_inline constexpr bool is_c0_control_or_space(const char c) noexcept { return (unsigned char)c <= ' '; } @@ -363,29 +368,12 @@ ada_really_inline constexpr bool is_lowercase_hex(const char c) noexcept { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'); } -constexpr static char hex_to_binary_table[256] = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, - 9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1}; - -ada_really_inline constexpr bool is_ascii_hex_digit(const char c) noexcept { - return hex_to_binary_table[(unsigned char)c] != -1; -} - -ada_really_inline unsigned constexpr convert_hex_to_binary( - const char c) noexcept { - return hex_to_binary_table[(unsigned char)c]; +constexpr static char hex_to_binary_table[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 10, 11, + 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 11, 12, 13, 14, 15}; +unsigned constexpr convert_hex_to_binary(const char c) noexcept { + return hex_to_binary_table[c - '0']; } std::string percent_decode(const std::string_view input, size_t first_percent) {