From 574c78ffeb6ee3665d186c86807b54ee02143a38 Mon Sep 17 00:00:00 2001 From: Michael Ragazzon Date: Sat, 6 Jul 2024 22:01:55 +0200 Subject: [PATCH] Various StringUtilities feature and safety improvements - Add `StringView::empty()`. - `ToCharacter` bounds checking instead of assuming zero-terminated strings. - Construct UTF-8 iterator from string view. --- Backends/RmlUi_Platform_Win32.cpp | 2 +- Include/RmlUi/Core/StringUtilities.h | 6 ++++-- Source/Core/ElementText.cpp | 5 +++-- Source/Core/Elements/WidgetTextInput.cpp | 5 +++-- Source/Core/StringUtilities.cpp | 8 +++++++- 5 files changed, 18 insertions(+), 8 deletions(-) diff --git a/Backends/RmlUi_Platform_Win32.cpp b/Backends/RmlUi_Platform_Win32.cpp index 77d196da5..a760fd892 100644 --- a/Backends/RmlUi_Platform_Win32.cpp +++ b/Backends/RmlUi_Platform_Win32.cpp @@ -302,7 +302,7 @@ bool RmlWin32::WindowProcedure(Rml::Context* context, TextInputMethodEditor_Win3 { // Second 16-bit code unit of a two-wide character. Rml::String utf8 = ConvertToUTF8(std::wstring{first_u16_code_unit, c}); - character = Rml::StringUtilities::ToCharacter(utf8.data()); + character = Rml::StringUtilities::ToCharacter(utf8.data(), utf8.data() + utf8.size()); } else if (c == '\r') { diff --git a/Include/RmlUi/Core/StringUtilities.h b/Include/RmlUi/Core/StringUtilities.h index b5be7dd59..d9434e377 100644 --- a/Include/RmlUi/Core/StringUtilities.h +++ b/Include/RmlUi/Core/StringUtilities.h @@ -108,7 +108,7 @@ namespace StringUtilities { RMLUICORE_API bool StringCompareCaseInsensitive(StringView lhs, StringView rhs); // Decode the first code point in a zero-terminated UTF-8 string. - RMLUICORE_API Character ToCharacter(const char* p); + RMLUICORE_API Character ToCharacter(const char* p, const char* p_end); // Encode a single code point as a UTF-8 string. RMLUICORE_API String ToUTF8(Character character); @@ -169,6 +169,7 @@ class RMLUICORE_API StringView { inline const char* begin() const { return p_begin; } inline const char* end() const { return p_end; } + inline bool empty() const { return p_begin == p_end; } inline size_t size() const { return size_t(p_end - p_begin); } explicit inline operator String() const { return String(p_begin, p_end); } @@ -189,6 +190,7 @@ class RMLUICORE_API StringView { class RMLUICORE_API StringIteratorU8 { public: StringIteratorU8(const char* p_begin, const char* p, const char* p_end); + StringIteratorU8(StringView string); StringIteratorU8(const String& string); StringIteratorU8(const String& string, size_t offset); StringIteratorU8(const String& string, size_t offset, size_t count); @@ -199,7 +201,7 @@ class RMLUICORE_API StringIteratorU8 { StringIteratorU8& operator--(); // Returns the codepoint at the current position. The iterator must be dereferencable. - inline Character operator*() const { return StringUtilities::ToCharacter(p); } + inline Character operator*() const { return StringUtilities::ToCharacter(p, view.end()); } // Returns false when the iterator is located just outside the valid part of the string. explicit inline operator bool() const { return (p != view.begin() - 1) && (p != view.end()); } diff --git a/Source/Core/ElementText.cpp b/Source/Core/ElementText.cpp index 216997849..37566d963 100644 --- a/Source/Core/ElementText.cpp +++ b/Source/Core/ElementText.cpp @@ -212,7 +212,7 @@ bool ElementText::GenerateLine(String& line, int& line_length, float& line_width bool break_at_endline = white_space_property == WhiteSpace::Pre || white_space_property == WhiteSpace::Prewrap || white_space_property == WhiteSpace::Preline; - const TextShapingContext text_shaping_context{ computed.language(), computed.direction(), computed.letter_spacing() }; + const TextShapingContext text_shaping_context{computed.language(), computed.direction(), computed.letter_spacing()}; TextTransform text_transform_property = computed.text_transform(); WordBreak word_break = computed.word_break(); @@ -229,7 +229,8 @@ bool ElementText::GenerateLine(String& line, int& line_length, float& line_width const char* next_token_begin = token_begin; Character previous_codepoint = Character::Null; if (!line.empty()) - previous_codepoint = StringUtilities::ToCharacter(StringUtilities::SeekBackwardUTF8(&line.back(), line.data())); + previous_codepoint = + StringUtilities::ToCharacter(StringUtilities::SeekBackwardUTF8(&line.back(), line.data()), line.data() + line.size()); // Generate the next token and determine its pixel-length. bool break_line = BuildToken(token, next_token_begin, string_end, line.empty() && trim_whitespace_prefix, collapse_white_space, diff --git a/Source/Core/Elements/WidgetTextInput.cpp b/Source/Core/Elements/WidgetTextInput.cpp index f03c61a38..2d3139573 100644 --- a/Source/Core/Elements/WidgetTextInput.cpp +++ b/Source/Core/Elements/WidgetTextInput.cpp @@ -1082,7 +1082,7 @@ float WidgetTextInput::GetAlignmentSpecificTextOffset(const Line& line) const // For right alignment with soft-wrapped newlines, remove up to a single space to align the last word to the right edge. const bool is_last_line = (line.value_offset + line.size == (int)value.size()); const bool is_soft_wrapped = (!is_last_line && line.editable_length == line.size); - if (is_soft_wrapped && editable_line_string.size() > 0 && *(editable_line_string.end() - 1) == ' ') + if (is_soft_wrapped && !editable_line_string.empty() && *(editable_line_string.end() - 1) == ' ') { editable_line_string = StringView(editable_line_string.begin(), editable_line_string.end() - 1); } @@ -1324,7 +1324,8 @@ Vector2f WidgetTextInput::FormatText(float height_constraint) return 0.0f; // We could join the whole string, and compare the result of the joined width to the individual widths of each string. Instead, we take // the two neighboring characters from each string and compare the string width with and without kerning, which should be much faster. - const Character left_back = StringUtilities::ToCharacter(StringUtilities::SeekBackwardUTF8(&left.back(), &left.front())); + const Character left_back = + StringUtilities::ToCharacter(StringUtilities::SeekBackwardUTF8(&left.back(), &left.front()), left.data() + left.size()); const String right_front_u8 = right.substr(0, size_t(StringUtilities::SeekForwardUTF8(right.c_str() + 1, right.c_str() + right.size()) - right.c_str())); const int width_kerning = ElementUtilities::GetStringWidth(text_element, right_front_u8, left_back); diff --git a/Source/Core/StringUtilities.cpp b/Source/Core/StringUtilities.cpp index f83456941..ef35d519c 100644 --- a/Source/Core/StringUtilities.cpp +++ b/Source/Core/StringUtilities.cpp @@ -430,8 +430,10 @@ bool StringUtilities::StringCompareCaseInsensitive(const StringView lhs, const S return true; } -Character StringUtilities::ToCharacter(const char* p) +Character StringUtilities::ToCharacter(const char* p, const char* p_end) { + RMLUI_ASSERTMSG(p && p != p_end, "ToCharacter expects a valid, non-empty input string"); + if ((*p & (1 << 7)) == 0) return static_cast(*p); @@ -459,6 +461,9 @@ Character StringUtilities::ToCharacter(const char* p) return Character::Null; } + if (p_end - p < num_bytes) + return Character::Null; + for (int i = 1; i < num_bytes; i++) { const char byte = *(p + i); @@ -586,6 +591,7 @@ bool StringView::operator==(const StringView& other) const } StringIteratorU8::StringIteratorU8(const char* p_begin, const char* p, const char* p_end) : view(p_begin, p_end), p(p) {} +StringIteratorU8::StringIteratorU8(StringView string) : view(string), p(view.begin()) {} StringIteratorU8::StringIteratorU8(const String& string) : view(string), p(string.data()) {} StringIteratorU8::StringIteratorU8(const String& string, size_t offset) : view(string), p(string.data() + offset) {} StringIteratorU8::StringIteratorU8(const String& string, size_t offset, size_t count) : view(string, 0, offset + count), p(string.data() + offset) {}