Skip to content

Commit

Permalink
Various StringUtilities feature and safety improvements
Browse files Browse the repository at this point in the history
- Add `StringView::empty()`.
- `ToCharacter` bounds checking instead of assuming zero-terminated strings.
- Construct UTF-8 iterator from string view.
  • Loading branch information
mikke89 committed Jul 7, 2024
1 parent a9cf934 commit 574c78f
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 8 deletions.
2 changes: 1 addition & 1 deletion Backends/RmlUi_Platform_Win32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ bool RmlWin32::WindowProcedure(Rml::Context* context, TextInputMethodEditor_Win3
{
// Second 16-bit code unit of a two-wide character.
Rml::String utf8 = ConvertToUTF8(std::wstring{first_u16_code_unit, c});
character = Rml::StringUtilities::ToCharacter(utf8.data());
character = Rml::StringUtilities::ToCharacter(utf8.data(), utf8.data() + utf8.size());
}
else if (c == '\r')
{
Expand Down
6 changes: 4 additions & 2 deletions Include/RmlUi/Core/StringUtilities.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ namespace StringUtilities {
RMLUICORE_API bool StringCompareCaseInsensitive(StringView lhs, StringView rhs);

// Decode the first code point in a zero-terminated UTF-8 string.
RMLUICORE_API Character ToCharacter(const char* p);
RMLUICORE_API Character ToCharacter(const char* p, const char* p_end);

// Encode a single code point as a UTF-8 string.
RMLUICORE_API String ToUTF8(Character character);
Expand Down Expand Up @@ -169,6 +169,7 @@ class RMLUICORE_API StringView {
inline const char* begin() const { return p_begin; }
inline const char* end() const { return p_end; }

inline bool empty() const { return p_begin == p_end; }
inline size_t size() const { return size_t(p_end - p_begin); }

explicit inline operator String() const { return String(p_begin, p_end); }
Expand All @@ -189,6 +190,7 @@ class RMLUICORE_API StringView {
class RMLUICORE_API StringIteratorU8 {
public:
StringIteratorU8(const char* p_begin, const char* p, const char* p_end);
StringIteratorU8(StringView string);
StringIteratorU8(const String& string);
StringIteratorU8(const String& string, size_t offset);
StringIteratorU8(const String& string, size_t offset, size_t count);
Expand All @@ -199,7 +201,7 @@ class RMLUICORE_API StringIteratorU8 {
StringIteratorU8& operator--();

// Returns the codepoint at the current position. The iterator must be dereferencable.
inline Character operator*() const { return StringUtilities::ToCharacter(p); }
inline Character operator*() const { return StringUtilities::ToCharacter(p, view.end()); }

// Returns false when the iterator is located just outside the valid part of the string.
explicit inline operator bool() const { return (p != view.begin() - 1) && (p != view.end()); }
Expand Down
5 changes: 3 additions & 2 deletions Source/Core/ElementText.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ bool ElementText::GenerateLine(String& line, int& line_length, float& line_width
bool break_at_endline =
white_space_property == WhiteSpace::Pre || white_space_property == WhiteSpace::Prewrap || white_space_property == WhiteSpace::Preline;

const TextShapingContext text_shaping_context{ computed.language(), computed.direction(), computed.letter_spacing() };
const TextShapingContext text_shaping_context{computed.language(), computed.direction(), computed.letter_spacing()};
TextTransform text_transform_property = computed.text_transform();
WordBreak word_break = computed.word_break();

Expand All @@ -229,7 +229,8 @@ bool ElementText::GenerateLine(String& line, int& line_length, float& line_width
const char* next_token_begin = token_begin;
Character previous_codepoint = Character::Null;
if (!line.empty())
previous_codepoint = StringUtilities::ToCharacter(StringUtilities::SeekBackwardUTF8(&line.back(), line.data()));
previous_codepoint =
StringUtilities::ToCharacter(StringUtilities::SeekBackwardUTF8(&line.back(), line.data()), line.data() + line.size());

// Generate the next token and determine its pixel-length.
bool break_line = BuildToken(token, next_token_begin, string_end, line.empty() && trim_whitespace_prefix, collapse_white_space,
Expand Down
5 changes: 3 additions & 2 deletions Source/Core/Elements/WidgetTextInput.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1082,7 +1082,7 @@ float WidgetTextInput::GetAlignmentSpecificTextOffset(const Line& line) const
// For right alignment with soft-wrapped newlines, remove up to a single space to align the last word to the right edge.
const bool is_last_line = (line.value_offset + line.size == (int)value.size());
const bool is_soft_wrapped = (!is_last_line && line.editable_length == line.size);
if (is_soft_wrapped && editable_line_string.size() > 0 && *(editable_line_string.end() - 1) == ' ')
if (is_soft_wrapped && !editable_line_string.empty() && *(editable_line_string.end() - 1) == ' ')
{
editable_line_string = StringView(editable_line_string.begin(), editable_line_string.end() - 1);
}
Expand Down Expand Up @@ -1324,7 +1324,8 @@ Vector2f WidgetTextInput::FormatText(float height_constraint)
return 0.0f;
// We could join the whole string, and compare the result of the joined width to the individual widths of each string. Instead, we take
// the two neighboring characters from each string and compare the string width with and without kerning, which should be much faster.
const Character left_back = StringUtilities::ToCharacter(StringUtilities::SeekBackwardUTF8(&left.back(), &left.front()));
const Character left_back =
StringUtilities::ToCharacter(StringUtilities::SeekBackwardUTF8(&left.back(), &left.front()), left.data() + left.size());
const String right_front_u8 =
right.substr(0, size_t(StringUtilities::SeekForwardUTF8(right.c_str() + 1, right.c_str() + right.size()) - right.c_str()));
const int width_kerning = ElementUtilities::GetStringWidth(text_element, right_front_u8, left_back);
Expand Down
8 changes: 7 additions & 1 deletion Source/Core/StringUtilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -430,8 +430,10 @@ bool StringUtilities::StringCompareCaseInsensitive(const StringView lhs, const S
return true;
}

Character StringUtilities::ToCharacter(const char* p)
Character StringUtilities::ToCharacter(const char* p, const char* p_end)
{
RMLUI_ASSERTMSG(p && p != p_end, "ToCharacter expects a valid, non-empty input string");

if ((*p & (1 << 7)) == 0)
return static_cast<Character>(*p);

Expand Down Expand Up @@ -459,6 +461,9 @@ Character StringUtilities::ToCharacter(const char* p)
return Character::Null;
}

if (p_end - p < num_bytes)
return Character::Null;

for (int i = 1; i < num_bytes; i++)
{
const char byte = *(p + i);
Expand Down Expand Up @@ -586,6 +591,7 @@ bool StringView::operator==(const StringView& other) const
}

StringIteratorU8::StringIteratorU8(const char* p_begin, const char* p, const char* p_end) : view(p_begin, p_end), p(p) {}
StringIteratorU8::StringIteratorU8(StringView string) : view(string), p(view.begin()) {}
StringIteratorU8::StringIteratorU8(const String& string) : view(string), p(string.data()) {}
StringIteratorU8::StringIteratorU8(const String& string, size_t offset) : view(string), p(string.data() + offset) {}
StringIteratorU8::StringIteratorU8(const String& string, size_t offset, size_t count) : view(string, 0, offset + count), p(string.data() + offset) {}
Expand Down

0 comments on commit 574c78f

Please sign in to comment.