Skip to content

Commit

Permalink
LibWeb: Support finding text split across multiple text nodes
Browse files Browse the repository at this point in the history
Previously, the find in page function would fail to find text which was
split across multiple text nodes. For example, given the following
markup: `WH<span>F</span>`, the query `WHF` would previously fail to be
matched.

This is fixed by traversing all of the document's text nodes,
constructing a single complete string to query against, and keeping
track of the offsets at which that string is split across multiple nodes.
  • Loading branch information
tcl3 authored and awesomekling committed Jun 14, 2024
1 parent dbc94ce commit ec4d298
Showing 1 changed file with 53 additions and 17 deletions.
70 changes: 53 additions & 17 deletions Userland/Libraries/LibWeb/DOM/Document.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5113,29 +5113,65 @@ Vector<JS::Handle<DOM::Range>> Document::find_matching_text(String const& query,
if (!document_element() || !document_element()->layout_node())
return {};

Vector<JS::Handle<DOM::Range>> matches;
document_element()->layout_node()->for_each_in_inclusive_subtree_of_type<Layout::TextNode>([&](auto const& text_node) {
auto const& text = text_node.text_for_rendering();
size_t offset = 0;
while (true) {
auto match_index = case_sensitivity == CaseSensitivity::CaseInsensitive
? text.find_byte_offset_ignoring_case(query, offset)
: text.find_byte_offset(query, offset);
if (!match_index.has_value())
break;

auto range = create_range();
auto& dom_node = const_cast<DOM::Text&>(text_node.dom_node());
(void)range->set_start(dom_node, match_index.value());
(void)range->set_end(dom_node, match_index.value() + query.code_points().length());
// Records where one DOM text node's rendered text begins within the
// concatenated search string that is built from every layout text node.
struct TextPositionNode {
// The DOM text node whose rendered text was appended to the search string.
DOM::Text& dom_node;
// Byte offset into the concatenated string at which this node's text starts.
// Stays at the default of 0 for the first node, which is added without an offset.
size_t start_offset { 0 };
};

matches.append(range);
offset = match_index.value() + 1;
StringBuilder builder;
Vector<TextPositionNode> text_positions;
size_t current_start_position = 0;
String current_node_text;
document_element()->layout_node()->for_each_in_inclusive_subtree_of_type<Layout::TextNode>([&](auto const& text_node) {
auto& dom_node = const_cast<DOM::Text&>(text_node.dom_node());
if (text_positions.is_empty()) {
text_positions.empend(dom_node);
} else {
current_start_position += current_node_text.bytes_as_string_view().length();
text_positions.empend(dom_node, current_start_position);
}

current_node_text = text_node.text_for_rendering();
builder.append(current_node_text);
return TraversalDecision::Continue;
});

if (text_positions.is_empty())
return {};

size_t offset = 0;
auto* match_start_position = &text_positions[0];
auto text = builder.to_string_without_validation();
Vector<JS::Handle<DOM::Range>> matches;
while (true) {
auto match_index = case_sensitivity == CaseSensitivity::CaseInsensitive
? text.find_byte_offset_ignoring_case(query, offset)
: text.find_byte_offset(query, offset);
if (!match_index.has_value())
break;

size_t i = 0;
for (; i < text_positions.size() && match_index.value() > text_positions[i].start_offset; ++i)
match_start_position = &text_positions[i];

auto range = create_range();
auto start_position = match_index.value() - match_start_position->start_offset;
auto& start_dom_node = match_start_position->dom_node;
(void)range->set_start(start_dom_node, start_position);

auto* match_end_position = match_start_position;
for (; i < text_positions.size() && match_index.value() + query.bytes_as_string_view().length() > text_positions[i].start_offset; ++i)
match_end_position = &text_positions[i];

auto& end_dom_node = match_end_position->dom_node;
auto end_position = match_index.value() - match_end_position->start_offset + query.bytes_as_string_view().length();
(void)range->set_end(end_dom_node, end_position);

matches.append(range);
offset = match_index.value() + query.bytes_as_string_view().length() + 1;
match_start_position = match_end_position;
}

return matches;
}

Expand Down

0 comments on commit ec4d298

Please sign in to comment.