From 7b3a0942f03b7626830ed59579210f505f5599a2 Mon Sep 17 00:00:00 2001 From: Tim Ledbetter Date: Sat, 3 Aug 2024 14:54:02 +0100 Subject: [PATCH] LibXML: Include line and column numbers in error messages --- Userland/Libraries/LibXML/Parser/Parser.cpp | 54 +++++++++++---------- Userland/Libraries/LibXML/Parser/Parser.h | 6 +-- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/Userland/Libraries/LibXML/Parser/Parser.cpp b/Userland/Libraries/LibXML/Parser/Parser.cpp index 9048d7ccd995d..c1636758833c6 100644 --- a/Userland/Libraries/LibXML/Parser/Parser.cpp +++ b/Userland/Libraries/LibXML/Parser/Parser.cpp @@ -195,7 +195,7 @@ ErrorOr Parser::skip_whitespace(Required required) // S ::= (#x20 | #x9 | #xD | #xA)+ auto matched = m_lexer.consume_while(is_any_of("\x20\x09\x0d\x0a"sv)); if (required == Required::Yes && matched.is_empty()) - return parse_error(m_lexer.tell(), "Expected whitespace"); + return parse_error(m_lexer.current_position(), "Expected whitespace"); rollback.disarm(); return {}; @@ -220,12 +220,12 @@ ErrorOr Parser::parse_internal() auto matched_source = m_source.substring_view(0, m_lexer.tell()); if (auto it = find_if(matched_source.begin(), matched_source.end(), s_restricted_characters); !it.is_end()) { return parse_error( - it.index(), + m_lexer.position_for(it.index()), ByteString::formatted("Invalid character #{:x} used in document", *it)); } if (!m_lexer.is_eof()) - return parse_error(m_lexer.tell(), "Garbage after document"); + return parse_error(m_lexer.current_position(), "Garbage after document"); return {}; } @@ -235,8 +235,12 @@ ErrorOr Parser::expect(StringView expected) auto rollback = rollback_point(); if (!m_lexer.consume_specific(expected)) { - if (m_options.treat_errors_as_fatal) - return parse_error(m_lexer.tell(), ByteString::formatted("Expected '{}'", expected)); + if (m_options.treat_errors_as_fatal) { + if (expected == ") ErrorOr) ErrorOr Parser::parse_version_info() m_in_compatibility_mode = true; } else { if (version_string != "1.1" && m_options.treat_errors_as_fatal) - return parse_error(m_lexer.tell(), ByteString::formatted("Expected '1.1', found '{}'", version_string)); + return parse_error(m_lexer.current_position(), ByteString::formatted("Expected '1.1', found '{}'", version_string)); } m_version = Version::Version11; @@ -415,7 +419,7 @@ ErrorOr Parser::parse_standalone_document_decl() auto value = m_lexer.consume_quoted_string(); if (!value.is_one_of("yes", "no")) - return parse_error(m_lexer.tell() - value.length(), "Expected one of 'yes' or 'no'"); + return parse_error(m_lexer.position_for(m_lexer.tell() - value.length()), "Expected one of 'yes' or 'no'"); m_standalone = value == "yes"; @@ -445,7 +449,7 @@ ErrorOr Parser::parse_misc() return {}; } - return parse_error(m_lexer.tell(), "Expected a match for 'Misc', but found none"); + return parse_error(m_lexer.current_position(), "Expected a match for 'Misc', but found none"); } // 2.5.15 Comment, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Comment @@ -521,7 +525,7 @@ ErrorOr Parser::parse_processing_instruction_target() if (target.equals_ignoring_ascii_case("xml"sv) && m_options.treat_errors_as_fatal) { return parse_error( - m_lexer.tell() - target.length(), + m_lexer.position_for(m_lexer.tell() - target.length()), "Use of the reserved 'xml' name for processing instruction target name is disallowed"); } @@ -579,7 +583,7 @@ ErrorOr Parser::parse_doctype_decl() auto declarations = TRY(parse_external_subset()); if (!m_lexer.is_eof()) { return parse_error( - m_lexer.tell(), + m_lexer.current_position(), ByteString::formatted("Failed to resolve external subset '{}': garbage after declarations", doctype.external_id->system_id.system_literal)); } doctype.markup_declarations.extend(move(declarations)); @@ -634,7 +638,7 @@ ErrorOr Parser::parse_element() // Well-formedness constraint: The Name in an element's end-tag MUST match the element type in the start-tag. if (m_options.treat_errors_as_fatal && closing_name != tag.name) - return parse_error(tag_location, "Invalid closing tag"); + return parse_error(m_lexer.position_for(tag_location), "Invalid closing tag"); rollback.disarm(); return {}; @@ -720,7 +724,7 @@ ErrorOr Parser::parse_attribute_value_inner(StringView d if (m_lexer.next_is('<')) { // Not allowed, return a nice error to make it easier to debug. - return parse_error(m_lexer.tell(), "Unescaped '<' not allowed in attribute values"); + return parse_error(m_lexer.current_position(), "Unescaped '<' not allowed in attribute values"); } if (m_lexer.next_is('&')) { @@ -774,7 +778,7 @@ ErrorOr, ParseError> Parser::parse_ } if (!code_point.has_value() || !s_characters.contains(*code_point)) - return parse_error(reference_start, "Invalid character reference"); + return parse_error(m_lexer.position_for(reference_start), "Invalid character reference"); TRY(expect(";"sv)); @@ -995,7 +999,7 @@ ErrorOr, ParseError> Parser::parse_markup_declaratio return Optional {}; } - return parse_error(m_lexer.tell(), "Expected one of elementdecl, attlistdecl, entitydecl, notationdecl, PI or comment"); + return parse_error(m_lexer.current_position(), "Expected one of elementdecl, attlistdecl, entitydecl, notationdecl, PI or comment"); } // 2.8.28a DeclSep, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-DeclSep @@ -1016,7 +1020,7 @@ ErrorOr, ParseError> Parser::parse_declaration_separator() return Optional {}; } - return parse_error(m_lexer.tell(), "Expected either whitespace, or a PEReference"); + return parse_error(m_lexer.current_position(), "Expected either whitespace, or a PEReference"); } // 4.1.69 PEReference, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PEReference @@ -1269,7 +1273,7 @@ ErrorOr Parser::parse_content_spec( if (auto result = parse_name(); !result.is_error()) names.set(result.release_value()); else - return parse_error(m_lexer.tell(), "Expected a Name"); + return parse_error(m_lexer.current_position(), "Expected a Name"); } TRY(skip_whitespace()); TRY(expect(")*"sv)); @@ -1331,7 +1335,7 @@ ErrorOr Parser::parse_content_spec( TRY(expect(")"sv)); if (choices.size() < 2) - return parse_error(m_lexer.tell(), "Expected more than one choice"); + return parse_error(m_lexer.current_position(), "Expected more than one choice"); TRY(skip_whitespace()); auto qualifier = parse_qualifier(); @@ -1709,7 +1713,7 @@ ErrorOr Parser::resolve_reference(EntityReference const& { static HashTable reference_lookup {}; if (reference_lookup.contains(reference.name)) - return parse_error(m_lexer.tell(), ByteString::formatted("Invalid recursive definition for '{}'", reference.name)); + return parse_error(m_lexer.current_position(), ByteString::formatted("Invalid recursive definition for '{}'", reference.name)); reference_lookup.set(reference.name); ScopeGuard remove_lookup { @@ -1737,17 +1741,17 @@ ErrorOr Parser::resolve_reference(EntityReference const& }, [&](EntityDefinition const& definition) -> ErrorOr { if (placement == ReferencePlacement::AttributeValue) - return parse_error(m_lexer.tell(), ByteString::formatted("Attribute references external entity '{}'", reference.name)); + return parse_error(m_lexer.current_position(), ByteString::formatted("Attribute references external entity '{}'", reference.name)); if (definition.notation.has_value()) - return parse_error(0u, ByteString::formatted("Entity reference to unparsed entity '{}'", reference.name)); + return parse_error(m_lexer.position_for(0), ByteString::formatted("Entity reference to unparsed entity '{}'", reference.name)); if (!m_options.resolve_external_resource) - return parse_error(0u, ByteString::formatted("Failed to resolve external entity '{}'", reference.name)); + return parse_error(m_lexer.position_for(0), ByteString::formatted("Failed to resolve external entity '{}'", reference.name)); auto result = m_options.resolve_external_resource(definition.id.system_id, definition.id.public_id); if (result.is_error()) - return parse_error(0u, ByteString::formatted("Failed to resolve external entity '{}': {}", reference.name, result.error())); + return parse_error(m_lexer.position_for(0), ByteString::formatted("Failed to resolve external entity '{}': {}", reference.name, result.error())); resolved = result.release_value(); return {}; @@ -1767,7 +1771,7 @@ ErrorOr Parser::resolve_reference(EntityReference const& return "'"; if (reference.name == "quot") return "\""; - return parse_error(0u, ByteString::formatted("Reference to undeclared entity '{}'", reference.name)); + return parse_error(m_lexer.position_for(0), ByteString::formatted("Reference to undeclared entity '{}'", reference.name)); } StringView resolved_source = *resolved; diff --git a/Userland/Libraries/LibXML/Parser/Parser.h b/Userland/Libraries/LibXML/Parser/Parser.h index 4b0588c1d592e..95e0f62b511a1 100644 --- a/Userland/Libraries/LibXML/Parser/Parser.h +++ b/Userland/Libraries/LibXML/Parser/Parser.h @@ -22,7 +22,7 @@ namespace XML { struct ParseError { - size_t offset; + LineTrackingLexer::Position position {}; ByteString error; }; @@ -185,7 +185,7 @@ class Parser { if (rule_name.starts_with("parse_"sv)) rule_name = rule_name.substring_view(6); m_parse_errors.append({ - error.offset, + error.position, ByteString::formatted("{}: {}", rule_name, error.error), }); } @@ -219,6 +219,6 @@ template<> struct AK::Formatter : public AK::Formatter { ErrorOr format(FormatBuilder& builder, XML::ParseError const& error) { - return Formatter::format(builder, "{} at offset {}"sv, error.error, error.offset); + return Formatter::format(builder, "{} at line: {}, col: {} (offset {})"sv, error.error, error.position.line, error.position.column, error.position.offset); } };