From 28252ea3e578b7cecd8a5b7ea10ad60728b98755 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 19 Jun 2023 11:08:27 -0700 Subject: [PATCH] Don't allow `[` or `]` in XML names. This is an example of a DOCTYPE that was not being parsed correctly before: ``` <!DOCTYPE language[ ... ]> ``` xml-conduit was parsing `language[` as the root element name. I have kept to the most minimal possible change in this PR, because I don't want to break anything inadvertently. However, the current parser is still far from correct. As I understand it, only a few symbols (`_`, `-`, `.`) are allowed in element names (in addition, `:` can be used for a namespace, but that is supported separately in this parser). The current parser would still accept names containing other symbols that are not allowed. --- xml-conduit/src/Text/XML/Stream/Parse.hs | 2 ++ xml-conduit/test/unit.hs | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/xml-conduit/src/Text/XML/Stream/Parse.hs b/xml-conduit/src/Text/XML/Stream/Parse.hs index 6112c5c..93c450a 100644 --- a/xml-conduit/src/Text/XML/Stream/Parse.hs +++ b/xml-conduit/src/Text/XML/Stream/Parse.hs @@ -587,6 +587,8 @@ parseIdent = valid '/' = False valid ';' = False valid '#' = False + valid '[' = False + valid ']' = False valid c = not $ isXMLSpace c parseContent :: ParseSettings diff --git a/xml-conduit/test/unit.hs b/xml-conduit/test/unit.hs index 092432d..eab1eb3 100644 --- a/xml-conduit/test/unit.hs +++ b/xml-conduit/test/unit.hs @@ -735,7 +735,7 @@ testRenderComments =do resolvedInline :: Assertion resolvedInline = do - Res.Document _ root _ <- return $ Res.parseLBS_ Res.def "]>&bar;" + Res.Document _ root _ <- return $ Res.parseLBS_ Res.def "]>&bar;" root @?= Res.Element "foo" Map.empty [Res.NodeContent "baz"] Res.Document _ root2 _ <- return $ Res.parseLBS_ Res.def "]>" root2 @?= Res.Element "foo" (Map.singleton "bar" "baz") []