diff --git a/xml-conduit/src/Text/XML/Stream/Parse.hs b/xml-conduit/src/Text/XML/Stream/Parse.hs index a6734bf..ae67cad 100644 --- a/xml-conduit/src/Text/XML/Stream/Parse.hs +++ b/xml-conduit/src/Text/XML/Stream/Parse.hs @@ -82,6 +82,7 @@ module Text.XML.Stream.Parse , psDecodeIllegalCharacters , psRetainNamespaces , psEntityExpansionSizeLimit + , psIgnoreInternalEntityDeclarations -- *** Entity decoding , decodeXmlEntities , decodeHtmlEntities @@ -465,6 +466,10 @@ data ParseSettings = ParseSettings -- Default: @8192@ -- -- Since 1.9.1 + , psIgnoreInternalEntityDeclarations :: Bool + -- ^ Whether to resolve any but the predefined entities. + -- + -- Default: @False@ } instance Default ParseSettings where @@ -473,6 +478,7 @@ instance Default ParseSettings where , psRetainNamespaces = False , psDecodeIllegalCharacters = const Nothing , psEntityExpansionSizeLimit = 8192 + , psIgnoreInternalEntityDeclarations = False } conduitToken :: MonadThrow m => ParseSettings -> ConduitT T.Text (PositionRange, Token) m () @@ -536,7 +542,10 @@ parseToken settings = do do char' '[' ents <- parseDeclarations id skipSpace - return ents + if psIgnoreInternalEntityDeclarations settings then + return [] + else + return ents _ -> return [] char' '>' newline <|> return () @@ -632,7 +641,7 @@ parseContent :: ParseSettings -> Bool -- break on double quote -> Bool -- break on single quote -> Parser Content -parseContent (ParseSettings decodeEntities _ decodeIllegalCharacters _) breakDouble breakSingle = parseReference <|> (parseTextContent "text content") where +parseContent (ParseSettings decodeEntities _ decodeIllegalCharacters _ _) breakDouble breakSingle = parseReference <|> (parseTextContent "text content") where parseReference = do char' '&' t <- parseEntityRef <|> parseHexCharRef <|> parseDecCharRef diff --git a/xml-conduit/test/unit.hs b/xml-conduit/test/unit.hs index a7e4379..aa6d8f9 100644 --- a/xml-conduit/test/unit.hs +++ b/xml-conduit/test/unit.hs @@ -88,6 +88,7 @@ main = hspec $ do it "identifies unresolved entities" resolvedIdentifies it "decodeHtmlEntities" testHtmlEntities it "works for resolvable entities" resolvedAllGood + it "ignores custom entities when psResolveEntities is False" dontResolveEntities it "merges adjacent content nodes" resolvedMergeContent it "understands inline entity declarations" resolvedInline it "understands complex inline with markup" resolvedInlineComplex @@ -671,8 +672,8 @@ cursorParent, cursorAncestor, cursorOrSelf, cursorPreceding, cursorFollowing, cursorAnyElement, cursorElement, cursorLaxElement, cursorContent, cursorAttribute, cursorLaxAttribute, cursorHasAttribute, cursorAttributeIs, cursorDeep, cursorForce, cursorForceM, - resolvedIdentifies, resolvedAllGood, resolvedMergeContent, - testHtmlEntities + resolvedIdentifies, resolvedAllGood, dontResolveEntities, + resolvedMergeContent, testHtmlEntities :: Assertion cursorParent = name (Cu.parent bar2) @?= ["foo"] cursorAncestor = name (Cu.ancestor baz2) @?= ["bar2", "foo"] @@ -749,6 +750,25 @@ resolvedAllGood = where xml = "" +dontResolveEntities = + D.parseLBS_ settings xml @=? expectedDocument + where + settings = def { P.psIgnoreInternalEntityDeclarations = True } + xml = mconcat + [ " ]>" + , ">&foo;&bar;" + ] + + expectedDocument = + Document + (Prologue [] (Just (Doctype "mydt" Nothing)) []) + (Element "root" mempty + [ NodeContent (ContentText ">") + , NodeContent (ContentEntity "foo") + , NodeContent (ContentEntity "bar") + ]) + [] + resolvedMergeContent = Res.documentRoot (Res.parseLBS_ def xml) @=? Res.Element "foo" Map.empty [Res.NodeContent "bar&baz"]