Skip to content

Commit

Permalink
Add psIgnoreInternalEntityDeclarations flag
Browse files Browse the repository at this point in the history
  • Loading branch information
poorlyknitsweater authored and k0ral committed Mar 31, 2023
1 parent 1a2825b commit d91f6bf
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 4 deletions.
13 changes: 11 additions & 2 deletions xml-conduit/src/Text/XML/Stream/Parse.hs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ module Text.XML.Stream.Parse
, psDecodeIllegalCharacters
, psRetainNamespaces
, psEntityExpansionSizeLimit
, psIgnoreInternalEntityDeclarations
-- *** Entity decoding
, decodeXmlEntities
, decodeHtmlEntities
Expand Down Expand Up @@ -465,6 +466,10 @@ data ParseSettings = ParseSettings
-- Default: @8192@
--
-- Since 1.9.1
, psIgnoreInternalEntityDeclarations :: Bool
-- ^ Whether to resolve any but the predefined entities.
--
-- Default: @False@
}

instance Default ParseSettings where
Expand All @@ -473,6 +478,7 @@ instance Default ParseSettings where
, psRetainNamespaces = False
, psDecodeIllegalCharacters = const Nothing
, psEntityExpansionSizeLimit = 8192
, psIgnoreInternalEntityDeclarations = False
}

conduitToken :: MonadThrow m => ParseSettings -> ConduitT T.Text (PositionRange, Token) m ()
Expand Down Expand Up @@ -536,7 +542,10 @@ parseToken settings = do
do char' '['
ents <- parseDeclarations id
skipSpace
return ents
if psIgnoreInternalEntityDeclarations settings then
return []
else
return ents
_ -> return []
char' '>'
newline <|> return ()
Expand Down Expand Up @@ -632,7 +641,7 @@ parseContent :: ParseSettings
-> Bool -- break on double quote
-> Bool -- break on single quote
-> Parser Content
parseContent (ParseSettings decodeEntities _ decodeIllegalCharacters _) breakDouble breakSingle = parseReference <|> (parseTextContent <?> "text content") where
parseContent (ParseSettings decodeEntities _ decodeIllegalCharacters _ _) breakDouble breakSingle = parseReference <|> (parseTextContent <?> "text content") where
parseReference = do
char' '&'
t <- parseEntityRef <|> parseHexCharRef <|> parseDecCharRef
Expand Down
24 changes: 22 additions & 2 deletions xml-conduit/test/unit.hs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ main = hspec $ do
it "identifies unresolved entities" resolvedIdentifies
it "decodeHtmlEntities" testHtmlEntities
it "works for resolvable entities" resolvedAllGood
it "ignores custom entities when psResolveEntities is False" dontResolveEntities
it "merges adjacent content nodes" resolvedMergeContent
it "understands inline entity declarations" resolvedInline
it "understands complex inline with markup" resolvedInlineComplex
Expand Down Expand Up @@ -671,8 +672,8 @@ cursorParent, cursorAncestor, cursorOrSelf, cursorPreceding, cursorFollowing,
cursorAnyElement, cursorElement, cursorLaxElement, cursorContent,
cursorAttribute, cursorLaxAttribute, cursorHasAttribute,
cursorAttributeIs, cursorDeep, cursorForce, cursorForceM,
resolvedIdentifies, resolvedAllGood, resolvedMergeContent,
testHtmlEntities
resolvedIdentifies, resolvedAllGood, dontResolveEntities,
resolvedMergeContent, testHtmlEntities
:: Assertion
cursorParent = name (Cu.parent bar2) @?= ["foo"]
cursorAncestor = name (Cu.ancestor baz2) @?= ["bar2", "foo"]
Expand Down Expand Up @@ -749,6 +750,25 @@ resolvedAllGood =
where
xml = "<foo><bar/><baz/></foo>"

dontResolveEntities =
D.parseLBS_ settings xml @=? expectedDocument
where
settings = def { P.psIgnoreInternalEntityDeclarations = True }
xml = mconcat
[ "<!DOCTYPE mydt [ <!ENTITY foo \"fooby\" > ]>"
, "<root>&gt;&foo;&bar;</root>"
]

expectedDocument =
Document
(Prologue [] (Just (Doctype "mydt" Nothing)) [])
(Element "root" mempty
[ NodeContent (ContentText ">")
, NodeContent (ContentEntity "foo")
, NodeContent (ContentEntity "bar")
])
[]

resolvedMergeContent =
Res.documentRoot (Res.parseLBS_ def xml) @=?
Res.Element "foo" Map.empty [Res.NodeContent "bar&baz"]
Expand Down

0 comments on commit d91f6bf

Please sign in to comment.