Skip to content

Commit b04c3cf

Browse files
committed
feat: Add Parser insecurely_allow_entities param
1 parent d0edd5e commit b04c3cf

File tree

5 files changed

+70
-3
lines changed

5 files changed

+70
-3
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ adheres to [Semantic Versioning](https://semver.org/).
1717

1818
- Add documentation to explain how to import the library
1919
- Warnings due to wrong usage are now using `UserWarning` instead of `RuntimeWarning`
20+
- Add undocumented parameter `insecurely_allow_entities` to `Parser`
2021

2122
### :house: Internal
2223

src/bigxml/parser.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from typing import TYPE_CHECKING, Callable, Iterator, Optional, Tuple, Union
2+
import warnings
23

34
from defusedxml.ElementTree import iterparse
45

@@ -73,12 +74,24 @@ def create_node(elem: "Element", iteration: int) -> Union[XMLElement, XMLText]:
7374

7475

7576
class Parser(HandleMgr):
76-
def __init__(self, *streams: Streamable) -> None:
77+
def __init__(
78+
self,
79+
*streams: Streamable,
80+
insecurely_allow_entities: bool = False,
81+
) -> None:
82+
if insecurely_allow_entities:
    # Opting in turns off `forbid_entities` in the iterparse call that
    # follows, so the caller is warned loudly that the input must be trusted.
    warnings.warn(
        "Using 'insecurely_allow_entities' makes your code vulnerable to some XML attacks."
        " Are you sure you trust where the input streams are coming from?",
        UserWarning,
        # stacklevel=2 attributes the warning to the code that instantiated
        # Parser instead of to this line inside the library, which is the
        # location the user actually needs to review.
        stacklevel=2,
    )
7789
iterator = IterWithRollback(
7890
rewrite_exceptions(
7991
iterparse(
8092
StreamChain(*streams),
8193
("start", "end"),
94+
forbid_entities=not insecurely_allow_entities,
8295
)
8396
)
8497
)

stubs/defusedxml/ElementTree.pyi

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,27 @@
1-
# note: only used items are defined here
1+
# ruff: noqa: FBT001
2+
# note: only used items are defined here, with used typing
23

3-
from xml.etree.ElementTree import Element, ParseError, iterparse
4+
import sys
5+
from typing import Iterator, Optional, Sequence, TypeVar
6+
from xml.etree.ElementTree import Element, ParseError
7+
8+
if sys.version_info < (3, 8): # pragma: no cover
9+
from typing_extensions import Protocol
10+
else: # pragma: no cover
11+
from typing import Protocol
12+
13+
_T_co = TypeVar("_T_co", covariant=True)


class _SupportsRead(Protocol[_T_co]):
    # Minimal structural type for the stream argument: anything exposing a
    # `read` method that returns `_T_co`.
    def read(self, size: Optional[int] = None) -> _T_co: ...


# Typed subset of `defusedxml.ElementTree.iterparse`.
#
# NOTE: the runtime function also accepts a `parser` argument between `events`
# and `forbid_dtd`. It is omitted because it is not used by this project, so
# the `forbid_*` flags must always be passed by keyword, never positionally.
#
# The defaults mirror the ones documented by defusedxml: DTDs are allowed,
# entity declarations and external references are forbidden.
def iterparse(
    source: _SupportsRead[bytes],
    events: Optional[Sequence[str]] = None,
    forbid_dtd: bool = False,
    forbid_entities: bool = True,
    forbid_external: bool = True,
) -> Iterator[tuple[str, Element]]: ...
425

526
class DefusedXmlException(ValueError): ... # noqa: N818
627

tests/integration/test_security.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,16 @@ def test_external_entities(xml: bytes, msg: str) -> None:
101101
Parser(xml).return_from(handler_get_text)
102102
assert str(exc_info.value) == msg
103103
assert exc_info.value.security
104+
105+
106+
def test_insecurely_allow_entities() -> None:
    """Internal entities are expanded once the caller explicitly opts in."""
    xml = (
        b"<!DOCTYPE foobar [\n"
        b' <!ENTITY Omega "&#937;">\n'
        b"]>\n"
        b"<root>&Omega;</root>\n"
    )
    # `match` pins the assertion to the opt-in warning itself, so an unrelated
    # UserWarning raised during construction cannot make this test pass.
    with pytest.warns(UserWarning, match="insecurely_allow_entities"):
        parser = Parser(xml, insecurely_allow_entities=True)
    value = parser.return_from(handler_get_text)
    # &Omega; is declared as the character reference &#937;
    assert value == "Ω"

tests/unit/test_parser.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ def test_root_level(
8080
elem_b_node = elem("bar", parents=(root_node,), attributes={"abc": "def"})
8181
text_h_node = XMLText("Hello", (root_node,))
8282
text_w_node = XMLText("World", (root_node,))
83+
text_pi_node = XMLText("π", (root_node,))
8384

8485
# to make sure that text are not in buffer, we generate huge texts
8586
BIG_TEXT_LEN = 1_000_000
@@ -186,3 +187,21 @@ def root_handler(
186187
assert list(parser.iter_from(root_handler)) == [
187188
(f"handler-yield-{i}", node) for i, node in enumerate(nodes)
188189
]
190+
191+
192+
def test_insecurely_allow_entities(
    # pylint: disable=redefined-outer-name
    handler: HANDLER_TYPE,
) -> None:
    """A declared internal entity reaches the handler as expanded text."""
    xml = b'<!DOCTYPE money [<!ENTITY pi "&#960;">]><root>&pi;</root>'

    @xml_handle_element("root")
    def root_handler(
        node: XMLElement,
    ) -> Iterator[Tuple[str, Union[XMLElement, XMLText]]]:
        # Delegate to the `handler` fixture for everything under <root>.
        yield from node.iter_from(handler)

    # `match` ties the check to the opt-in warning rather than to any
    # UserWarning that might be emitted while building the parser.
    with pytest.warns(UserWarning, match="insecurely_allow_entities"):
        parser = Parser(xml, insecurely_allow_entities=True)

    assert list(parser.iter_from(root_handler)) == [("handler-yield-0", text_pi_node)]

0 commit comments

Comments
 (0)