Skip to content

Commit 3fa91ca

Browse files
authored
Merge pull request #2327 from ashnaaseth2325-oss/fix/zip-slip-extraction
Fix zip slip in ODT/IDML template extraction
2 parents 72db0b0 + fa7dbf7 commit 3fa91ca

File tree

3 files changed

+87
-5
lines changed

3 files changed

+87
-5
lines changed

.github/workflows/build-website.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
- name: Install Node.js
3838
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0
3939
with:
40-
node-version: 20.18.2
40+
node-version: '22.12'
4141
- name: Build
4242
working-directory: cornucopia.owasp.org
4343
run: |

scripts/convert.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,31 @@ def _validate_file_paths(source_filename: str, output_pdf_filename: str) -> Tupl
8787
return True, source_path, output_dir
8888

8989

90+
def _safe_extractall(archive: zipfile.ZipFile, target_dir: str) -> None:
91+
"""Extract zip members only if their resolved paths stay within target_dir.
92+
93+
Prevents Zip Slip / path traversal (CWE-22) by resolving symlinks and all
94+
'..' components before comparing each member path against the target root.
95+
Degenerate root entries ('.', '', './') are skipped rather than extracted,
96+
because they carry no file content and resolve to the target directory itself.
97+
"""
98+
abs_target = os.path.realpath(target_dir)
99+
for member in archive.infolist():
100+
member_path = os.path.realpath(os.path.join(abs_target, member.filename))
101+
102+
# Root/degenerate entries ('.', '', './') resolve to abs_target itself.
103+
# They are directory metadata with no content; skip them safely.
104+
if member_path == abs_target:
105+
continue
106+
107+
# Block any member whose resolved path escapes the target directory.
108+
# The os.sep suffix prevents prefix collisions (e.g. /tmp/d vs /tmp/d_evil).
109+
if not member_path.startswith(abs_target + os.sep):
110+
raise ValueError(f"Zip Slip blocked: member '{member.filename}' would extract outside target directory")
111+
112+
archive.extract(member, target_dir)
113+
114+
90115
def _validate_command_args(cmd_args: List[str]) -> bool:
91116
"""Validate command arguments for dangerous characters."""
92117
dangerous_chars = ["&", "|", ";", "$", "`", "(", ")", "<", ">", "*", "?", "[", "]", "{", "}", "\\"]
@@ -338,10 +363,12 @@ def main() -> None:
338363
logging.debug(" --- args = %s", str(convert_vars.args))
339364

340365
set_can_convert_to_pdf()
341-
if convert_vars.args.pdf and not convert_vars.can_convert_to_pdf and not convert_vars.args.debug:
366+
libreoffice_available = bool(shutil.which("libreoffice") or shutil.which("soffice"))
367+
can_make_pdf = convert_vars.can_convert_to_pdf or libreoffice_available
368+
if convert_vars.args.pdf and not can_make_pdf and not convert_vars.args.debug:
342369
logging.error(
343370
"Cannot convert to pdf on this system. "
344-
"Pdf conversion is available on Windows and Mac, if MS Word is installed"
371+
"Pdf conversion is available on Windows and Mac (with MS Word), or on any system with LibreOffice."
345372
)
346373
return
347374

@@ -966,7 +993,7 @@ def save_odt_file(template_doc: str, language_dict: Dict[str, str], output_file:
966993

967994
# Unzip source xml files and place in temp output folder
968995
with zipfile.ZipFile(template_doc) as odt_archive:
969-
odt_archive.extractall(temp_output_path)
996+
_safe_extractall(odt_archive, temp_output_path)
970997

971998
# ODT text is usually in content.xml and sometimes styles.xml
972999
targets = ["content.xml", "styles.xml"]
@@ -996,7 +1023,7 @@ def save_idml_file(template_doc: str, language_dict: Dict[str, str], output_file
9961023

9971024
# Unzip source xml files and place in temp output folder
9981025
with zipfile.ZipFile(template_doc) as idml_archive:
999-
idml_archive.extractall(temp_output_path)
1026+
_safe_extractall(idml_archive, temp_output_path)
10001027
logging.debug(" --- namelist of first few files in archive = %s", str(idml_archive.namelist()[:5]))
10011028

10021029
xml_files = get_files_from_of_type(temp_output_path, "xml")

tests/scripts/convert_utest.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
import unittest
22
import unittest.mock as mock
33
import argparse
4+
import io
45
import os
56
import platform
67
import sys
8+
import tempfile
9+
import zipfile
710
import docx # type: ignore
811
import logging
912
import glob
@@ -2156,5 +2159,57 @@ def test_get_paragraphs_from_table_in_doc(self) -> None:
21562159
self.assertGreater(len(paragraphs), want_min_len_paragraphs)
21572160

21582161

2162+
class TestSafeExtractAll(unittest.TestCase):
2163+
"""Unit tests for _safe_extractall covering CWE-22 path traversal prevention."""
2164+
2165+
def _build_zip(self, members: typing.Dict[str, str]) -> io.BytesIO:
2166+
buf = io.BytesIO()
2167+
with zipfile.ZipFile(buf, "w") as zf:
2168+
for name, content in members.items():
2169+
zf.writestr(zipfile.ZipInfo(name), content)
2170+
buf.seek(0)
2171+
return buf
2172+
2173+
def test_blocks_parent_directory_traversal(self) -> None:
2174+
"""Zip Slip via '../' in member filename must raise ValueError."""
2175+
buf = self._build_zip({"../evil.txt": "pwned"})
2176+
with tempfile.TemporaryDirectory() as td:
2177+
with zipfile.ZipFile(buf) as zf:
2178+
with self.assertRaises(ValueError) as ctx:
2179+
c._safe_extractall(zf, td)
2180+
self.assertIn("Zip Slip blocked", str(ctx.exception))
2181+
2182+
def test_blocks_absolute_path_member(self) -> None:
2183+
"""/etc/passwd as a member filename must be blocked."""
2184+
buf = self._build_zip({"/etc/passwd": "pwned"})
2185+
with tempfile.TemporaryDirectory() as td:
2186+
with zipfile.ZipFile(buf) as zf:
2187+
with self.assertRaises(ValueError):
2188+
c._safe_extractall(zf, td)
2189+
2190+
def test_allows_legitimate_nested_members(self) -> None:
2191+
"""Normal nested paths must extract correctly."""
2192+
buf = self._build_zip({"content.xml": "<r/>", "subdir/file.xml": "<s/>"})
2193+
with tempfile.TemporaryDirectory() as td:
2194+
with zipfile.ZipFile(buf) as zf:
2195+
c._safe_extractall(zf, td)
2196+
self.assertTrue(os.path.isfile(os.path.join(td, "content.xml")))
2197+
self.assertTrue(os.path.isfile(os.path.join(td, "subdir", "file.xml")))
2198+
2199+
def test_skips_root_dot_entry(self) -> None:
2200+
"""A '.' root directory entry must be skipped without error."""
2201+
buf = io.BytesIO()
2202+
with zipfile.ZipFile(buf, "w") as zf:
2203+
dot = zipfile.ZipInfo(".")
2204+
dot.external_attr = 0o40755 << 16
2205+
zf.writestr(dot, "")
2206+
zf.writestr("content.xml", "<r/>")
2207+
buf.seek(0)
2208+
with tempfile.TemporaryDirectory() as td:
2209+
with zipfile.ZipFile(buf) as zf:
2210+
c._safe_extractall(zf, td)
2211+
self.assertTrue(os.path.isfile(os.path.join(td, "content.xml")))
2212+
2213+
21592214
if __name__ == "__main__":
21602215
unittest.main()

0 commit comments

Comments
 (0)