@@ -87,6 +87,31 @@ def _validate_file_paths(source_filename: str, output_pdf_filename: str) -> Tupl
8787 return True , source_path , output_dir
8888
8989
def _safe_extractall(archive: zipfile.ZipFile, target_dir: str) -> None:
    """Extract all members of ``archive`` into ``target_dir``, refusing path traversal.

    Guards against Zip Slip / path traversal (CWE-22): each member name is
    joined onto the resolved target directory and passed through
    ``os.path.realpath`` (which collapses '..' components and resolves
    symlinks) before being compared against the target root.

    Every member is validated *before* anything is written, so an archive that
    contains an offending entry is rejected as a whole instead of being
    partially extracted up to the bad member.

    Args:
        archive: An open zip archive to extract.
        target_dir: Directory that must contain every extracted path.

    Raises:
        ValueError: If any member would resolve to a path outside
            ``target_dir``. Nothing is extracted in that case.
    """
    abs_target = os.path.realpath(target_dir)

    # The trailing separator prevents prefix collisions (e.g. /tmp/d vs
    # /tmp/d_evil). A filesystem root already ends with a separator; appending
    # another would yield e.g. '//' and wrongly reject every member.
    target_prefix = abs_target if abs_target.endswith(os.sep) else abs_target + os.sep

    # Pass 1: validate every member without touching the filesystem.
    safe_members = []
    for member in archive.infolist():
        member_path = os.path.realpath(os.path.join(abs_target, member.filename))

        # Root/degenerate entries ('.', '', './') resolve to the target itself.
        # They carry no file content, so skip them instead of extracting.
        if member_path == abs_target:
            continue

        # Block any member whose resolved path escapes the target directory.
        if not member_path.startswith(target_prefix):
            raise ValueError(f"Zip Slip blocked: member '{member.filename}' would extract outside target directory")

        safe_members.append(member)

    # Pass 2: the whole archive is known to be safe; extract into the same
    # resolved root the checks above were made against.
    for member in safe_members:
        archive.extract(member, abs_target)
113+
114+
90115def _validate_command_args (cmd_args : List [str ]) -> bool :
91116 """Validate command arguments for dangerous characters."""
92117 dangerous_chars = ["&" , "|" , ";" , "$" , "`" , "(" , ")" , "<" , ">" , "*" , "?" , "[" , "]" , "{" , "}" , "\\ " ]
@@ -338,10 +363,12 @@ def main() -> None:
338363 logging .debug (" --- args = %s" , str (convert_vars .args ))
339364
340365 set_can_convert_to_pdf ()
341- if convert_vars .args .pdf and not convert_vars .can_convert_to_pdf and not convert_vars .args .debug :
366+ libreoffice_available = bool (shutil .which ("libreoffice" ) or shutil .which ("soffice" ))
367+ can_make_pdf = convert_vars .can_convert_to_pdf or libreoffice_available
368+ if convert_vars .args .pdf and not can_make_pdf and not convert_vars .args .debug :
342369 logging .error (
343370 "Cannot convert to pdf on this system. "
344- "Pdf conversion is available on Windows and Mac, if MS Word is installed "
371+ "Pdf conversion is available on Windows and Mac (with MS Word), or on any system with LibreOffice. "
345372 )
346373 return
347374
@@ -966,7 +993,7 @@ def save_odt_file(template_doc: str, language_dict: Dict[str, str], output_file:
966993
967994 # Unzip source xml files and place in temp output folder
968995 with zipfile .ZipFile (template_doc ) as odt_archive :
969- odt_archive . extractall ( temp_output_path )
996+ _safe_extractall ( odt_archive , temp_output_path )
970997
971998 # ODT text is usually in content.xml and sometimes styles.xml
972999 targets = ["content.xml" , "styles.xml" ]
@@ -996,7 +1023,7 @@ def save_idml_file(template_doc: str, language_dict: Dict[str, str], output_file
9961023
9971024 # Unzip source xml files and place in temp output folder
9981025 with zipfile .ZipFile (template_doc ) as idml_archive :
999- idml_archive . extractall ( temp_output_path )
1026+ _safe_extractall ( idml_archive , temp_output_path )
10001027 logging .debug (" --- namelist of first few files in archive = %s" , str (idml_archive .namelist ()[:5 ]))
10011028
10021029 xml_files = get_files_from_of_type (temp_output_path , "xml" )
0 commit comments