Skip to content

Commit

Permalink
feat(export-script): convert all ".zip" files within the archive to "…
Browse files Browse the repository at this point in the history
….7z"
  • Loading branch information
mabw-rte committed Apr 25, 2024
1 parent f44dc68 commit f0e1637
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 1 deletion.
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,5 @@ requests~=2.27.1
SQLAlchemy~=1.4.46
starlette~=0.17.1
typing_extensions~=4.7.1
uvicorn[standard]~=0.15.0
uvicorn[standard]~=0.15.0
tqdm~=4.64.1
50 changes: 50 additions & 0 deletions scripts/zip_to_7z.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""
This file is used to convert all zip files in an input archive directory to 7z format.
Please do not run this script before testing it extensively on a sample copy of the data.
"""
import argparse
import os
import shutil
import zipfile
from pathlib import Path

from py7zr import SevenZipFile
from tqdm import tqdm

if __name__ == "__main__":
    # Script entry point: convert every "<name>.zip" located directly inside the
    # given directory into "<name>.7z", deleting the zip only after the 7z file
    # has been written successfully.
    #
    # Each zip is expected to contain a single top-level folder named after the
    # zip file itself ("<name>/..."); the content of that folder is stored at
    # the root of the resulting 7z archive.
    #
    # Raises:
    #   FileNotFoundError: the input directory does not exist, or a zip file
    #       does not contain the expected top-level folder.
    #   NotADirectoryError: the input path exists but is not a directory.

    # Parse the input arguments
    parser = argparse.ArgumentParser(description="Convert all archives in an input directory to 7z format.")
    parser.add_argument(
        "archive_dir", type=str, help="The input archive directory containing the zip files to convert."
    )
    args = parser.parse_args()

    # Define the input archive directory
    archive_dir = Path(args.archive_dir)

    # Validate the input early so that the failure is explicit
    if not archive_dir.exists():
        raise FileNotFoundError(f"The input archive directory '{archive_dir}' does not exist.")
    if not archive_dir.is_dir():
        raise NotADirectoryError(f"The input archive path '{archive_dir}' is not a directory.")

    # Only keep the zip files, so that the progress bar reflects the real amount of work
    zip_files = sorted(path for path in archive_dir.iterdir() if path.is_file() and path.suffix == ".zip")

    for zip_file in tqdm(zip_files):
        # Folder that the zip file is expected to create when extracted next to itself
        temp_dir = zip_file.parent.joinpath(zip_file.stem)
        seven_zip_file = zip_file.with_suffix(".7z")

        try:
            # Extract the zip file next to itself
            with zipfile.ZipFile(zip_file, "r") as zip_ref:
                zip_ref.extractall(temp_dir.parent)

            # An explicit check is used instead of `assert`, which is stripped under `python -O`
            if not temp_dir.is_dir():
                raise FileNotFoundError(
                    f"The archive '{zip_file}' does not contain the expected top-level directory '{temp_dir.name}'."
                )

            try:
                # Compress the content of the extracted folder to 7z format
                with SevenZipFile(seven_zip_file, "w") as seven_zip:
                    seven_zip.writeall(temp_dir, arcname=".")
            except BaseException:
                # Do not leave a truncated archive that could be mistaken for a valid one
                if seven_zip_file.exists():
                    seven_zip_file.unlink()
                raise

            # The conversion is successful: the zip file can be removed
            os.remove(zip_file)
        finally:
            # Remove the extracted folder even if an exception occurs (including
            # a failure in the middle of the extraction)
            if temp_dir.exists():
                shutil.rmtree(temp_dir)
35 changes: 35 additions & 0 deletions scripts/zip_to_7z.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/bash
#
# Convert every ".zip" file located directly inside the given directory to ".7z".
#
# Usage: zip_to_7z.sh <archive_dir>
#
# Each zip file is extracted next to itself and is expected to produce a folder
# named after the zip file (without the extension). That folder is compressed
# to "<name>.7z", then the zip file is removed only if every step succeeded.
#
# Exit status: 0 if every zip file was converted, 1 if the input directory does
# not exist or if at least one conversion failed.

# Check if the input archive directory exists
if [ ! -d "$1" ]; then
    echo "The input archive directory '$1' does not exist."
    exit 1
fi

# Without nullglob, a directory without zip files would make the loop run once
# on the literal pattern "<dir>/*.zip".
shopt -s nullglob
zip_files=("$1"/*.zip)

# number of zip files in the input archive directory (same list as the loop below)
zip_files_total=${#zip_files[@]}
# progression counter
progression=0
# number of zip files that could not be converted
failures=0

# Convert all zip files in the input archive directory to 7z format
for zip_file in "${zip_files[@]}"; do
    # Folder that the zip file is expected to create when extracted
    temp_dir="${zip_file%.*}"
    progression=$((progression + 1))
    echo "[$progression/$zip_files_total] Converting $zip_file to 7z format"
    echo "unzipping $zip_file to $temp_dir"

    # Unzip in the parent directory. The folder is deliberately NOT created
    # beforehand: if the zip does not contain it, archiving an empty folder
    # would "succeed" and the original zip would be deleted.
    if ! unzip -q "$zip_file" -d "$1/"; then
        echo "Failed to unzip $zip_file, the zip file is kept" >&2
        failures=$((failures + 1))
    elif [ ! -d "$temp_dir" ]; then
        echo "$zip_file does not contain the expected folder $temp_dir, the zip file is kept" >&2
        failures=$((failures + 1))
    # Compress the temporary directory to 7z format
    elif 7z a -t7z -m0=lzma -mx=9 -mfb=64 -md=32m -ms=on "${zip_file%.*}.7z" "$temp_dir"; then
        # The conversion is successful: remove the zip file
        rm "$zip_file"
    else
        echo "Failed to compress $temp_dir, the zip file is kept" >&2
        failures=$((failures + 1))
    fi

    # Remove the temporary directory even if a step failed
    rm -rf "$temp_dir"
done

# Report failures to the caller through the exit status
if [ "$failures" -gt 0 ]; then
    echo "$failures zip file(s) could not be converted" >&2
    exit 1
fi

0 comments on commit f0e1637

Please sign in to comment.