Skip to content

Commit cbdbf83

Browse files
Merge pull request #725 from alephdata/release/4.1.2
Release/4.1.2
2 parents 3860024 + ef90ce5 commit cbdbf83

File tree

11 files changed

+80 -6 lines changed

11 files changed

+80 -6 lines changed

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 4.1.0
2+
current_version = 4.1.2
33
tag_name = {new_version}
44
commit = True
55
tag = True

ingestors/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
import logging
44

5-
__version__ = "4.1.0"
5+
__version__ = "4.1.2"
66

77
logging.getLogger("chardet").setLevel(logging.INFO)
88
logging.getLogger("PIL").setLevel(logging.INFO)

ingestors/packages/__init__.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,9 @@
44
import tarfile
55
from pathlib import PurePath
66

7+
import py7zr
8+
from py7zr.exceptions import ArchiveError
9+
710
from ingestors.ingestor import Ingestor
811
from ingestors.support.package import PackageSupport
912
from ingestors.support.shell import ShellSupport
@@ -24,9 +27,11 @@ def unpack(self, file_path, entity, temp_dir):
2427
*pure_file_path.parts[1:-1], reconstructed_filename
2528
)
2629

27-
self.exec_command(
28-
"7z", "x", str(pure_file_path), "-y", "-r", "-bb0", "-bd", f"-oc:{temp_dir}"
29-
)
30+
try:
31+
with py7zr.SevenZipFile(str(pure_file_path), mode="r") as z:
32+
z.extractall(path=temp_dir)
33+
except ArchiveError as e:
34+
raise ProcessingException(f"Error: {e}")
3035

3136

3237
class SingleFilePackageIngestor(PackageSupport, Ingestor):

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@ numpy<2.0.0 # pinned because otherwise spacy requires an incompatible numpy
1414
fingerprints==1.1.1
1515
fasttext==0.9.3
1616
pika==1.3.2
17+
py7zr==1.0.0
1718

1819
# Development
1920
pytest==8.3.5

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name="ingest",
8-
version="4.1.0",
8+
version="4.1.2",
99
author="Organized Crime and Corruption Reporting Project",
1010
packages=find_packages(exclude=["tests"]),
1111
package_dir={"ingestors": "ingestors"},

tests/fixtures/bad7zip.7z

170 Bytes
Binary file not shown.

tests/fixtures/badrar.rar

115 Bytes
Binary file not shown.

tests/fixtures/badtar.tar

10 KB
Binary file not shown.

tests/fixtures/badzip.zip

206 Bytes
Binary file not shown.

tests/fixtures/secret.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
This is a secret!

0 commit comments

Comments
 (0)