Skip to content

Commit 6928e10

Browse files
committed
Make filetype errors more actionable
1 parent 44bdb0f commit 6928e10

File tree

3 files changed

+46
-11
lines changed

3 files changed

+46
-11
lines changed

ingest/ingest_files.py

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,20 @@ class IngestFiles:
8282
"ignore", "Your application has authenticated using end user credentials"
8383
)
8484

85+
ALLOWED_FILE_EXTENSIONS = {
86+
"text/csv": [".csv"],
87+
"text/plain": [".txt"],
88+
"text/tab-separated-values": [".tsv"],
89+
"dataframe": [".tsv"],
90+
"application/x-hdf5": [".h5ad", ".h5", ".hdf5"],
91+
}
92+
8593
def __init__(self, file_path, allowed_file_types):
8694
self.file_path = file_path
87-
# define filetype for h5ad file extension
95+
# valid suffixes for Anndata (h5ad file extensions)
8896
mimetypes.add_type('application/x-hdf5', '.h5ad')
97+
mimetypes.add_type('application/x-hdf5', '.h5')
98+
mimetypes.add_type('application/x-hdf5', '.hdf5')
8999
# File is remote (in GCS bucket) when running via PAPI,
90100
# and typically local when developing
91101
self.is_remote_file = IngestFiles.is_remote_file(file_path)
@@ -237,10 +247,19 @@ def open_file(self, file_path, open_as=None, start_point: int = 0, **kwargs):
237247
open_file,
238248
)
239249
else:
240-
msg = (
241-
f"Unsupported file format. Allowed file MIME types are: "
242-
f"{' '.join(self.allowed_file_types)}"
243-
)
250+
expected_suffixes = []
251+
for t in self.allowed_file_types:
252+
expected_suffixes.extend(self.ALLOWED_FILE_EXTENSIONS[t])
253+
if file_type == None:
254+
msg = (
255+
f"File type not detected for {file_path}, expected file endings are: "
256+
f"{' '.join(expected_suffixes)}"
257+
)
258+
else:
259+
msg = (
260+
f"Unsupported file format {file_path}. Expected file suffix are: "
261+
f"{' '.join(expected_suffixes)}"
262+
)
244263
log_exception(IngestFiles.dev_logger, IngestFiles.user_logger, msg)
245264
raise ValueError(msg)
246265

ingest/monitor.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,25 @@ def setup_logger(logger_name, log_file, level=logging.DEBUG, format="default"):
4444
return logger
4545

4646

47+
def bypass_mongo_writes():
    """Report whether the developer asked to bypass writing data to MongoDB.

    The bypass is requested by setting the environment variable
    BYPASS_MONGO_WRITES='yes'.

    Returns:
        bool: True only when BYPASS_MONGO_WRITES is set to exactly "yes";
        False when the variable is unset or holds any other value.
    """
    # A single lookup covers both the "unset" and "set to something else"
    # cases: .get() yields None when the variable is missing, and None != "yes".
    return os.environ.get("BYPASS_MONGO_WRITES") == "yes"
59+
60+
4761
def log_exception(dev_logger, user_logger, exception):
    """Report an exception to both loggers, echoing it to stdout in bypass mode.

    Args:
        dev_logger: object whose ``exception`` method receives the exception
            itself.
        user_logger: object whose ``critical`` method receives the exception
            rendered as a string.
        exception: the exception (or message) to report.
    """
    message = str(exception)
    user_logger.critical(message)
    dev_logger.exception(exception)
    # When MongoDB writes are bypassed, also print the message to stdout.
    if not bypass_mongo_writes():
        return
    print(message)
5066

5167

5268
# Modified from https://jdkandersson.com/2019/05/19/testing-decorated-python-functions/

tests/test_h5ad.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@ def test_minimal_valid_h5ad(self):
2727
good_input.validate(), "expect known good file to open with scanpy"
2828
)
2929

30-
def test_truncated_h5ad(self):
30+
def test_truncated_anndata(self):
3131
truncated_input = H5adIngestor(
32-
"../tests/data/h5ad/bad.h5ad",
32+
"../tests/data/h5ad/bad.h5",
3333
"addedfeed000000000000000",
3434
"dec0dedfeed0000000000000",
3535
)
@@ -38,14 +38,14 @@ def test_truncated_h5ad(self):
3838
# an exception before assertRaises gets called
3939
self.assertRaisesRegex(
4040
ValueError,
41-
"Scanpy cannot read file, \"../tests/data/h5ad/bad.h5ad\".",
41+
"Scanpy cannot read file, \"../tests/data/h5ad/bad.h5\".",
4242
lambda: truncated_input.obtain_adata(),
4343
)
4444
self.assertFalse(truncated_input.validate())
4545

46-
def test_input_not_h5ad(self):
46+
def test_input_bad_suffix(self):
4747
bad_input = H5adIngestor(
48-
"../tests/data/h5ad/bad.h5",
48+
"../tests/data/h5ad/bad.foo",
4949
"addedfeed000000000000000",
5050
"dec0dedfeed0000000000000",
5151
)
@@ -54,7 +54,7 @@ def test_input_not_h5ad(self):
5454
# an exception before assertRaises gets called
5555
self.assertRaisesRegex(
5656
ValueError,
57-
"Unsupported file format. Allowed file MIME types are: application/x-hdf5",
57+
"File type not detected for ../tests/data/h5ad/bad.foo, expected file endings are: .h5ad .h5 .hdf5",
5858
lambda: bad_input.obtain_adata(),
5959
)
6060
self.assertFalse(bad_input.validate())

0 commit comments

Comments
 (0)