Skip to content

Commit fa76fda

Browse files
committed
address PR comments
1 parent 6928e10 commit fa76fda

File tree

9 files changed

+34
-35
lines changed

9 files changed

+34
-35
lines changed
File renamed without changes.

ingest/cli_parser.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -331,19 +331,21 @@ def create_parser():
331331
"--gene-file", help="Path to .genes.tsv file"
332332
)
333333

334-
# h5ad subparsers
335-
parser_h5ad = subparsers.add_parser(
336-
"ingest_h5ad", help="Indicates that h5ad file is being ingested"
334+
# AnnData subparsers
335+
parser_anndata = subparsers.add_parser(
336+
"ingest_anndata", help="Indicates that AnnData file is being ingested"
337337
)
338338

339-
parser_h5ad.add_argument(
340-
"--ingest-h5ad",
339+
parser_anndata.add_argument(
340+
"--ingest-anndata",
341341
required=True,
342342
action="store_true",
343-
help="Indicates that ingest of h5ad file should be invoked",
343+
help="Indicates that ingest of AnnData file should be invoked",
344344
)
345345

346-
parser_h5ad.add_argument("--h5ad-file", required=True, help="Path to h5ad file")
346+
parser_anndata.add_argument(
347+
"--anndata-file", required=True, help="Path to AnnData file"
348+
)
347349

348350
return parser
349351

ingest/ingest_files.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ class IngestFiles:
9292

9393
def __init__(self, file_path, allowed_file_types):
9494
self.file_path = file_path
95-
# valid suffixes for Anndata (h5ad file extensions)
95+
# valid suffixes for AnnData ingest (expecting .h5ad)
96+
# including hdf5 file extensions - AnnData files should be valid hdf5
9697
mimetypes.add_type('application/x-hdf5', '.h5ad')
9798
mimetypes.add_type('application/x-hdf5', '.h5')
9899
mimetypes.add_type('application/x-hdf5', '.hdf5')
@@ -209,7 +210,7 @@ def open_file(self, file_path, open_as=None, start_point: int = 0, **kwargs):
209210
"text/plain": self.open_txt,
210211
"text/tab-separated-values": self.open_tsv,
211212
"dataframe": self.open_pandas,
212-
"application/x-hdf5": self.open_h5ad,
213+
"application/x-hdf5": self.open_anndata,
213214
}
214215

215216
if start_point != 0:
@@ -330,7 +331,7 @@ def open_pandas(self, file_path, file_type, **kwargs):
330331
else:
331332
raise ValueError("File must be tab or comma delimited")
332333

333-
def open_h5ad(self, file_path, **kwargs):
334+
def open_anndata(self, file_path, **kwargs):
334335
"""Opens file as AnnData object """
335336
try:
336337
return sc.read_h5ad(file_path, backed='r')

ingest/ingest_pipeline.py

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@
2626
# Ingest dense file
2727
python ingest_pipeline.py --study-id 5d276a50421aa9117c982845 --study-file-id 5dd5ae25421aa910a723a337 ingest_expression --taxon-name 'Homo sapiens' --taxon-common-name human --ncbi-taxid 9606 --matrix-file ../tests/data/dense_matrix_19_genes_1000_cells.txt --matrix-file-type dense
2828
29-
# Ingest h5ad file
30-
python ingest_pipeline.py --study-id 5d276a50421aa9117c982845 --study-file-id 5dd5ae25421aa910a723a337 ingest_h5ad --h5ad-file ../tests/data/test.h5ad
29+
# Ingest AnnData file
30+
python ingest_pipeline.py --study-id 5d276a50421aa9117c982845 --study-file-id 5dd5ae25421aa910a723a337 ingest_anndata --anndata-file ../tests/data/anndata/test.h5ad
3131
3232
# Subsample cluster and metadata file
3333
python ingest_pipeline.py --study-id 5d276a50421aa9117c982845 --study-file-id 5dd5ae25421aa910a723a337 ingest_subsample --cluster-file ../tests/data/test_1k_cluster_Data.csv --name cluster1 --cell-metadata-file ../tests/data/test_1k_metadata_Data.csv --subsample
@@ -51,12 +51,8 @@
5151
from contextlib import nullcontext
5252
from typing import Dict, Generator, List, Tuple, Union
5353
from wsgiref.simple_server import WSGIRequestHandler # noqa: F401
54-
55-
5654
from bson.objectid import ObjectId
5755

58-
59-
# from google.cloud.logging.resource import Resource
6056
try:
6157
# Used when importing internally and in tests
6258
from ingest_files import IngestFiles
@@ -82,10 +78,13 @@
8278
from clusters import Clusters
8379
from expression_files.mtx import MTXIngestor
8480
from expression_files.dense_ingestor import DenseIngestor
85-
from h5ad import H5adIngestor
8681
from monitor import setup_logger, log_exception
8782
from de import DifferentialExpression
8883

84+
# scanpy uses the anndata Python package; disambiguate the local anndata module
85+
# using underscore https://peps.python.org/pep-0008/#naming-conventions
86+
from anndata_ import AnnDataIngestor
87+
8988
except ImportError:
9089
# Used when importing as external package, e.g. imports in single_cell_portal code
9190
from .ingest_files import IngestFiles
@@ -103,7 +102,7 @@
103102
from .clusters import Clusters
104103
from .expression_files.dense_ingestor import DenseIngestor
105104
from .expression_files.mtx import MTXIngestor
106-
from .h5ad import H5adIngestor
105+
from .anndata import AnnDataIngestor
107106
from .cli_parser import create_parser, validate_arguments
108107
from .de import DifferentialExpression
109108

@@ -127,7 +126,7 @@ def __init__(
127126
matrix_file_type: str = None,
128127
cell_metadata_file: str = None,
129128
cluster_file: str = None,
130-
h5ad_file: str = None,
129+
anndata_file: str = None,
131130
subsample=False,
132131
ingest_cell_metadata=False,
133132
ingest_cluster=False,
@@ -147,7 +146,7 @@ def __init__(
147146
else:
148147
self.db = None
149148
self.cluster_file = cluster_file
150-
self.h5ad_file = h5ad_file
149+
self.anndata_file = anndata_file
151150
self.kwargs = kwargs
152151
self.cell_metadata_file = cell_metadata_file
153152
self.props = {}
@@ -479,15 +478,15 @@ def subsample(self):
479478
return 0
480479

481480
@custom_metric(config.get_metric_properties)
482-
def ingest_h5ad(self):
483-
"""Ingests h5ad files."""
484-
self.h5ad = H5adIngestor(
485-
self.h5ad_file, self.study_id, self.study_file_id, **self.kwargs
481+
def ingest_anndata(self):
482+
"""Ingests AnnData files."""
483+
self.anndata = AnnDataIngestor(
484+
self.anndata_file, self.study_id, self.study_file_id, **self.kwargs
486485
)
487-
if self.h5ad.validate():
486+
if self.anndata.validate():
488487
self.report_validation("success")
489488
return 0
490-
# scanpy unable to open h5ad file
489+
# scanpy unable to open AnnData file
491490
else:
492491
self.report_validation("failure")
493492
return 1
@@ -541,11 +540,11 @@ def run_ingest(ingest, arguments, parsed_args):
541540
config.set_parent_event_name("ingest-pipeline:subsample:ingest")
542541
status_subsample = ingest.subsample()
543542
status.append(status_subsample)
544-
elif "ingest_h5ad" in arguments:
545-
if arguments["ingest_h5ad"]:
546-
config.set_parent_event_name("ingest-pipeline:h5ad:ingest")
547-
status_h5ad = ingest.ingest_h5ad()
548-
status.append(status_h5ad)
543+
elif "ingest_anndata" in arguments:
544+
if arguments["ingest_anndata"]:
545+
config.set_parent_event_name("ingest-pipeline:anndata:ingest")
546+
status_anndata = ingest.ingest_anndata()
547+
status.append(status_anndata)
549548
elif "differential_expression" in arguments:
550549
config.set_parent_event_name("ingest-pipeline:differential-expression")
551550
status_de = ingest.calculate_de()

ingest/monitor.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,7 @@ def bypass_mongo_writes():
5050
"""
5151
if os.environ.get("BYPASS_MONGO_WRITES") is not None:
5252
skip = os.environ["BYPASS_MONGO_WRITES"]
53-
if skip == "yes":
54-
return True
55-
else:
56-
return False
53+
return skip == "yes"
5754
else:
5855
return False
5956

0 commit comments

Comments
 (0)