Skip to content

Commit d2558de

Browse files
Merge branch 'ea-cell-metadata-pandas' of github.com:broadinstitute/scp-ingest-pipeline into ea-subsample-cluster
2 parents 2ea3907 + 67a6194 commit d2558de

File tree

2 files changed

+31
-8
lines changed

2 files changed

+31
-8
lines changed

ingest/cell_metadata.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,19 @@
2222
# Welcome comments about whether this should live here or in the class
2323
@dataclass
2424
class Document(TypedDict):
25-
name: str
26-
study_accession: str
27-
unique_values: List
28-
annotation_type: str
29-
file_id: str
25+
def __init__(
26+
self,
27+
name: str,
28+
study_accession: str,
29+
unique_values: List,
30+
annotation_type: str,
31+
file_id: str,
32+
):
33+
self.name = name
34+
self.study_accession = study_accession
35+
self.unique_values = unique_values
36+
self.annotation_type = annotation_type
37+
self.file_id = file_id
3038

3139

3240
# Welcome comments about whether this should live here or in the class

ingest/ingest_pipeline.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@
1818
# Ingest Cell Metadata file
1919
python ingest_pipeline.py --study-accession SCP1 --file-id 123abc ingest_cell_metadata --cell-metadata-file ../tests/data/metadata_valid.tsv --ingest-cell-metadata
2020
21+
# Ingest Cell Metadata file against convention
22+
python ingest_pipeline.py --study-accession SCP1 --file-id 123abc ingest_cell_metadata --cell-metadata-file ../tests/data/metadata_valid.tsv --ingest-cell-metadata --validate-convention
23+
2124
# Ingest dense file
2225
python ingest_pipeline.py --study-accession SCP1 --file-id 123abc ingest_expression --taxon-name 'Homo sapiens' --taxon-common-name human --ncbi-taxid 9606 --matrix-file ../tests/data/dense_matrix_19_genes_100k_cells.txt --matrix-file-type dense
2326
@@ -58,7 +61,7 @@
5861

5962
class IngestPipeline(object):
6063
# File location for metadata json convention
61-
JSON_CONVENTION = 'DoNotTouch/AMC_v1.1.1.json'
64+
JSON_CONVENTION = 'DoNotTouch/AMC_v0.8.json'
6265

6366
def __init__(
6467
self,
@@ -270,7 +273,14 @@ def ingest_expression(self) -> None:
270273
def ingest_cell_metadata(self):
271274
"""Ingests cell metadata files into Firestore."""
272275
# TODO: Add self.has_valid_metadata_convention() to if statement
273-
if self.cell_metadata.is_valid_file and self.has_valid_metadata_convention():
276+
if self.cell_metadata.is_valid_file:
277+
# Check whether the file needs to be checked against the metadata convention
278+
if self.kwargs['validate_convention'] is not None:
279+
if self.kwargs['validate_convention']:
280+
if self.has_valid_metadata_convention():
281+
pass
282+
else:
283+
return 1
274284
self.cell_metadata.reset_file(2, open_as="dataframe")
275285
self.cell_metadata.preproccess()
276286
for metadataModel in self.cell_metadata.transform():
@@ -418,7 +428,12 @@ def create_parser():
418428
"--ingest-cell-metadata",
419429
required=True,
420430
action="store_true",
421-
help="Indicates that subsampliing functionality should be invoked",
431+
help="Indicates that ingest of cell metadata should be invoked",
432+
)
433+
parser_cell_metadata.add_argument(
434+
"--validate-convention",
435+
action="store_true",
436+
help="Indicates that metadata file should be validated against convention",
422437
)
423438

424439
# Parser ingesting cluster files

0 commit comments

Comments
 (0)