Skip to content

Commit 425e15b

Browse files
Validate against the metadata convention in a Google bucket
1 parent b7f370b commit 425e15b

File tree

2 files changed

+8
-4
lines changed

2 files changed

+8
-4
lines changed

ingest/ingest_files.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ def open_file(self, file_path, open_as=None, start_point: int = 0):
8888
file_connections = {
8989
"text/csv": self.open_csv(open_file),
9090
"text/plain": open_file,
91+
"application/json": open_file,
9192
"text/tab-separated-values": self.open_tsv(open_file),
9293
"dataframe": self.open_pandas,
9394
}

ingest/ingest_pipeline.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
python ingest_pipeline.py --study-accession SCP1 --file-id 123abc ingest_cell_metadata --cell-metadata-file ../tests/data/metadata_valid.tsv --ingest-cell-metadata
2020
2121
# Ingest Cell Metadata file against convention
22-
python ingest_pipeline.py --study-accession SCP1 --file-id 123abc ingest_cell_metadata --cell-metadata-file ../tests/data/metadata_valid.tsv --ingest-cell-metadata --validate-convention
22+
!! Please note that you must have permission to access the SCP bucket
23+
python ingest_pipeline.py --study-accession SCP1 --file-id 123abc ingest_cell_metadata --cell-metadata-file ../tests/data/valid_array_v1.1.3.tsv --ingest-cell-metadata --validate-convention
2324
2425
# Ingest dense file
2526
python ingest_pipeline.py --study-accession SCP1 --file-id 123abc ingest_expression --taxon-name 'Homo sapiens' --taxon-common-name human --ncbi-taxid 9606 --matrix-file ../tests/data/dense_matrix_19_genes_100k_cells.txt --matrix-file-type dense
@@ -47,6 +48,7 @@
4748
from google.api_core import exceptions
4849
from google.cloud import firestore
4950
from mtx import Mtx
51+
from ingest_files import IngestFiles
5052
from subsample import SubSample
5153
from loom import Loom
5254
from validation.validate_metadata import (
@@ -61,7 +63,7 @@
6163

6264
class IngestPipeline(object):
6365
# File location for metadata json convention
64-
JSON_CONVENTION = 'DoNotTouch/AMC_v0.8.json'
66+
JSON_CONVENTION = 'gs://fc-bcc55e6c-bec3-4b2e-9fb2-5e1526ddfcd2/metadata_conventions/AMC_v1.1.1.json'
6567

6668
def __init__(
6769
self,
@@ -232,8 +234,8 @@ def load_subsample(self, doc):
232234

233235
def has_valid_metadata_convention(self):
234236
""" Determines if cell metadata file follows metadata convention"""
235-
with open(self.JSON_CONVENTION, 'r') as f:
236-
convention = json.load(f)
237+
json_file = IngestFiles(self.JSON_CONVENTION, ['application/json'])
238+
convention = json.load(json_file.file)
237239

238240
collect_jsonschema_errors(self.cell_metadata, convention)
239241
validate_collected_ontology_data(self.cell_metadata, convention)
@@ -278,6 +280,7 @@ def ingest_cell_metadata(self):
278280
if self.kwargs['validate_convention'] is not None:
279281
if self.kwargs['validate_convention']:
280282
if self.has_valid_metadata_convention():
283+
print("it works!")
281284
pass
282285
else:
283286
return 1

0 commit comments

Comments
 (0)