Skip to content

Commit 36544aa

Browse files
Merge pull request #41 from broadinstitute/ea-subsample-cluster
Ea subsample cluster
2 parents d0a2e87 + eac7584 commit 36544aa

File tree

2 files changed

+10
-3
lines changed

2 files changed

+10
-3
lines changed

ingest/ingest_files.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,7 @@ def open_file(self, file_path, open_as=None, start_point: int = 0):
8888
file_connections = {
8989
"text/csv": self.open_csv(open_file),
9090
"text/plain": open_file,
91+
"application/json": open_file,
9192
"text/tab-separated-values": self.open_tsv(open_file),
9293
"dataframe": self.open_pandas,
9394
}

ingest/ingest_pipeline.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,8 @@
1919
python ingest_pipeline.py --study-accession SCP1 --file-id 123abc ingest_cell_metadata --cell-metadata-file ../tests/data/metadata_valid.tsv --ingest-cell-metadata
2020
2121
# Ingest Cell Metadata file against convention
22-
python ingest_pipeline.py --study-accession SCP1 --file-id 123abc ingest_cell_metadata --cell-metadata-file ../tests/data/metadata_valid.tsv --ingest-cell-metadata --validate-convention
22+
!! Please note that you must have permission to the SCP bucket
23+
python ingest_pipeline.py --study-accession SCP1 --file-id 123abc ingest_cell_metadata --cell-metadata-file ../tests/data/valid_array_v1.1.3.tsv --ingest-cell-metadata --validate-convention
2324
2425
# Ingest dense file
2526
python ingest_pipeline.py --study-accession SCP1 --file-id 123abc ingest_expression --taxon-name 'Homo sapiens' --taxon-common-name human --ncbi-taxid 9606 --matrix-file ../tests/data/dense_matrix_19_genes_100k_cells.txt --matrix-file-type dense
@@ -47,6 +48,7 @@
4748
from google.api_core import exceptions
4849
from google.cloud import firestore
4950
from mtx import Mtx
51+
from ingest_files import IngestFiles
5052
from subsample import SubSample
5153
from loom import Loom
5254
from validation.validate_metadata import validate_input_metadata, report_issues
@@ -57,7 +59,7 @@
5759

5860
class IngestPipeline(object):
5961
# File location for metadata json convention
60-
JSON_CONVENTION = 'DoNotTouch/AMC_v0.8.json'
62+
JSON_CONVENTION = 'gs://fc-bcc55e6c-bec3-4b2e-9fb2-5e1526ddfcd2/metadata_conventions/AMC_v1.1.3/AMC_v1.1.3.json'
6163

6264
def __init__(
6365
self,
@@ -229,8 +231,11 @@ def load_subsample(self, doc):
229231
def has_valid_metadata_convention(self):
230232
""" Determines if cell metadata file follows metadata convention"""
231233
with open(self.JSON_CONVENTION, 'r') as f:
232-
convention = json.load(f)
234+
json_file = IngestFiles(self.JSON_CONVENTION, ['application/json'])
235+
convention = json.load(json_file.file)
233236
validate_input_metadata(self.cell_metadata, convention)
237+
238+
f.close()
234239
return not report_issues(self.cell_metadata)
235240

236241
def ingest_expression(self) -> None:
@@ -272,6 +277,7 @@ def ingest_cell_metadata(self):
272277
if self.kwargs['validate_convention'] is not None:
273278
if self.kwargs['validate_convention']:
274279
if self.has_valid_metadata_convention():
280+
print("it works!")
275281
pass
276282
else:
277283
return 1

0 commit comments

Comments
 (0)