Skip to content

Commit 0dba75b

Browse files
Close file
2 parents 425e15b + d0a2e87 commit 0dba75b

25 files changed

+2270
-419
lines changed

.circleci/config.yml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
#
55
version: 2.1
66

7+
orbs:
8+
codecov: codecov/[email protected]
9+
710
jobs:
811
build:
912
docker:
@@ -61,9 +64,7 @@ jobs:
6164
export FIRESTORE_EMULATOR_HOST=localhost:8081
6265
. venv/bin/activate
6366
cd tests
64-
coverage run -m pytest
65-
coverage report --include *scp-ingest-pipeline/ingest*
66-
coverage html --include *scp-ingest-pipeline/ingest*
67+
pytest --cov-report=xml --cov=../ingest/
6768
68-
- store_artifacts:
69-
path: tests/htmlcov
69+
- codecov/upload:
70+
file: tests/coverage.xml

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
File Ingest Pipeline for Single Cell Portal
33

44
[![Build status](https://img.shields.io/circleci/build/github/broadinstitute/scp-ingest-pipeline.svg)](https://circleci.com/gh/broadinstitute/scp-ingest-pipeline)
5+
[![Code coverage](https://codecov.io/gh/broadinstitute/scp-ingest-pipeline/branch/master/graph/badge.svg)](https://codecov.io/gh/broadinstitute/scp-ingest-pipeline)
56

67
The SCP Ingest Pipeline is an ETL pipeline for single-cell RNA-seq data.
78

ingest/cell_metadata.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@ def __init__(self, file_path, file_id: str, study_accession: str, *args, **kwarg
6161
# lambda below initializes new key with nested dictionary as value and avoids KeyError
6262
self.issues = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
6363
self.ontology = defaultdict(lambda: defaultdict(list))
64-
self.type = defaultdict(list)
6564
self.cells = []
6665
self.is_valid_file = self.validate_format()
6766

@@ -270,6 +269,13 @@ def validate_type_annotations(self):
270269
# string for error reporting
271270
if 'Unnamed' in t:
272271
invalid_types.append('<empty value>')
272+
# Duplicated metadata header name causes type annotation issue.
273+
# Side effect of Pandas adding a suffix to uniquify the header.
274+
# These invalid annotations should not be included in invalid
275+
# type annotation count. This exception may cause miscount of
276+
# type annotation errors if user-supplied annotation has a period.
277+
elif '.' in t:
278+
pass
273279
else:
274280
invalid_types.append(t)
275281
if invalid_types:

ingest/ingest_pipeline.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -51,19 +51,15 @@
5151
from ingest_files import IngestFiles
5252
from subsample import SubSample
5353
from loom import Loom
54-
from validation.validate_metadata import (
55-
collect_jsonschema_errors,
56-
validate_collected_ontology_data,
57-
report_issues,
58-
)
54+
from validation.validate_metadata import validate_input_metadata, report_issues
5955

6056
# Ingest file types
6157
EXPRESSION_FILE_TYPES = ["dense", "mtx", "loom"]
6258

6359

6460
class IngestPipeline(object):
6561
# File location for metadata json convention
66-
JSON_CONVENTION = 'gs://fc-bcc55e6c-bec3-4b2e-9fb2-5e1526ddfcd2/metadata_conventions/AMC_v1.1.1.json'
62+
JSON_CONVENTION = 'gs://fc-bcc55e6c-bec3-4b2e-9fb2-5e1526ddfcd2/metadata_conventions/AMC_v1.1.3/AMC_v1.1.3.json'
6763

6864
def __init__(
6965
self,
@@ -234,11 +230,12 @@ def load_subsample(self, doc):
234230

235231
def has_valid_metadata_convention(self):
236232
""" Determines if cell metadata file follows metadata convention"""
237-
json_file = IngestFiles(self.JSON_CONVENTION, ['application/json'])
238-
convention = json.load(json_file.file)
233+
with open(self.JSON_CONVENTION, 'r') as f:
234+
json_file = IngestFiles(self.JSON_CONVENTION, ['application/json'])
235+
convention = json.load(json_file.file)
236+
validate_input_metadata(self.cell_metadata, convention)
239237

240-
collect_jsonschema_errors(self.cell_metadata, convention)
241-
validate_collected_ontology_data(self.cell_metadata, convention)
238+
f.close()
242239
return not report_issues(self.cell_metadata)
243240

244241
def ingest_expression(self) -> None:

0 commit comments

Comments
 (0)