Skip to content

Commit 5928217

Browse files
committed
restore rank info to DE output files
address PR comments
1 parent cfa9129 commit 5928217

File tree

4 files changed

+207
-208
lines changed

4 files changed

+207
-208
lines changed

ingest/cli_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ def create_parser():
255255
)
256256

257257
parser_differential_expression.add_argument(
258-
"--differential_expression",
258+
"--differential-expression",
259259
required=True,
260260
action="store_true",
261261
help="Indicates that differential expression analysis should be invoked",

ingest/de.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ def __init__(
7272
@staticmethod
7373
def get_cluster_cells(cluster_cells):
7474
""" ID cells in cluster file """
75+
# cluster_cells.tolist() yields a list of lists that needs to be flattened;
76+
# extending the result list with each inner list unpacks its single value into a plain value
7577
cluster_cell_values = cluster_cells.tolist()
7678
cluster_cell_list = []
7779
for value in cluster_cell_values:
@@ -156,9 +158,9 @@ def execute_de(self):
156158
self.genes,
157159
self.barcodes,
158160
)
159-
DifferentialExpression.de_logger.info(f"preparing DE on sparse matrix")
161+
DifferentialExpression.de_logger.info("preparing DE on sparse matrix")
160162
else:
161-
self.prepare_h5ad(
163+
self.run_h5ad(
162164
self.cluster,
163165
self.metadata,
164166
self.matrix_file_path,
@@ -168,10 +170,10 @@ def execute_de(self):
168170
self.cluster_name,
169171
self.method,
170172
)
171-
DifferentialExpression.de_logger.info(f"preparing DE on dense matrix")
173+
DifferentialExpression.de_logger.info("preparing DE on dense matrix")
172174

173175
@staticmethod
174-
def prepare_h5ad(
176+
def run_h5ad(
175177
cluster,
176178
metadata,
177179
matrix_file_path,
@@ -183,16 +185,15 @@ def prepare_h5ad(
183185
genes=None,
184186
barcodes=None,
185187
):
186-
"""
187-
"""
188+
188189
de_cells = DifferentialExpression.get_cluster_cells(cluster.file['NAME'].values)
189190
de_annots = DifferentialExpression.subset_annots(metadata, de_cells)
190191

191192
if matrix_file_type == "dense":
192-
# will need try/except
193+
# TODO (SCP-4205): wrap this matrix read in try/except
193194
adata = sc.read(matrix_file_path)
194195
else:
195-
# MTX DE UNTESTED
196+
# MTX DE UNTESTED (SCP-4203)
196197
# will want try/except here to catch failed data object composition
197198
adata = sc.read_mtx(matrix_file_path)
198199
# For AnnData, obs are cells and vars are genes
@@ -206,7 +207,7 @@ def prepare_h5ad(
206207

207208
adata = DifferentialExpression.subset_adata(adata, de_cells)
208209

209-
# will need try/except
210+
# will need try/except (SCP-4205)
210211
adata.obs = DifferentialExpression.order_annots(de_annots, adata.obs_names)
211212

212213
sc.pp.normalize_total(adata, target_sum=1e4)
@@ -238,8 +239,9 @@ def prepare_h5ad(
238239
out_file = (
239240
f'{cluster_name}--{annotation}--{str(group_filename)}--{method}.tsv'
240241
)
241-
242-
rank.to_csv(out_file, sep='\t', float_format='%.4g', index=False)
242+
# Round numbers to 4 significant digits while respecting fixed point
243+
# and scientific notation (note: trailing zeros are removed)
244+
rank.to_csv(out_file, sep='\t', float_format='%.4g')
243245

244246
# Provide h5ad of DE analysis as reference computable object
245247
# DifferentialExpression.de_logger.info(f"Writing DE h5ad file")

ingest/ingest_pipeline.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@
9595
from .expression_files.dense_ingestor import DenseIngestor
9696
from .expression_files.mtx import MTXIngestor
9797
from .cli_parser import create_parser, validate_arguments
98-
from .de import DifferentialExpression, prepare_h5ad
98+
from .de import DifferentialExpression
9999

100100

101101
class IngestPipeline:
@@ -476,7 +476,7 @@ def calculate_de(self):
476476
**self.kwargs,
477477
)
478478
de.execute_de()
479-
# ToDo: surface failed DE for analytics
479+
# ToDo: surface failed DE for analytics (SCP-4206)
480480
return 0
481481

482482
def report_validation(self, status):
@@ -512,12 +512,9 @@ def run_ingest(ingest, arguments, parsed_args):
512512
status_subsample = ingest.subsample()
513513
status.append(status_subsample)
514514
elif "differential_expression" in arguments:
515-
if arguments["differential_expression"]:
516-
config.set_parent_event_name(
517-
"ingest-pipeline:differential_expression:ingest"
518-
)
519-
status_de = ingest.calculate_de()
520-
status.append(status_de)
515+
config.set_parent_event_name("ingest-pipeline:differential-expression")
516+
status_de = ingest.calculate_de()
517+
status.append(status_de)
521518

522519
return status, status_cell_metadata
523520

0 commit comments

Comments
 (0)