Skip to content

Commit 45377bc

Browse files
committed
more unit testable functions
1 parent 2ec43a4 commit 45377bc

File tree

1 file changed

+23
-15
lines changed

1 file changed

+23
-15
lines changed

ingest/de.py

Lines changed: 23 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -111,7 +111,7 @@ def load_raw_annots(metadata_file_path, allowed_file_types, headers, dtypes):
111111
return raw_annots
112112

113113
@staticmethod
114-
def prepare_annots(metadata, de_cells):
114+
def subset_annots(metadata, de_cells):
115115
""" subset metadata based on cells in cluster
116116
"""
117117
DifferentialExpression.de_logger.info(
@@ -126,6 +126,24 @@ def prepare_annots(metadata, de_cells):
126126
cluster_annots = raw_annots[raw_annots.index.isin(de_cells)]
127127
return cluster_annots
128128

129+
@staticmethod
130+
def order_annots(metadata, adata_cells):
131+
""" order metadata based on cells order in matrix
132+
"""
133+
matrix_cell_order = adata_cells.tolist()
134+
return metadata.reindex(matrix_cell_order)
135+
136+
@staticmethod
137+
def subset_adata(adata, de_cells):
138+
""" subset adata object based on cells in cluster
139+
"""
140+
DifferentialExpression.de_logger.info(
141+
f"subsetting matrix on cells in clustering"
142+
)
143+
matrix_subset_list = np.in1d(adata.obs_names, de_cells)
144+
adata = adata[matrix_subset_list]
145+
return adata
146+
129147
def execute_de(self):
130148
if self.matrix_file_type == "mtx":
131149
self.prepare_h5ad(
@@ -168,7 +186,7 @@ def prepare_h5ad(
168186
"""
169187
"""
170188
de_cells = DifferentialExpression.get_cluster_cells(cluster.file['NAME'].values)
171-
de_annots = DifferentialExpression.prepare_annots(metadata, de_cells)
189+
de_annots = DifferentialExpression.subset_annots(metadata, de_cells)
172190

173191
if matrix_file_type == "dense":
174192
# will need try/except
@@ -186,22 +204,13 @@ def prepare_h5ad(
186204

187205
adata = adata.transpose()
188206

189-
# make a testable function
190-
# subset matrix based on cells in cluster
191-
DifferentialExpression.de_logger.info(
192-
f"subsetting matrix on cells in clustering"
193-
)
194-
matrix_subset_list = np.in1d(adata.obs_names, de_cells)
195-
adata = adata[matrix_subset_list]
207+
adata = DifferentialExpression.subset_adata(adata, de_cells)
196208

197209
# will need try/except
198-
# organize metadata to match cell order in matrix
199-
matrix_cell_order = adata.obs_names.tolist()
200-
adata.obs = de_annots.reindex(matrix_cell_order)
210+
adata.obs = DifferentialExpression.order_annots(de_annots, adata.obs_names)
201211

202212
sc.pp.normalize_total(adata, target_sum=1e4)
203213
sc.pp.log1p(adata)
204-
# adata.write_h5ad(file_name)
205214
rank_key = "rank." + annotation + "." + method
206215
DifferentialExpression.de_logger.info(f"calculating DE")
207216
try:
@@ -229,9 +238,8 @@ def prepare_h5ad(
229238
out_file = (
230239
f'{cluster_name}--{annotation}--{str(group_filename)}--{method}.tsv'
231240
)
232-
# float format causes bad output (rows repeated)
241+
233242
rank.to_csv(out_file, sep='\t', float_format='%.4g', index=False)
234-
# rank.to_csv(out_file, sep='\t', index=False)
235243

236244
# Provide h5ad of DE analysis as reference computable object
237245
# DifferentialExpression.de_logger.info(f"Writing DE h5ad file")

0 commit comments

Comments
 (0)