@@ -111,7 +111,7 @@ def load_raw_annots(metadata_file_path, allowed_file_types, headers, dtypes):
111111 return raw_annots
112112
113113 @staticmethod
114- def prepare_annots (metadata , de_cells ):
114+ def subset_annots (metadata , de_cells ):
115115 """ subset metadata based on cells in cluster
116116 """
117117 DifferentialExpression .de_logger .info (
@@ -126,6 +126,24 @@ def prepare_annots(metadata, de_cells):
126126 cluster_annots = raw_annots [raw_annots .index .isin (de_cells )]
127127 return cluster_annots
128128
@staticmethod
def order_annots(metadata, adata_cells):
    """Arrange metadata rows to follow the cell order used by the matrix.

    Args:
        metadata: pandas object supporting ``reindex`` (presumably the
            per-cell annotation DataFrame indexed by cell name -- confirm
            against caller).
        adata_cells: ordered cell names exposing ``tolist()``; the caller
            in this file passes ``adata.obs_names``.

    Returns:
        ``metadata`` reindexed to ``adata_cells``. Per pandas ``reindex``
        semantics, labels absent from ``metadata`` produce NaN-filled rows
        rather than raising.
    """
    cell_order = adata_cells.tolist()
    reordered = metadata.reindex(index=cell_order)
    return reordered
135+
@staticmethod
def subset_adata(adata, de_cells):
    """Subset the adata object to the cells present in the cluster.

    Args:
        adata: object exposing ``obs_names`` and supporting boolean-mask
            indexing (presumably an AnnData matrix with cells as
            observations -- confirm against caller).
        de_cells: collection of cell names to keep.

    Returns:
        The result of indexing ``adata`` with a boolean mask that is True
        for every entry of ``adata.obs_names`` found in ``de_cells``; the
        original observation order is preserved.
    """
    DifferentialExpression.de_logger.info(
        "subsetting matrix on cells in clustering"
    )
    # NumPy wraps a set as a single object instead of iterating over it,
    # which would make every membership test False; materialize it first.
    if isinstance(de_cells, (set, frozenset)):
        de_cells = list(de_cells)
    # np.isin replaces np.in1d, which is deprecated as of NumPy 2.0; for
    # the 1-D obs_names input both produce the same boolean mask.
    keep_mask = np.isin(adata.obs_names, de_cells)
    return adata[keep_mask]
146+
129147 def execute_de (self ):
130148 if self .matrix_file_type == "mtx" :
131149 self .prepare_h5ad (
@@ -168,7 +186,7 @@ def prepare_h5ad(
168186 """
169187 """
170188 de_cells = DifferentialExpression .get_cluster_cells (cluster .file ['NAME' ].values )
171- de_annots = DifferentialExpression .prepare_annots (metadata , de_cells )
189+ de_annots = DifferentialExpression .subset_annots (metadata , de_cells )
172190
173191 if matrix_file_type == "dense" :
174192 # will need try/except
@@ -186,22 +204,13 @@ def prepare_h5ad(
186204
187205 adata = adata .transpose ()
188206
189- # make a testable function
190- # subset matrix based on cells in cluster
191- DifferentialExpression .de_logger .info (
192- f"subsetting matrix on cells in clustering"
193- )
194- matrix_subset_list = np .in1d (adata .obs_names , de_cells )
195- adata = adata [matrix_subset_list ]
207+ adata = DifferentialExpression .subset_adata (adata , de_cells )
196208
197209 # will need try/except
198- # organize metadata to match cell order in matrix
199- matrix_cell_order = adata .obs_names .tolist ()
200- adata .obs = de_annots .reindex (matrix_cell_order )
210+ adata .obs = DifferentialExpression .order_annots (de_annots , adata .obs_names )
201211
202212 sc .pp .normalize_total (adata , target_sum = 1e4 )
203213 sc .pp .log1p (adata )
204- # adata.write_h5ad(file_name)
205214 rank_key = "rank." + annotation + "." + method
206215 DifferentialExpression .de_logger .info (f"calculating DE" )
207216 try :
@@ -229,9 +238,8 @@ def prepare_h5ad(
229238 out_file = (
230239 f'{ cluster_name } --{ annotation } --{ str (group_filename )} --{ method } .tsv'
231240 )
232- # float format causes bad output (rows repeated)
241+
233242 rank .to_csv (out_file , sep = '\t ' , float_format = '%.4g' , index = False )
234- # rank.to_csv(out_file, sep='\t', index=False)
235243
236244 # Provide h5ad of DE analysis as reference computable object
237245 # DifferentialExpression.de_logger.info(f"Writing DE h5ad file")
0 commit comments