File tree Expand file tree Collapse file tree 2 files changed +22
-3
lines changed
Expand file tree Collapse file tree 2 files changed +22
-3
lines changed Original file line number Diff line number Diff line change @@ -224,8 +224,9 @@ def execute_de(self):
224224 @staticmethod
225225 def get_genes (genes_path ):
226226 """ Genes file can have one or two columns of gene information
227- If two columns present, check if there are duplicates in 2nd col
228- If no duplicates, use as var_names, else use join columns
227+ Preferentially use gene names from the second column.
228+ If gene names are duplicated, check that joining the 1st and 2nd columns provides uniqueness.
229+ If unique when joined, join the columns with a pipe (|) for use as DE input.
229230 """
230231 genes_object = IngestFiles (genes_path , None )
231232 local_genes_path = genes_object .resolve_path (genes_path )[1 ]
@@ -302,7 +303,7 @@ def remove_single_sample_data(adata, annotation):
302303 def delimiter_in_gene_name (rank ):
303304 """ Check if pipe delimiter occurs in "names" column
304305 """
305- return rank ['names' ].str .contains ('|' ).any
306+ return rank ['names' ].str .contains ('|' , regex = False ).any ()
306307
307308 @staticmethod
308309 def extract_gene_id_for_out_file (rank ):
Original file line number Diff line number Diff line change @@ -110,6 +110,24 @@ def test_detect_duplicate_gene_names(self):
110110 dup_genes = DifferentialExpression .get_genes (dup_genes_path )
111111 self .assertIn ("|" , dup_genes [0 ], f"no delimiter expected in { dup_genes [0 ]} " )
112112
113+ def test_delimiter_in_gene_name (self ):
114+ delimited_data = {"names" : ["Tns1" , "Gfra1" ], "scores" : ["10.5" , "10.34" ]}
115+ delimited_df = pd .DataFrame (delimited_data )
116+ self .assertFalse (
117+ DifferentialExpression .delimiter_in_gene_name (delimited_df ),
118+ "no pipe delimiter should be detected in the input" ,
119+ )
120+
121+ undelimited_data = {
122+ "names" : ["ENSMUST00000027035|Sox17" , "ENSMUST00000195555|Sox17" ],
123+ "scores" : ["41.459137" , "-5.058518" ],
124+ }
125+ undelimited_df = pd .DataFrame (undelimited_data )
126+ self .assertTrue (
127+ DifferentialExpression .delimiter_in_gene_name (undelimited_df ),
128+ "expected pipe delimiter undetected" ,
129+ )
130+
113131 def test_de_remove_single_sample (self ):
114132 """ Test single sample removal
115133 """
You can’t perform that action at this time.
0 commit comments