Skip to content

Commit be6bc40

Browse files
committed
fix bug in delimiter_in_gene_name and add test
1 parent e31ba9f commit be6bc40

File tree

2 files changed

+22
-3
lines changed

2 files changed

+22
-3
lines changed

ingest/de.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -224,8 +224,9 @@ def execute_de(self):
224224
@staticmethod
225225
def get_genes(genes_path):
226226
""" Genes file can have one or two columns of gene information
227-
If two columns present, check if there are duplicates in 2nd col
228-
If no duplicates, use as var_names, else use join columns
227+
Preferentially use gene names from second column.
228+
If duplicate gene names, check that 1st plus 2nd column provides uniqueness
229+
If unique when joined, join columns with pipe (|) for use as DE input
229230
"""
230231
genes_object = IngestFiles(genes_path, None)
231232
local_genes_path = genes_object.resolve_path(genes_path)[1]
@@ -302,7 +303,7 @@ def remove_single_sample_data(adata, annotation):
302303
def delimiter_in_gene_name(rank):
    """ Check if pipe delimiter occurs in "names" column

    The pipe is matched as a literal character (regex=False); interpreted
    as a regex, '|' is an alternation of two empty patterns and matches
    every string. Missing names are treated as "no delimiter" (na=False).

    :param rank: pandas DataFrame with a "names" column of strings
    :return: True if any entry in "names" contains '|', otherwise False
    """
    # .any() has to be called: the bare bound method `.any` is always truthy
    has_pipe = rank['names'].str.contains('|', regex=False, na=False)
    return bool(has_pipe.any())
306307

307308
@staticmethod
308309
def extract_gene_id_for_out_file(rank):

tests/test_de.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,24 @@ def test_detect_duplicate_gene_names(self):
110110
dup_genes = DifferentialExpression.get_genes(dup_genes_path)
111111
self.assertIn("|", dup_genes[0], f"no delimiter expected in {dup_genes[0]}")
112112

113+
def test_delimiter_in_gene_name(self):
    """ Test detection of the pipe delimiter in the "names" column
    """
    # names without a pipe must not be reported as containing the delimiter
    plain_names_df = pd.DataFrame(
        {"names": ["Tns1", "Gfra1"], "scores": ["10.5", "10.34"]}
    )
    pipe_absent = DifferentialExpression.delimiter_in_gene_name(plain_names_df)
    self.assertFalse(
        pipe_absent, "no pipe delimiter should be detected in the input"
    )

    # pipe-joined names must be reported as containing the delimiter
    piped_names_df = pd.DataFrame(
        {
            "names": ["ENSMUST00000027035|Sox17", "ENSMUST00000195555|Sox17"],
            "scores": ["41.459137", "-5.058518"],
        }
    )
    pipe_present = DifferentialExpression.delimiter_in_gene_name(piped_names_df)
    self.assertTrue(pipe_present, "expected pipe delimiter undetected")
130+
113131
def test_de_remove_single_sample(self):
114132
""" Test single sample removal
115133
"""

0 commit comments

Comments
 (0)