@@ -35,12 +35,11 @@ def find_expected_files(labels, cluster_name, annotation, scope, method):
3535 """ Check that files were created for all expected annotation labels
3636 """
3737 found = []
38+ sanitized_cluster_name = re .sub (r'\W' , '_' , cluster_name )
39+ sanitized_annotation = re .sub (r'\W' , '_' , annotation )
3840 for label in labels :
39- # line below will conflict with bugfix in SCP-4459
40- sanitized_label = re .sub (r'\W+' , '_' , label )
41- expected_file = (
42- f"{ cluster_name } --{ annotation } --{ sanitized_label } --{ scope } --{ method } .tsv"
43- )
41+ sanitized_label = re .sub (r'\W' , '_' , label )
42+ expected_file = f"{ sanitized_cluster_name } --{ sanitized_annotation } --{ sanitized_label } --{ scope } --{ method } .tsv"
4443 assert os .path .exists (expected_file )
4544 found .append (expected_file )
4645 return found
@@ -446,3 +445,49 @@ def test_de_process_na(self):
446445 os .remove (file )
447446 except :
448447 print (f"Error while deleting file : { file } " )
448+
449+ def test_de_process_sanitize (self ):
450+ """ Run DE on small test case where cluster name, annotation name and labels contain non-word characters
451+ confirm output filenames are sanitized as expected
452+ """
453+ test_annotation = "misc++cellaneous"
454+ test_config = {
455+ "test_annotation" : test_annotation ,
456+ "test_scope" : "study" ,
457+ "test_method" : "wilcoxon" ,
458+ "annot_path" : "../tests/data/differential_expression/de_dense_metadata_sanitize.txt" ,
459+ "study_accession" : "SCPsanitize" ,
460+ "cluster_path" : "../tests/data/differential_expression/de_dense_cluster.tsv" ,
461+ "cluster_name" : "UMAP, pre-QC" ,
462+ "matrix_file" : "../tests/data/differential_expression/de_dense_matrix.tsv" ,
463+ "matrix_type" : "dense" ,
464+ }
465+
466+ found_labels = run_de (** test_config )
467+ found_label_count = len (found_labels )
468+
469+ self .assertEqual (
470+ found_label_count ,
471+ 5 ,
472+ f"expected five annotation labels for { test_annotation } " ,
473+ )
474+
475+ expected_file = (
476+ "UMAP__pre_QC--misc__cellaneous--cholinergic__neuron_--study--wilcoxon.tsv"
477+ )
478+
479+ # confirm expected results filename was generated in found result files
480+ self .assertIn (
481+ expected_file , found_labels , "Expected filename not in found files list"
482+ )
483+
484+ expected_output_match = "UMAP__pre_QC--misc__cellaneous*.tsv"
485+
486+ # clean up DE outputs
487+ files = glob .glob (expected_output_match )
488+
489+ for file in files :
490+ try :
491+ os .remove (file )
492+ except :
493+ print (f"Error while deleting file : { file } " )
0 commit comments