Skip to content

Commit eb9d04b

Browse files
Evals correlation (#15)
* Refactor evals * Add correlation analysis between metrics
1 parent a047be6 commit eb9d04b

File tree

4 files changed

+333
-48
lines changed

4 files changed

+333
-48
lines changed

experiments/evals/config/config.yaml

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,22 @@ scorings:
3030
- LLR.minus.score
3131
- absLLR.plus.score
3232

33-
datasets:
34-
- traitgym_mendelian_promoter
35-
- traitgym_complex_promoter
36-
- gnomad_promoter
37-
- sat_mut_mpra_promoter_F9
38-
- sat_mut_mpra_promoter_GP1BA
39-
- sat_mut_mpra_promoter_HBB
40-
- sat_mut_mpra_promoter_HBG1
41-
- sat_mut_mpra_promoter_HNF4A
42-
- sat_mut_mpra_promoter_LDLR
43-
- sat_mut_mpra_promoter_MSMB
44-
- sat_mut_mpra_promoter_PKLR
45-
- sat_mut_mpra_promoter_TERT
33+
# Dataset evaluation configurations
34+
# Each dataset specifies which metrics and scoring functions to compute
35+
dataset_configs:
36+
traitgym_mendelian_promoter:
37+
metrics: [AUPRC]
38+
scorings: [LLR.minus.score]
39+
40+
traitgym_complex_promoter:
41+
metrics: [AUPRC]
42+
scorings: [absLLR.plus.score]
43+
44+
gnomad_promoter:
45+
metrics: [AUROC]
46+
scorings: [LLR.minus.score]
47+
48+
sat_mut_mpra_promoter:
49+
# Template entry: expanded into one sat_mut_mpra_promoter_<promoter> dataset per promoter listed under the top-level sat_mut_mpra_promoter key
50+
metrics: [Spearman]
51+
scorings: [absLLR.plus.score]
Lines changed: 60 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,64 @@
11
configfile: "config/config.yaml"
22

33

4+
def get_all_datasets():
    """Return every concrete dataset name, for use in wildcard constraints.

    Walks the keys of ``config["dataset_configs"]`` in configuration order.
    The ``sat_mut_mpra_promoter`` key is a template rather than a dataset: it
    is replaced by one ``sat_mut_mpra_promoter_<promoter>`` name for each item
    in ``config["sat_mut_mpra_promoter"]``. Every other key is returned as is.

    Returns:
        list: dataset names, in the order they appear in the configuration.

    Raises:
        KeyError: propagated from the ``config`` lookups when
            ``dataset_configs`` is absent, or when the template key is present
            but ``config["sat_mut_mpra_promoter"]`` is not (assuming ``config``
            is a plain mapping).
    """
    datasets = []
    # Iterating the mapping yields its keys directly; no .keys() call needed.
    for dataset in config["dataset_configs"]:
        if dataset == "sat_mut_mpra_promoter":
            # Expand the template into one dataset per promoter.
            datasets.extend(
                f"sat_mut_mpra_promoter_{promoter}"
                for promoter in config["sat_mut_mpra_promoter"]
            )
        else:
            datasets.append(dataset)
    return datasets
15+
16+
17+
def _expand_dataset_names(dataset):
    """Return the concrete dataset names that one ``dataset_configs`` key stands for."""
    if dataset == "sat_mut_mpra_promoter":
        # Template key: one dataset per configured promoter.
        return [
            f"sat_mut_mpra_promoter_{promoter}"
            for promoter in config["sat_mut_mpra_promoter"]
        ]
    return [dataset]


def get_all_metric_files():
    """Generate the list of all metric files implied by ``dataset_configs``.

    For each entry of ``config["dataset_configs"]`` (in order), and for each
    concrete dataset name it expands to, one path is produced per combination
    of the entry's ``metrics``, the keys of ``config["models"]``, and the
    entry's ``scorings``. Scoring varies fastest, then model, then metric.

    Returns:
        list: paths of the form
        ``results/metrics/<dataset>/<metric>/<model>_<scoring>.tsv``.

    Raises:
        KeyError: propagated from the ``config`` lookups when a required key
            is missing (assuming ``config`` is a plain mapping).
    """
    from itertools import product

    files = []
    for dataset, cfg in config["dataset_configs"].items():
        # Both the template entry and regular datasets share one code path;
        # only the list of concrete names differs.
        for dataset_name in _expand_dataset_names(dataset):
            combos = product(cfg["metrics"], config["models"], cfg["scorings"])
            files.extend(
                f"results/metrics/{dataset_name}/{metric}/{model}_{scoring}.tsv"
                for metric, model, scoring in combos
            )
    return files
42+
43+
44+
def get_all_correlation_files():
    """Return the fixed list of correlation-analysis output paths.

    The list holds four tabular outputs followed by four figures, each figure
    listed as a PNG and then a PDF.
    """
    out_dir = "results/correlations"
    table_names = (
        "metrics_wide.parquet",
        "metrics_long.parquet",
        "pearson.tsv",
        "spearman.tsv",
    )
    figure_stems = (
        "pearson_heatmap",
        "spearman_heatmap",
        "metrics_vs_step",
        "metric_pairs",
    )

    outputs = [f"{out_dir}/{name}" for name in table_names]
    for stem in figure_stems:
        # Every figure is written in both raster and vector form.
        outputs.append(f"{out_dir}/{stem}.png")
        outputs.append(f"{out_dir}/{stem}.pdf")
    return outputs
60+
61+
462
include: "rules/common.smk"
563
include: "rules/gnomad.smk"
664
include: "rules/metrics.smk"
@@ -11,33 +69,5 @@ include: "rules/traitgym.smk"
1169

1270
rule all:
1371
input:
14-
expand(
15-
"results/metrics/traitgym_mendelian_promoter/AUPRC/{model}_{scoring}.tsv",
16-
model=config["models"].keys(),
17-
scoring=[
18-
"LLR.minus.score",
19-
]
20-
),
21-
expand(
22-
"results/metrics/traitgym_complex_promoter/AUPRC/{model}_{scoring}.tsv",
23-
model=config["models"].keys(),
24-
scoring=[
25-
"absLLR.plus.score",
26-
]
27-
),
28-
expand(
29-
"results/metrics/sat_mut_mpra_promoter_{promoter}/Spearman/{model}_{scoring}.tsv",
30-
promoter=config["sat_mut_mpra_promoter"],
31-
model=config["models"].keys(),
32-
scoring=[
33-
"absLLR.plus.score",
34-
]
35-
),
36-
expand(
37-
"results/metrics/gnomad_promoter/{metric}/{model}_{scoring}.tsv",
38-
metric=["AUPRC", "AUROC"],
39-
model=config["models"].keys(),
40-
scoring=[
41-
"LLR.minus.score",
42-
]
43-
),
72+
get_all_metric_files(),
73+
get_all_correlation_files()

0 commit comments

Comments
 (0)