From 44fcaea511e4c9f8384527eb4f3fa3631228e823 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Tue, 18 Nov 2025 17:37:29 -0800 Subject: [PATCH 1/3] feat: tiedie Co-Authored-By: Yancheng Liu <89893105+lyce24@users.noreply.github.com> --- docker-wrappers/TieDIE/Dockerfile | 11 ++ docker-wrappers/TieDIE/README.md | 18 +++ docker-wrappers/TieDIE/requirements.txt | 3 + docs/prms/tiedie.rst | 16 ++ spras/runner.py | 2 + spras/tiedie.py | 152 ++++++++++++++++++ test/TieDIE/__init__.py | 0 test/TieDIE/input/pathway1.txt | 11 ++ test/TieDIE/input/pathway2.txt | 6 + test/TieDIE/input/source1.txt | 4 + test/TieDIE/input/source2.txt | 3 + test/TieDIE/input/target1.txt | 4 + test/TieDIE/input/target2.txt | 3 + test/TieDIE/test_tiedie.py | 86 ++++++++++ .../expected/tiedie-edges-expected.txt | 2 + test/generate-inputs/test_generate_inputs.py | 3 +- .../expected/tiedie-pathway-expected.txt | 10 ++ .../duplicate-edges/tiedie-raw-pathway.txt | 20 +++ .../input/empty/tiedie-empty-raw-pathway.txt | 0 .../input/tiedie-raw-pathway.txt | 10 ++ test/parse-outputs/test_parse_outputs.py | 1 + 21 files changed, 364 insertions(+), 1 deletion(-) create mode 100644 docker-wrappers/TieDIE/Dockerfile create mode 100644 docker-wrappers/TieDIE/README.md create mode 100644 docker-wrappers/TieDIE/requirements.txt create mode 100644 docs/prms/tiedie.rst create mode 100644 spras/tiedie.py create mode 100644 test/TieDIE/__init__.py create mode 100644 test/TieDIE/input/pathway1.txt create mode 100644 test/TieDIE/input/pathway2.txt create mode 100644 test/TieDIE/input/source1.txt create mode 100644 test/TieDIE/input/source2.txt create mode 100644 test/TieDIE/input/target1.txt create mode 100644 test/TieDIE/input/target2.txt create mode 100644 test/TieDIE/test_tiedie.py create mode 100644 test/generate-inputs/expected/tiedie-edges-expected.txt create mode 100644 test/parse-outputs/expected/tiedie-pathway-expected.txt create mode 100644 test/parse-outputs/input/duplicate-edges/tiedie-raw-pathway.txt 
create mode 100644 test/parse-outputs/input/empty/tiedie-empty-raw-pathway.txt create mode 100644 test/parse-outputs/input/tiedie-raw-pathway.txt diff --git a/docker-wrappers/TieDIE/Dockerfile b/docker-wrappers/TieDIE/Dockerfile new file mode 100644 index 000000000..3e1960723 --- /dev/null +++ b/docker-wrappers/TieDIE/Dockerfile @@ -0,0 +1,11 @@ +FROM python:2.7.15 + +WORKDIR /TieDIE + +COPY requirements.txt . +RUN pip install -r requirements.txt && \ + commit=c64ab5c4b4e0f6cfac4b5151c7d9f1d7ea331e65 && \ + wget https://github.com/Reed-CompBio/TieDIE/tarball/$commit && \ + tar -zxvf $commit && \ + rm $commit && \ + mv Reed-CompBio-TieDIE-*/* . diff --git a/docker-wrappers/TieDIE/README.md b/docker-wrappers/TieDIE/README.md new file mode 100644 index 000000000..f75cb86b7 --- /dev/null +++ b/docker-wrappers/TieDIE/README.md @@ -0,0 +1,18 @@ +# TieDIE Docker image + +A Docker image for [TieDIE](https://github.com/Reed-CompBio/TieDIE) that is available on [DockerHub](https://hub.docker.com/r/reedcompbio/tiedie). + +To create the Docker image run: +``` +docker build -t reedcompbio/tiedie -f Dockerfile . +``` +from this directory. + +## Testing +Test code is located in `test/TieDIE`. +The `input` subdirectory contains test files `pathway1.txt`, `pathway2.txt`, `source1.txt`, `source2.txt`, `target1.txt` and `target2.txt`. +The Docker wrapper can be tested with `pytest` or a unit test with `pytest -k test_tiedie.py`. + +## Versions + +- `v1`: Initial version diff --git a/docker-wrappers/TieDIE/requirements.txt b/docker-wrappers/TieDIE/requirements.txt new file mode 100644 index 000000000..d650c03ac --- /dev/null +++ b/docker-wrappers/TieDIE/requirements.txt @@ -0,0 +1,3 @@ +networkx==1.11 +numpy==1.11.3 +scipy==0.18.1 diff --git a/docs/prms/tiedie.rst b/docs/prms/tiedie.rst new file mode 100644 index 000000000..6a00b0738 --- /dev/null +++ b/docs/prms/tiedie.rst @@ -0,0 +1,16 @@ +TieDIE +====== + +TieDIE is a pathway reconstruction algorithm which diffuses heat from a set of source nodes and a set of target nodes over the network and connects the two sets through linker nodes.
+See the `original paper <https://doi.org/10.1093/bioinformatics/btt471>`_ and SPRAS's fork of the codebase:
+https://github.com/Reed-CompBio/TieDIE.
+
+TieDIE takes several optional parameters:
+
+* s:
+* d_expr:
+* a:
+* c:
+* p:
+* pagerank:
+* all_paths:
diff --git a/spras/runner.py b/spras/runner.py
index 9a5a04f32..f19fd250b 100644
--- a/spras/runner.py
+++ b/spras/runner.py
@@ -14,6 +14,7 @@
 from spras.responsenet import ResponseNet
 from spras.rwr import RWR
 from spras.strwr import ST_RWR
+from spras.tiedie import TieDIE
 
 algorithms: dict[str, type[PRM]] = {
     "allpairs": AllPairs,
@@ -27,6 +28,7 @@
     "responsenet": ResponseNet,
     "rwr": RWR,
     "strwr": ST_RWR,
+    "tiedie": TieDIE
 }
 
 def get_algorithm(algorithm: str) -> type[PRM]:
diff --git a/spras/tiedie.py b/spras/tiedie.py
new file mode 100644
index 000000000..23a62cd99
--- /dev/null
+++ b/spras/tiedie.py
@@ -0,0 +1,152 @@
+import warnings
+from pathlib import Path
+
+import pandas as pd
+
+from spras.config.container_schema import ProcessedContainerSettings
+from spras.containers import prepare_volume, run_container_and_log
+from spras.interactome import convert_directed_to_undirected, reinsert_direction_col_directed, reinsert_direction_col_undirected
+from spras.prm import PRM
+from spras.util import add_rank_column, duplicate_edges, raw_pathway_df
+
+__all__ = ["TieDIE"]
+
+class TieDIE(PRM):
+    # TieDIE needs edges (written without weights), a source set (with prizes), and a target set (with prizes).
+    required_inputs = ["edges", "sources", "targets"]
+    dois = ["10.1093/bioinformatics/btt471"]
+
+    @staticmethod
+    def generate_inputs(data, filename_map):
+        """
+        Access fields from the dataset and write the required input files
+        @param data: dataset
+        @param filename_map: a dict mapping file types in the required_inputs to the filename for that type
+        """
+        # ensures the required input are within the filename_map
+        for input_type in TieDIE.required_inputs:
+            if input_type not in filename_map:
+                raise ValueError(f"{input_type} filename is missing")
+
+        # will take the sources and write them to files, and repeats with targets
+        for node_type in ["sources", "targets"]:
+            nodes = data.get_node_columns([node_type])
+            # check if the nodes have prizes or not
+            if data.contains_node_columns("prize"):
+                node_df = data.get_node_columns(["prize"])
+                nodes = pd.merge(nodes, node_df, on="NODEID")
+                nodes["sign"] = "+"
+                # creates with the node type without headers
+                nodes.to_csv(filename_map[node_type],index=False,sep="\t",columns=["NODEID", "prize", "sign"],header=False)
+            else:
+                # If there aren't prizes but are sources and targets, make prizes based on them
+                nodes = data.get_node_columns([node_type])
+                # make all nodes have a prize of 1
+                nodes["prize"] = 1.0
+                nodes["sign"] = "+"
+                # creates with the node type without headers
+                nodes.to_csv(filename_map[node_type],index=False,sep="\t",columns=["NODEID", "prize", "sign"],header=False)
+
+        # create the network of edges
+        edges = data.get_interactome()
+
+        edges = convert_directed_to_undirected(edges)
+
+        edges["type"] = "-a>"
+        # drop the weight column
+        edges = edges.drop(columns=["Weight"])
+        # writes the edges file with the head node, the relationship type, and the tail node (weights are dropped)
+        edges.to_csv(filename_map["edges"],sep="\t",index=False,columns=["Interactor1", "type", "Interactor2"],header=False)
+
+    # Skips parameter validation step
+    @staticmethod
+    def run(edges=None, sources=None, targets=None, output_file=None, s: float = 1.0 , c: int = 3 , p: int = 1000, pagerank: bool = False, all_paths: bool = False, container_settings=None):
+        """
+        Run TieDIE with Docker
+        @param sources: input file listing the source nodes (required)
+        @param targets: input file listing the target nodes (required)
+        @param edges: input edges file (required)
+        @param output_file: path to the output pathway file (required)
+        @param s: Network size control factor (optional) (default 1)
+        @param d_expr: List of significantly differentially expressed genes, along with log-FC or FC values (i.e. by edgeR for RNA-Seq or SAM for microarray data. Generated by a sample-dichotomy of interest. (optional)
+        @param a: Linker Cutoff (overrides the Size factor) (optional)
+        @param c: Search depth for causal paths (optional) (default 3)
+        @param p: Number of random permutations performed for significance analysis (optional) (default 1000)
+        @param pagerank: Use Personalized PageRank to Diffuse (optional)
+        @param all_paths: Use all paths instead of only causal paths (optional) (default False)
+        @param container_settings: container framework settings used to run TieDIE (optional) (defaults to ProcessedContainerSettings())
+        """
+
+        if not container_settings: container_settings = ProcessedContainerSettings()
+        if not edges or not sources or not targets or not output_file:
+            raise ValueError("Required TieDIE arguments are missing")
+
+        work_dir = "/spras"
+
+        # Each volume is a tuple (src, dest) - data generated by Docker
+        volumes = list()
+
+        bind_path, edges_file = prepare_volume(edges, work_dir, container_settings)
+        volumes.append(bind_path)
+
+        bind_path, sources_file = prepare_volume(sources, work_dir, container_settings)
+        volumes.append(bind_path)
+
+        bind_path, targets_file = prepare_volume(targets, work_dir, container_settings)
+        volumes.append(bind_path)
+
+        out_dir = Path(output_file).parent
+
+        # TieDIE requires that the output directory exist
+        out_dir.mkdir(parents=True, exist_ok=True)
+        bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir, container_settings)
+        volumes.append(bind_path) # Use posix path inside the container
+
+        command = [
+            "python",
+            "/TieDIE/bin/tiedie",
+            "--up_heats", sources_file,
+            "--down_heats", targets_file,
+            "--network", edges_file,
+            "--size", str(s),
+            "--depth", str(c),
+            "--permute", str(p),
+            "--pagerank", "True" if pagerank else "False",
+            "--all_paths", "True" if all_paths else "False",
+            "--output_folder", mapped_out_dir,
+        ]
+
+        print('Running TieDIE with arguments: {}'.format(' '.join(command)), flush=True)
+
+        container_suffix = 'tiedie:v1'
+        run_container_and_log('TieDIE',
+                              container_suffix,
+                              command,
+                              volumes,
+                              work_dir,
+                              out_dir,
+                              container_settings)
+
+        # Rename the primary output file to match the desired output filename
+        output = Path(out_dir, "tiedie.sif")
+        target = Path(output_file)
+        output.rename(target)
+
+    @staticmethod
+    def parse_output(raw_pathway_file, standardized_pathway_file, params):
+        """
+        Convert a predicted pathway into the universal format
+        @param raw_pathway_file: pathway file produced by an algorithm's run function
+        @param standardized_pathway_file: the same pathway written in the universal format
+        """
+        df = raw_pathway_df(raw_pathway_file, sep='\t', header=None)
+        if not df.empty:
+            # get rid of the relationship (second) column (since all relationships are the same "-a>")
+            df = df.drop(df.columns[1], axis=1)
+            df = add_rank_column(df)
+            df = reinsert_direction_col_undirected(df)
+            df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
+            df, has_duplicates = duplicate_edges(df)
+            if has_duplicates:
+                print(f"Duplicate edges were removed from {raw_pathway_file}")
+        df.to_csv(standardized_pathway_file, header=True, index=False, sep='\t')
diff --git a/test/TieDIE/__init__.py b/test/TieDIE/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/TieDIE/input/pathway1.txt b/test/TieDIE/input/pathway1.txt
new file mode 100644
index 000000000..d62845880
--- /dev/null
+++ b/test/TieDIE/input/pathway1.txt
@@ -0,0 +1,11 @@
+G -a> N
+G -a> L
+B -a> E
+C -a> G
+E -a> F
+B -a> F
+D -a> G
+F -a> G
+K -a> G
+A -a> E
+E -a> G
diff --git a/test/TieDIE/input/pathway2.txt b/test/TieDIE/input/pathway2.txt
new file mode 100644
index 000000000..5e594e1bf
--- /dev/null
+++ b/test/TieDIE/input/pathway2.txt
@@ -0,0 +1,6 @@
+A -a> D
+B -a> D
+C -a> D
+D -a> E
+D -a> F
+D -a> G
\ No newline at end of file
diff --git a/test/TieDIE/input/source1.txt b/test/TieDIE/input/source1.txt
new file mode 100644
index 000000000..5c37c0bae
--- /dev/null
+++ b/test/TieDIE/input/source1.txt
@@ -0,0 +1,4 @@
+B 1 +
+K 1 +
+D 1 +
+A 1 +
\ No newline at end of file
diff --git a/test/TieDIE/input/source2.txt b/test/TieDIE/input/source2.txt
new file mode 100644
index 000000000..b4d341bab
--- /dev/null
+++ b/test/TieDIE/input/source2.txt
@@ -0,0 +1,3 @@
+A 2 +
+B 9 +
+C 4 +
\ No newline at end of file
diff --git a/test/TieDIE/input/target1.txt b/test/TieDIE/input/target1.txt
new file mode 100644
index 000000000..c551c63f6
--- /dev/null
+++ b/test/TieDIE/input/target1.txt
@@ -0,0 +1,4 @@
+L 1 +
+F 1 +
+C 1 +
+N 1 +
\ No newline at end of file
diff --git a/test/TieDIE/input/target2.txt b/test/TieDIE/input/target2.txt
new file mode 100644
index 000000000..563a5ed19
--- /dev/null
+++ b/test/TieDIE/input/target2.txt
@@ -0,0 +1,3 @@
+E 3 +
+F 1 +
+G 2 +
\ No newline at end of file
diff --git a/test/TieDIE/test_tiedie.py b/test/TieDIE/test_tiedie.py
new file mode 100644
index
000000000..e7fc27e5c
--- /dev/null
+++ b/test/TieDIE/test_tiedie.py
@@ -0,0 +1,86 @@
+import shutil
+from pathlib import Path
+
+import pytest
+
+from spras.config.container_schema import ContainerFramework, ProcessedContainerSettings
+from spras.tiedie import TieDIE
+
+TEST_DIR = Path('test', 'TieDIE')
+OUT_FILES = TEST_DIR / 'output' / 'output1' / 'tiedie_pathway.txt'
+OUT_FILES_1 = TEST_DIR / 'output' / 'output2' / 'tiedie_pathway_alternative.txt'
+
+class TestTieDIE:
+    """
+    Run the TieDIE algorithm on the example input files
+    """
+
+    def test_tiedie_required(self):
+        out_path = Path(OUT_FILES)
+        out_path.unlink(missing_ok=True)
+        # Only include required arguments
+        TieDIE.run(sources=TEST_DIR / 'input' / 'source1.txt',
+                   targets=TEST_DIR / 'input' / 'target1.txt',
+                   edges=TEST_DIR / 'input' / 'pathway1.txt',
+                   output_file=OUT_FILES)
+        assert out_path.exists()
+
+    def test_tiedie_alternative_graph(self):
+        out_path = Path(OUT_FILES_1)
+        out_path.unlink(missing_ok=True)
+        TieDIE.run(sources=TEST_DIR / 'input' / 'source2.txt',
+                   targets=TEST_DIR / 'input' / 'target2.txt',
+                   edges=TEST_DIR / 'input' / 'pathway2.txt',
+                   output_file=OUT_FILES_1)
+        assert out_path.exists()
+
+    def test_tiedie_some_optional(self):
+        out_path = Path(OUT_FILES)
+        out_path.unlink(missing_ok=True)
+        # Include optional argument
+        TieDIE.run(sources=TEST_DIR / 'input' / 'source1.txt',
+                   targets=TEST_DIR / 'input' / 'target1.txt',
+                   edges=TEST_DIR / 'input' / 'pathway1.txt',
+                   output_file=OUT_FILES,
+                   s=1.1,
+                   p=2000,
+                   pagerank=True)
+        assert out_path.exists()
+
+    def test_tiedie_all_optional(self):
+        out_path = Path(OUT_FILES)
+        out_path.unlink(missing_ok=True)
+        # Include optional argument
+        TieDIE.run(sources=TEST_DIR / 'input' / 'source1.txt',
+                   targets=TEST_DIR / 'input' / 'target1.txt',
+                   edges=TEST_DIR / 'input' / 'pathway1.txt',
+                   output_file=OUT_FILES,
+                   s=1.1,
+                   c=4,
+                   p=2000,
+                   pagerank=True,
+                   all_paths=True)
+        assert out_path.exists()
+
+    def test_tiedie_missing(self):
+        # Test the expected error is raised when required arguments are missing
+        with pytest.raises(ValueError):
+            # No edges file
+            TieDIE.run(sources=TEST_DIR / 'input' / 'source1.txt',
+                       targets=TEST_DIR / 'input' / 'target1.txt',
+                       output_file=OUT_FILES)
+
+    @pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system')
+    def test_tiedie_singularity(self):
+        out_path = Path(OUT_FILES)
+        out_path.unlink(missing_ok=True)
+        # Include some optional arguments and run with Singularity
+        TieDIE.run(sources=TEST_DIR / 'input' / 'source1.txt',
+                   targets=TEST_DIR / 'input' / 'target1.txt',
+                   edges=TEST_DIR / 'input' / 'pathway1.txt',
+                   output_file=OUT_FILES,
+                   s=1.1,
+                   p=2000,
+                   pagerank=True,
+                   container_settings=ProcessedContainerSettings(framework=ContainerFramework.singularity))
+        assert out_path.exists()
\ No newline at end of file
diff --git a/test/generate-inputs/expected/tiedie-edges-expected.txt b/test/generate-inputs/expected/tiedie-edges-expected.txt
new file mode 100644
index 000000000..45e7b1a60
--- /dev/null
+++ b/test/generate-inputs/expected/tiedie-edges-expected.txt
@@ -0,0 +1,2 @@
+test_A -a> B
+B -a> C
diff --git a/test/generate-inputs/test_generate_inputs.py b/test/generate-inputs/test_generate_inputs.py
index f79b673fc..1868d34d4 100644
--- a/test/generate-inputs/test_generate_inputs.py
+++ b/test/generate-inputs/test_generate_inputs.py
@@ -20,7 +20,8 @@
     'bowtiebuilder': 'edges',
     'strwr': 'network',
     'rwr': 'network',
-    'responsenet': 'edges'
+    'responsenet': 'edges',
+    'tiedie': 'edges',
 }
 
 
diff --git a/test/parse-outputs/expected/tiedie-pathway-expected.txt b/test/parse-outputs/expected/tiedie-pathway-expected.txt
new file mode 100644
index 000000000..efe575b54
--- /dev/null
+++ b/test/parse-outputs/expected/tiedie-pathway-expected.txt
@@ -0,0 +1,10 @@
+Node1 Node2 Rank Direction
+A E 1 U
+B E 1 U
+B F 1 U
+D G 1 U
+E F 1 U
+E G 1 U
+G L 1 U
+G N 1 U
+G K 1 U
diff --git
a/test/parse-outputs/input/duplicate-edges/tiedie-raw-pathway.txt b/test/parse-outputs/input/duplicate-edges/tiedie-raw-pathway.txt new file mode 100644 index 000000000..28180c85f --- /dev/null +++ b/test/parse-outputs/input/duplicate-edges/tiedie-raw-pathway.txt @@ -0,0 +1,20 @@ +F -a> G +E -a> G +B -a> E +E -a> F +A -a> E +G -a> L +K -a> G +B -a> F +D -a> G +G -a> N +F -a> G +E -a> G +B -a> E +E -a> F +A -a> E +G -a> L +K -a> G +B -a> F +D -a> G +G -a> N \ No newline at end of file diff --git a/test/parse-outputs/input/empty/tiedie-empty-raw-pathway.txt b/test/parse-outputs/input/empty/tiedie-empty-raw-pathway.txt new file mode 100644 index 000000000..e69de29bb diff --git a/test/parse-outputs/input/tiedie-raw-pathway.txt b/test/parse-outputs/input/tiedie-raw-pathway.txt new file mode 100644 index 000000000..34698612e --- /dev/null +++ b/test/parse-outputs/input/tiedie-raw-pathway.txt @@ -0,0 +1,10 @@ +F -a> G +E -a> G +B -a> E +E -a> F +A -a> E +G -a> L +K -a> G +B -a> F +D -a> G +G -a> N \ No newline at end of file diff --git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py index 2977c7f5d..f6ade1249 100644 --- a/test/parse-outputs/test_parse_outputs.py +++ b/test/parse-outputs/test_parse_outputs.py @@ -24,6 +24,7 @@ 'domino': {}, 'bowtiebuilder': {}, 'responsenet': {}, + 'tiedie': {}, 'strwr': { 'threshold': 3, 'dataset': Dataset({ From d07a5a879e9d69815f7edc39acc4f009f08e2324 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Tue, 18 Nov 2025 19:04:42 -0800 Subject: [PATCH 2/3] docs: weak documentation --- docs/prms/tiedie.rst | 14 +++++++------- spras/tiedie.py | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/prms/tiedie.rst b/docs/prms/tiedie.rst index 6a00b0738..d40829e7e 100644 --- a/docs/prms/tiedie.rst +++ b/docs/prms/tiedie.rst @@ -7,10 +7,10 @@ https://github.com/Reed-CompBio/TieDIE. 
TieDIE takes several optional parameters: -* s: -* d_expr: -* a: -* c: -* p: -* pagerank: -* all_paths: +* s: (float, default 1.0) Network size control factor +* d_expr: List of significantly differentially expressed genes, along with log-FC or FC values (e.g. by edgeR for RNA-Seq or SAM for microarray data). Generated by a sample-dichotomy of interest. +* a: (int) Linker Cutoff (overrides the Size factor) +* c: (int, default 3) Search depth for causal paths +* p: (int, default 1000) Number of random permutations performed for significance analysis +* pagerank: (boolean, default false) Use Personalized PageRank for diffusion +* all_paths: (boolean, default false) Use all paths instead of only causal paths diff --git a/spras/tiedie.py b/spras/tiedie.py index 23a62cd99..4f8c8d2ff 100644 --- a/spras/tiedie.py +++ b/spras/tiedie.py @@ -68,7 +68,7 @@ def run(edges=None, sources=None, targets=None, output_file=None, s: float = 1.0 @param edges: input edges file (required) @param output_file: path to the output pathway file (required) @param s: Network size control factor (optional) (default 1) - @param d_expr: List of significantly differentially expressed genes, along with log-FC or FC values (i.e. by edgeR for RNA-Seq or SAM for microarray data. Generated by a sample-dichotomy of interest. (optional) + @param d_expr: List of significantly differentially expressed genes, along with log-FC or FC values (i.e. by edgeR for RNA-Seq or SAM for microarray data.) Generated by a sample-dichotomy of interest. (optional) @param a: Linker Cutoff (overrides the Size factor) (optional) @param c: Search depth for causal paths (optional) (default 3) @param p: Number of random permutations performed for significance analysis (optional) (default 1000) From 5d7a04fcbadc990d920da70f1f0e5692c83f43ce Mon Sep 17 00:00:00 2001 From: "Tristan F."
Date: Tue, 18 Nov 2025 19:04:58 -0800 Subject: [PATCH 3/3] style: fmt --- spras/tiedie.py | 6 ++++-- test/TieDIE/test_tiedie.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/spras/tiedie.py b/spras/tiedie.py index 4f8c8d2ff..4d1ee3a92 100644 --- a/spras/tiedie.py +++ b/spras/tiedie.py @@ -1,11 +1,13 @@ -import warnings from pathlib import Path import pandas as pd from spras.config.container_schema import ProcessedContainerSettings from spras.containers import prepare_volume, run_container_and_log -from spras.interactome import convert_directed_to_undirected, reinsert_direction_col_directed, reinsert_direction_col_undirected +from spras.interactome import ( + convert_directed_to_undirected, + reinsert_direction_col_undirected, +) from spras.prm import PRM from spras.util import add_rank_column, duplicate_edges, raw_pathway_df diff --git a/test/TieDIE/test_tiedie.py b/test/TieDIE/test_tiedie.py index e7fc27e5c..47753c268 100644 --- a/test/TieDIE/test_tiedie.py +++ b/test/TieDIE/test_tiedie.py @@ -83,4 +83,4 @@ def test_tiedie_singularity(self): p=2000, pagerank=True, container_settings=ProcessedContainerSettings(framework=ContainerFramework.singularity)) - assert out_path.exists() \ No newline at end of file + assert out_path.exists()