From 3d27264c1e7468790c6eb31c6194302ccde6b294 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Wed, 27 Dec 2023 12:20:49 -0800 Subject: [PATCH 01/18] ideas for directionality --- .DS_Store | Bin 0 -> 6148 bytes graph5_flow1_c1.0.sif | 2 ++ graph_dir_col_flow1_c1.0.sif | 2 ++ graphs/graph5/edges.txt | 9 +++++++++ graphs/graph5/sources.txt | 1 + graphs/graph5/targets.txt | 4 ++++ graphs/graph_dir_col/edges.txt | 9 +++++++++ graphs/graph_dir_col/sources.txt | 1 + graphs/graph_dir_col/targets.txt | 4 ++++ minCostFlow.py | 18 ++++++++++++++++++ 10 files changed, 50 insertions(+) create mode 100644 .DS_Store create mode 100644 graph5_flow1_c1.0.sif create mode 100644 graph_dir_col_flow1_c1.0.sif create mode 100644 graphs/graph5/edges.txt create mode 100644 graphs/graph5/sources.txt create mode 100644 graphs/graph5/targets.txt create mode 100644 graphs/graph_dir_col/edges.txt create mode 100644 graphs/graph_dir_col/sources.txt create mode 100644 graphs/graph_dir_col/targets.txt diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..1b1a152d13e4c564da9c380295416588d8a60e18 GIT binary patch literal 6148 zcmeHKL2uJA6n^f?nrZ^&0Maf!$>S?ll7Buk63 z*ZUzFTg@BWEvMz|IPZfqSp{WS&Wb@eealy`q|BnrdKkS}m%#I@&FMUt=9$b-5ob<|UQkLWlu%9uB`Z%izcNqZtLogMF|ujo>7KcMO|q7d5!yRjoYSUIzo>_dI{{Jr?}1#ch?v$RxP1VG{cx&8>_o4F>)7U zUr<@+^WBR3D{G1FtXT!D0@qXl^G$SiuIc>kw5 Date: Wed, 27 Dec 2023 12:21:10 -0800 Subject: [PATCH 02/18] updates to mcf --- minCostFlow.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/minCostFlow.py b/minCostFlow.py index 17e0891..49bfb0e 100644 --- a/minCostFlow.py +++ b/minCostFlow.py @@ -149,8 +149,6 @@ def main(args): targets = parse_nodes(args.targets_file) - print(args.edges_file) - G,idDict = construct_digraph(args.edges_file, args.capacity) add_sources_targets(G, sources, targets, idDict, flow) From 472ba79a8ab896a148941cb1c231ec503e75d60a Mon Sep 17 00:00:00 2001 From: ntalluri Date: Fri, 29 Dec 2023 11:44:53 -0800 Subject: 
[PATCH 03/18] more ideas --- minCostFlow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/minCostFlow.py b/minCostFlow.py index 49bfb0e..55848e9 100644 --- a/minCostFlow.py +++ b/minCostFlow.py @@ -11,7 +11,7 @@ import argparse from ortools.graph.python.min_cost_flow import SimpleMinCostFlow -# global dict for edge, directionality +# global dict for edge, directionality or adjacney matrix def parse_nodes(node_file): ''' Parse a list of sources or targets and return a set ''' From 24055bb73a0600588ae18f43701ed2d5ed6e7082 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Fri, 29 Dec 2023 13:17:38 -0800 Subject: [PATCH 04/18] testing out using a dict --- graph_dir_col_flow1_c1.0.sif | 2 -- graphs/graph_dir_col/edges.txt | 2 +- minCostFlow.py | 37 ++++++++++++++++++---------------- 3 files changed, 21 insertions(+), 20 deletions(-) delete mode 100644 graph_dir_col_flow1_c1.0.sif diff --git a/graph_dir_col_flow1_c1.0.sif b/graph_dir_col_flow1_c1.0.sif deleted file mode 100644 index 684842d..0000000 --- a/graph_dir_col_flow1_c1.0.sif +++ /dev/null @@ -1,2 +0,0 @@ -B A -C B diff --git a/graphs/graph_dir_col/edges.txt b/graphs/graph_dir_col/edges.txt index 439c556..3ac04d7 100644 --- a/graphs/graph_dir_col/edges.txt +++ b/graphs/graph_dir_col/edges.txt @@ -1,6 +1,6 @@ A B 0.98 U B C 0.77 U -A D 0.12 U +A D 0.12 D C D 0.89 U C E 0.59 U C F 0.50 U diff --git a/minCostFlow.py b/minCostFlow.py index 55848e9..bd7edbf 100644 --- a/minCostFlow.py +++ b/minCostFlow.py @@ -11,7 +11,7 @@ import argparse from ortools.graph.python.min_cost_flow import SimpleMinCostFlow -# global dict for edge, directionality or adjacney matrix +edges_dict = dict() def parse_nodes(node_file): ''' Parse a list of sources or targets and return a set ''' @@ -35,7 +35,6 @@ def construct_digraph(edges_file, cap): with open(edges_file) as edges_f: for line in edges_f: tokens = line.strip().split() - print(tokens) node1 = tokens[0] if not node1 in idDict: idDict[node1] = curID @@ -46,17 +45,22 
@@ def construct_digraph(edges_file, cap): curID += 1 #Google's solver can only handle int weights, so round to the 100th w = int((1-(float(tokens[2])))*100) - # grab directionality d = tokens[3] - # append edgepair: d to global dict - # might need to store it twice in both directions for undirected egde? - - # if d = U: - # make a pair of directed edges - # elif d = D - # make one directed edge from node1 to node2 - # else: raise Error print(f"d = {d}"") - G.add_arc_with_capacity_and_unit_cost(idDict[node1],idDict[node2], default_capacity, int(w)) - G.add_arc_with_capacity_and_unit_cost(idDict[node2],idDict[node1], default_capacity, int(w)) + d = tokens[3] + + if d == "U": + edges_dict[(node1, node2)] = "U" + edges_dict[(node2, node1)] = "U" + G.add_arc_with_capacity_and_unit_cost(idDict[node1],idDict[node2], default_capacity, int(w)) + G.add_arc_with_capacity_and_unit_cost(idDict[node2],idDict[node1], default_capacity, int(w)) + + elif d == "D": + edges_dict[(node1, node2)] = "D" + G.add_arc_with_capacity_and_unit_cost(idDict[node1],idDict[node2], default_capacity, int(w)) + else: + raise ValueError (f"d = {d}") + + print(edges_dict) + idDict["maxID"] = curID return G,idDict @@ -110,10 +114,9 @@ def write_output_to_sif(G,out_file_name,idDict): continue numE+=1 - # check the edge in global dict and grab directionality - # d = global_dict[(node1, node2)] - # out_file.write(node1+"\t"+node2+"\t"+d+"\n") - out_file.write(node1+"\t"+node2+"\n") + d = edges_dict[(node1, node2)] + out_file.write(node1+"\t"+node2+"\t"+d+"\n") + print("Final network had %d edges" % numE) out_file.close() From 26cb3b1c535988522647e240546b55271c7d0619 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Fri, 29 Dec 2023 14:31:30 -0800 Subject: [PATCH 05/18] fixing the code --- graphs/graph1/edges.txt | 8 ++++---- graphs/graph2/edges.txt | 10 +++++----- graphs/graph3/edges.txt | 8 ++++---- graphs/graph4/edges.txt | 8 ++++---- minCostFlow.py | 7 +++---- 5 files changed, 20 insertions(+), 21 
deletions(-) diff --git a/graphs/graph1/edges.txt b/graphs/graph1/edges.txt index e002179..c771da0 100644 --- a/graphs/graph1/edges.txt +++ b/graphs/graph1/edges.txt @@ -1,4 +1,4 @@ -A B 0.9 -A C 0.1 -B D 0.9 -C D 0.1 \ No newline at end of file +A B 0.9 D +A C 0.1 D +B D 0.9 D +C D 0.1 D \ No newline at end of file diff --git a/graphs/graph2/edges.txt b/graphs/graph2/edges.txt index d5212d2..33cf94d 100644 --- a/graphs/graph2/edges.txt +++ b/graphs/graph2/edges.txt @@ -1,5 +1,5 @@ -A B 0.9 -A C 0.1 -B D 0.9 -C D 0.1 -A D 0.8 \ No newline at end of file +A B 0.9 D +A C 0.1 D +B D 0.9 D +C D 0.1 D +A D 0.8 D \ No newline at end of file diff --git a/graphs/graph3/edges.txt b/graphs/graph3/edges.txt index 15e48ba..ccef531 100644 --- a/graphs/graph3/edges.txt +++ b/graphs/graph3/edges.txt @@ -1,4 +1,4 @@ -A B 0.9 -A C 0.1 -B D 0.1 -C D 0.9 +A B 0.9 D +A C 0.1 D +B D 0.1 D +C D 0.9 D diff --git a/graphs/graph4/edges.txt b/graphs/graph4/edges.txt index e1cf8cd..5d440a3 100644 --- a/graphs/graph4/edges.txt +++ b/graphs/graph4/edges.txt @@ -1,4 +1,4 @@ -A B 0.9 -A C 0.9 -B D 0.9 -C D 0.9 +A B 0.9 D +A C 0.9 D +B D 0.9 D +C D 0.9 D diff --git a/minCostFlow.py b/minCostFlow.py index bd7edbf..5e2fe56 100644 --- a/minCostFlow.py +++ b/minCostFlow.py @@ -59,8 +59,6 @@ def construct_digraph(edges_file, cap): else: raise ValueError (f"d = {d}") - print(edges_dict) - idDict["maxID"] = curID return G,idDict @@ -103,8 +101,9 @@ def write_output_to_sif(G,out_file_name,idDict): names = {v: k for k, v in idDict.items()} numE = 0 for i in range(G.num_arcs()): - node1 = names[G.head(i)] - node2 = names[G.tail(i)] + node1 = names[G.tail(i)] + node2 = names[G.head(i)] + flow = G.flow(i) if flow <= 0: continue From d16a07029c686b585336683e7e1873e4dc280c08 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Tue, 2 Jan 2024 12:50:47 -0800 Subject: [PATCH 06/18] new idea for duplicate edges --- .gitignore | 3 +++ graph5_flow1_c1.0.sif | 2 -- graphs/graph5/edges.txt | 21 ++++++++++++--------- 
graphs/graph_dir_col/edges.txt | 9 --------- graphs/graph_dir_col/sources.txt | 1 - graphs/graph_dir_col/targets.txt | 4 ---- minCostFlow.py | 8 +++++--- 7 files changed, 20 insertions(+), 28 deletions(-) delete mode 100644 graph5_flow1_c1.0.sif delete mode 100644 graphs/graph_dir_col/edges.txt delete mode 100644 graphs/graph_dir_col/sources.txt delete mode 100644 graphs/graph_dir_col/targets.txt diff --git a/.gitignore b/.gitignore index f1c6544..d77c715 100644 --- a/.gitignore +++ b/.gitignore @@ -105,3 +105,6 @@ venv.bak/ # data files referencePathways/reactome/*.gOut + +# output files +*.sif \ No newline at end of file diff --git a/graph5_flow1_c1.0.sif b/graph5_flow1_c1.0.sif deleted file mode 100644 index 684842d..0000000 --- a/graph5_flow1_c1.0.sif +++ /dev/null @@ -1,2 +0,0 @@ -B A -C B diff --git a/graphs/graph5/edges.txt b/graphs/graph5/edges.txt index bb629d3..2ca040c 100644 --- a/graphs/graph5/edges.txt +++ b/graphs/graph5/edges.txt @@ -1,9 +1,12 @@ -A B 0.98 -B C 0.77 -A D 0.12 -C D 0.89 -C E 0.59 -C F 0.50 -F G 0.76 -G H 0.92 -G I 0.66 +A B 0.98 U +B C 0.77 U +A D 0.12 U +A D 0.12 D +C D 0.89 U +C E 0.59 U +A D 0.12 D +C F 0.50 U +F G 0.76 U +G H 0.92 U +G I 0.66 U +B C 0.77 U \ No newline at end of file diff --git a/graphs/graph_dir_col/edges.txt b/graphs/graph_dir_col/edges.txt deleted file mode 100644 index 3ac04d7..0000000 --- a/graphs/graph_dir_col/edges.txt +++ /dev/null @@ -1,9 +0,0 @@ -A B 0.98 U -B C 0.77 U -A D 0.12 D -C D 0.89 U -C E 0.59 U -C F 0.50 U -F G 0.76 U -G H 0.92 U -G I 0.66 U diff --git a/graphs/graph_dir_col/sources.txt b/graphs/graph_dir_col/sources.txt deleted file mode 100644 index 8c7e5a6..0000000 --- a/graphs/graph_dir_col/sources.txt +++ /dev/null @@ -1 +0,0 @@ -A \ No newline at end of file diff --git a/graphs/graph_dir_col/targets.txt b/graphs/graph_dir_col/targets.txt deleted file mode 100644 index 2c29ca5..0000000 --- a/graphs/graph_dir_col/targets.txt +++ /dev/null @@ -1,4 +0,0 @@ -C -D -H -I \ No newline at end of 
file diff --git a/minCostFlow.py b/minCostFlow.py index 5e2fe56..9f2b2ec 100644 --- a/minCostFlow.py +++ b/minCostFlow.py @@ -47,17 +47,19 @@ def construct_digraph(edges_file, cap): w = int((1-(float(tokens[2])))*100) d = tokens[3] - if d == "U": + # TODO: is it better to overwrite the edges or just to throw an error + if (node1, node2) in edges_dict: + raise KeyError(f"Failed to add the edge ({node1}, {node2}) with weight '{tokens[2]}' and directionality '{d}'. This edge conflicts with an existing '{edges_dict[(node1, node2)]}' edge in the graph.") + elif d == "U": edges_dict[(node1, node2)] = "U" edges_dict[(node2, node1)] = "U" G.add_arc_with_capacity_and_unit_cost(idDict[node1],idDict[node2], default_capacity, int(w)) G.add_arc_with_capacity_and_unit_cost(idDict[node2],idDict[node1], default_capacity, int(w)) - elif d == "D": edges_dict[(node1, node2)] = "D" G.add_arc_with_capacity_and_unit_cost(idDict[node1],idDict[node2], default_capacity, int(w)) else: - raise ValueError (f"d = {d}") + raise ValueError (f"Cannot add egdge: d = {d}") idDict["maxID"] = curID return G,idDict From e7fe10ba86134ec08a5cc6de6db2b858eb034e65 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Tue, 2 Jan 2024 14:52:15 -0800 Subject: [PATCH 07/18] new graphs to test on --- graphs/graph10/edges.txt | 4 ++++ graphs/graph10/sources.txt | 1 + graphs/graph10/targets.txt | 1 + graphs/graph11/edges.txt | 4 ++++ graphs/graph11/sources.txt | 1 + graphs/graph11/targets.txt | 1 + graphs/graph5/edges.txt | 16 ++++------------ graphs/graph5/targets.txt | 5 +---- graphs/graph6/edges.txt | 5 +++++ graphs/graph6/sources.txt | 1 + graphs/graph6/targets.txt | 1 + graphs/graph7/edges.txt | 4 ++++ graphs/graph7/sources.txt | 1 + graphs/graph7/targets.txt | 1 + graphs/graph8/edges.txt | 4 ++++ graphs/graph8/sources.txt | 1 + graphs/graph8/targets.txt | 1 + graphs/graph9/edges.txt | 4 ++++ graphs/graph9/sources.txt | 1 + graphs/graph9/targets.txt | 1 + 20 files changed, 42 insertions(+), 16 deletions(-) create 
mode 100644 graphs/graph10/edges.txt create mode 100644 graphs/graph10/sources.txt create mode 100644 graphs/graph10/targets.txt create mode 100644 graphs/graph11/edges.txt create mode 100644 graphs/graph11/sources.txt create mode 100644 graphs/graph11/targets.txt create mode 100644 graphs/graph6/edges.txt create mode 100644 graphs/graph6/sources.txt create mode 100644 graphs/graph6/targets.txt create mode 100644 graphs/graph7/edges.txt create mode 100644 graphs/graph7/sources.txt create mode 100644 graphs/graph7/targets.txt create mode 100644 graphs/graph8/edges.txt create mode 100644 graphs/graph8/sources.txt create mode 100644 graphs/graph8/targets.txt create mode 100644 graphs/graph9/edges.txt create mode 100644 graphs/graph9/sources.txt create mode 100644 graphs/graph9/targets.txt diff --git a/graphs/graph10/edges.txt b/graphs/graph10/edges.txt new file mode 100644 index 0000000..71c125d --- /dev/null +++ b/graphs/graph10/edges.txt @@ -0,0 +1,4 @@ +A B 0.9 D +A C 0.1 U +B D 0.9 D +C D 0.1 U diff --git a/graphs/graph10/sources.txt b/graphs/graph10/sources.txt new file mode 100644 index 0000000..8c7e5a6 --- /dev/null +++ b/graphs/graph10/sources.txt @@ -0,0 +1 @@ +A \ No newline at end of file diff --git a/graphs/graph10/targets.txt b/graphs/graph10/targets.txt new file mode 100644 index 0000000..02358d2 --- /dev/null +++ b/graphs/graph10/targets.txt @@ -0,0 +1 @@ +D \ No newline at end of file diff --git a/graphs/graph11/edges.txt b/graphs/graph11/edges.txt new file mode 100644 index 0000000..eb56b16 --- /dev/null +++ b/graphs/graph11/edges.txt @@ -0,0 +1,4 @@ +A B 0.9 U +A C 0.1 D +B D 0.9 U +C D 0.1 D diff --git a/graphs/graph11/sources.txt b/graphs/graph11/sources.txt new file mode 100644 index 0000000..8c7e5a6 --- /dev/null +++ b/graphs/graph11/sources.txt @@ -0,0 +1 @@ +A \ No newline at end of file diff --git a/graphs/graph11/targets.txt b/graphs/graph11/targets.txt new file mode 100644 index 0000000..02358d2 --- /dev/null +++ b/graphs/graph11/targets.txt 
@@ -0,0 +1 @@ +D \ No newline at end of file diff --git a/graphs/graph5/edges.txt b/graphs/graph5/edges.txt index 2ca040c..74294cf 100644 --- a/graphs/graph5/edges.txt +++ b/graphs/graph5/edges.txt @@ -1,12 +1,4 @@ -A B 0.98 U -B C 0.77 U -A D 0.12 U -A D 0.12 D -C D 0.89 U -C E 0.59 U -A D 0.12 D -C F 0.50 U -F G 0.76 U -G H 0.92 U -G I 0.66 U -B C 0.77 U \ No newline at end of file +A B 0.9 U +A C 0.1 U +B D 0.9 U +C D 0.1 U \ No newline at end of file diff --git a/graphs/graph5/targets.txt b/graphs/graph5/targets.txt index 2c29ca5..02358d2 100644 --- a/graphs/graph5/targets.txt +++ b/graphs/graph5/targets.txt @@ -1,4 +1 @@ -C -D -H -I \ No newline at end of file +D \ No newline at end of file diff --git a/graphs/graph6/edges.txt b/graphs/graph6/edges.txt new file mode 100644 index 0000000..1064f97 --- /dev/null +++ b/graphs/graph6/edges.txt @@ -0,0 +1,5 @@ +A B 0.9 U +A C 0.1 U +B D 0.9 U +C D 0.1 U +A D 0.8 U \ No newline at end of file diff --git a/graphs/graph6/sources.txt b/graphs/graph6/sources.txt new file mode 100644 index 0000000..8c7e5a6 --- /dev/null +++ b/graphs/graph6/sources.txt @@ -0,0 +1 @@ +A \ No newline at end of file diff --git a/graphs/graph6/targets.txt b/graphs/graph6/targets.txt new file mode 100644 index 0000000..02358d2 --- /dev/null +++ b/graphs/graph6/targets.txt @@ -0,0 +1 @@ +D \ No newline at end of file diff --git a/graphs/graph7/edges.txt b/graphs/graph7/edges.txt new file mode 100644 index 0000000..a964d9f --- /dev/null +++ b/graphs/graph7/edges.txt @@ -0,0 +1,4 @@ +A B 0.9 U +A C 0.1 U +B D 0.1 U +C D 0.9 U diff --git a/graphs/graph7/sources.txt b/graphs/graph7/sources.txt new file mode 100644 index 0000000..8c7e5a6 --- /dev/null +++ b/graphs/graph7/sources.txt @@ -0,0 +1 @@ +A \ No newline at end of file diff --git a/graphs/graph7/targets.txt b/graphs/graph7/targets.txt new file mode 100644 index 0000000..02358d2 --- /dev/null +++ b/graphs/graph7/targets.txt @@ -0,0 +1 @@ +D \ No newline at end of file diff --git 
a/graphs/graph8/edges.txt b/graphs/graph8/edges.txt new file mode 100644 index 0000000..f5496da --- /dev/null +++ b/graphs/graph8/edges.txt @@ -0,0 +1,4 @@ +A B 0.9 U +A C 0.9 U +B D 0.9 U +C D 0.9 U diff --git a/graphs/graph8/sources.txt b/graphs/graph8/sources.txt new file mode 100644 index 0000000..8c7e5a6 --- /dev/null +++ b/graphs/graph8/sources.txt @@ -0,0 +1 @@ +A \ No newline at end of file diff --git a/graphs/graph8/targets.txt b/graphs/graph8/targets.txt new file mode 100644 index 0000000..02358d2 --- /dev/null +++ b/graphs/graph8/targets.txt @@ -0,0 +1 @@ +D \ No newline at end of file diff --git a/graphs/graph9/edges.txt b/graphs/graph9/edges.txt new file mode 100644 index 0000000..6edc4bb --- /dev/null +++ b/graphs/graph9/edges.txt @@ -0,0 +1,4 @@ +A B 0.9 D +A C 0.1 U +B D 0.9 U +C D 0.1 D diff --git a/graphs/graph9/sources.txt b/graphs/graph9/sources.txt new file mode 100644 index 0000000..8c7e5a6 --- /dev/null +++ b/graphs/graph9/sources.txt @@ -0,0 +1 @@ +A \ No newline at end of file diff --git a/graphs/graph9/targets.txt b/graphs/graph9/targets.txt new file mode 100644 index 0000000..02358d2 --- /dev/null +++ b/graphs/graph9/targets.txt @@ -0,0 +1 @@ +D \ No newline at end of file From 258116cc61fa1d424dabf0632b2bf00acb3b51a3 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Tue, 16 Jan 2024 18:38:27 -0800 Subject: [PATCH 08/18] update code --- minCostFlow.py | 76 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 59 insertions(+), 17 deletions(-) diff --git a/minCostFlow.py b/minCostFlow.py index 9f2b2ec..1c8d097 100644 --- a/minCostFlow.py +++ b/minCostFlow.py @@ -11,7 +11,9 @@ import argparse from ortools.graph.python.min_cost_flow import SimpleMinCostFlow -edges_dict = dict() +# (node1, node2) : weight +directed_dict = dict() +undirected_dict = dict() def parse_nodes(node_file): ''' Parse a list of sources or targets and return a set ''' @@ -43,24 +45,49 @@ def construct_digraph(edges_file, cap): if not node2 in idDict: 
idDict[node2] = curID curID += 1 - #Google's solver can only handle int weights, so round to the 100th + #Google's solver can only handle int weights, so round to the 100th w = int((1-(float(tokens[2])))*100) d = tokens[3] + edge = (node1, node2) + sorted_edge = tuple(sorted(edge)) + + if d == "D": + if edge in directed_dict: + if w > directed_dict[edge]: + directed_dict[edge] = w + # check if sorted edge is in undirected_dict + elif sorted_edge in undirected_dict: + # remove edge from undirected_dict + del undirected_dict[sorted_edge] + # add edge to directed_dict (not sorted) + directed_dict[edge] = w + else: + # add edge to directed_dict + directed_dict[edge] = w - # TODO: is it better to overwrite the edges or just to throw an error - if (node1, node2) in edges_dict: - raise KeyError(f"Failed to add the edge ({node1}, {node2}) with weight '{tokens[2]}' and directionality '{d}'. This edge conflicts with an existing '{edges_dict[(node1, node2)]}' edge in the graph.") elif d == "U": - edges_dict[(node1, node2)] = "U" - edges_dict[(node2, node1)] = "U" - G.add_arc_with_capacity_and_unit_cost(idDict[node1],idDict[node2], default_capacity, int(w)) - G.add_arc_with_capacity_and_unit_cost(idDict[node2],idDict[node1], default_capacity, int(w)) - elif d == "D": - edges_dict[(node1, node2)] = "D" - G.add_arc_with_capacity_and_unit_cost(idDict[node1],idDict[node2], default_capacity, int(w)) - else: - raise ValueError (f"Cannot add egdge: d = {d}") - + # check for edge in directed_edges: skip + if edge in directed_dict: + continue + elif sorted_edge in undirected_dict: + if w < undirected_dict[sorted_edge]: + undirected_dict[sorted_edge] = w + # check if edge in undirected_edges: if weight is lower than newer edge, then replace with newer edges weight + # otherwise add edge to undirected_edges dictionary as (node1, node2) = (U, weight) + else: + undirected_dict[sorted_edge] = w + else: + raise ValueError (f"Cannot add edge: d = {d}") + + # go through and add the edges from 
directed_dict and undirected_dict to G + for key, value in directed_dict.items(): + print(key, value) + G.add_arc_with_capacity_and_unit_cost(idDict[key[0]],idDict[key[1]], default_capacity, int(value)) + for key, value in undirected_dict.items(): + print(key, value) + G.add_arc_with_capacity_and_unit_cost(idDict[key[0]],idDict[key[1]], default_capacity, int(value)) + G.add_arc_with_capacity_and_unit_cost(idDict[key[1]],idDict[key[0]], default_capacity, int(value)) + idDict["maxID"] = curID return G,idDict @@ -115,8 +142,23 @@ def write_output_to_sif(G,out_file_name,idDict): continue numE+=1 - d = edges_dict[(node1, node2)] - out_file.write(node1+"\t"+node2+"\t"+d+"\n") + # for the edge chosen, + # // node1 is the tail + # // node2 is the head + edge = (node1, node2) + sorted_edge = tuple(sorted(edge)) + + if edge in directed_dict: + out_file.write(node1+"\t"+node2+"\t"+"D"+"\n") + elif sorted_edge in undirected_dict: + out_file.write(node1+"\t"+node2+"\t"+"U"+"\n") + else: + raise KeyError(f"edge {edge} is not in the dicts") + # check if (node1, node2) is in directed_edges dict: + # write output with a directed edge + # otherwise check if (node1, node2) or (node2, node1) is in the undirected_edges dict: + # write output with an undirected edge + print("Final network had %d edges" % numE) out_file.close() From 40e40510715102bb70d212f7e7b34b499a871b65 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Wed, 17 Jan 2024 12:41:38 -0800 Subject: [PATCH 09/18] update to code and made test cases for the code --- minCostFlow.py | 35 +++++++++-------------------------- test_minCostFlow.py | 19 +++++++++++++++++++ tests/graph1/edges.txt | 3 +++ tests/graph1/sources.txt | 1 + tests/graph1/targets.txt | 1 + tests/graph2/edges.txt | 4 ++++ tests/graph2/sources.txt | 1 + tests/graph2/targets.txt | 1 + tests/graph3/edges.txt | 4 ++++ tests/graph3/sources.txt | 1 + tests/graph3/targets.txt | 1 + tests/graph4/edges.txt | 3 +++ tests/graph4/sources.txt | 1 + tests/graph4/targets.txt | 1 
+ tests/graph5/edges.txt | 6 ++++++ tests/graph5/sources.txt | 1 + tests/graph5/targets.txt | 1 + tests/graph6/edges.txt | 4 ++++ tests/graph6/sources.txt | 1 + tests/graph6/targets.txt | 1 + 20 files changed, 64 insertions(+), 26 deletions(-) create mode 100644 test_minCostFlow.py create mode 100644 tests/graph1/edges.txt create mode 100644 tests/graph1/sources.txt create mode 100644 tests/graph1/targets.txt create mode 100644 tests/graph2/edges.txt create mode 100644 tests/graph2/sources.txt create mode 100644 tests/graph2/targets.txt create mode 100644 tests/graph3/edges.txt create mode 100644 tests/graph3/sources.txt create mode 100644 tests/graph3/targets.txt create mode 100644 tests/graph4/edges.txt create mode 100644 tests/graph4/sources.txt create mode 100644 tests/graph4/targets.txt create mode 100644 tests/graph5/edges.txt create mode 100644 tests/graph5/sources.txt create mode 100644 tests/graph5/targets.txt create mode 100644 tests/graph6/edges.txt create mode 100644 tests/graph6/sources.txt create mode 100644 tests/graph6/targets.txt diff --git a/minCostFlow.py b/minCostFlow.py index 1c8d097..eec70b3 100644 --- a/minCostFlow.py +++ b/minCostFlow.py @@ -30,7 +30,7 @@ def construct_digraph(edges_file, cap): capacity of 1. 
''' G = SimpleMinCostFlow() - idDict = dict() #Hold names to number ids + idDict = dict() # Hold names to number ids curID = 0 default_capacity = int(cap) @@ -45,7 +45,7 @@ def construct_digraph(edges_file, cap): if not node2 in idDict: idDict[node2] = curID curID += 1 - #Google's solver can only handle int weights, so round to the 100th + # Google's solver can only handle int weights, so round to the 100th w = int((1-(float(tokens[2])))*100) d = tokens[3] edge = (node1, node2) @@ -55,36 +55,27 @@ def construct_digraph(edges_file, cap): if edge in directed_dict: if w > directed_dict[edge]: directed_dict[edge] = w - # check if sorted edge is in undirected_dict elif sorted_edge in undirected_dict: - # remove edge from undirected_dict del undirected_dict[sorted_edge] - # add edge to directed_dict (not sorted) directed_dict[edge] = w - else: - # add edge to directed_dict + else: # edge not in directed_dict directed_dict[edge] = w elif d == "U": - # check for edge in directed_edges: skip - if edge in directed_dict: - continue + if edge not in directed_dict and sorted_edge not in directed_dict and sorted_edge not in undirected_dict: + undirected_dict[sorted_edge] = w elif sorted_edge in undirected_dict: - if w < undirected_dict[sorted_edge]: + if w > undirected_dict[sorted_edge]: undirected_dict[sorted_edge] = w - # check if edge in undirected_edges: if weight is lower than newer edge, then replace with newer edges weight - # otherwise add edge to undirected_edges dictionary as (node1, node2) = (U, weight) - else: - undirected_dict[sorted_edge] = w else: raise ValueError (f"Cannot add edge: d = {d}") + # print("undirected_dict: ", undirected_dict) + # print("directed_dict: ", directed_dict) # go through and add the edges from directed_dict and undirected_dict to G for key, value in directed_dict.items(): - print(key, value) G.add_arc_with_capacity_and_unit_cost(idDict[key[0]],idDict[key[1]], default_capacity, int(value)) for key, value in undirected_dict.items(): - 
print(key, value) G.add_arc_with_capacity_and_unit_cost(idDict[key[0]],idDict[key[1]], default_capacity, int(value)) G.add_arc_with_capacity_and_unit_cost(idDict[key[1]],idDict[key[0]], default_capacity, int(value)) @@ -142,24 +133,16 @@ def write_output_to_sif(G,out_file_name,idDict): continue numE+=1 - # for the edge chosen, - # // node1 is the tail - # // node2 is the head edge = (node1, node2) sorted_edge = tuple(sorted(edge)) - + if edge in directed_dict: out_file.write(node1+"\t"+node2+"\t"+"D"+"\n") elif sorted_edge in undirected_dict: out_file.write(node1+"\t"+node2+"\t"+"U"+"\n") else: raise KeyError(f"edge {edge} is not in the dicts") - # check if (node1, node2) is in directed_edges dict: - # write output with a directed edge - # otherwise check if (node1, node2) or (node2, node1) is in the undirected_edges dict: - # write output with an undirected edge - print("Final network had %d edges" % numE) out_file.close() diff --git a/test_minCostFlow.py b/test_minCostFlow.py new file mode 100644 index 0000000..2ae4f36 --- /dev/null +++ b/test_minCostFlow.py @@ -0,0 +1,19 @@ +import subprocess + +# uncomment line 73 and 74 to check over dictionaries + +command = "python" +script = "minCostFlow.py" + +for i in range (1,7): + print("test: ",i) + args = [ + "--edges_file", f"tests/graph{i}/edges.txt", + "--sources_file", f"tests/graph{i}/sources.txt", + "--targets_file", f"tests/graph{i}/targets.txt", + "--output", f"test_graph{i}" + ] + cmd = [command, script] + args + + # Run the command + subprocess.run(cmd) \ No newline at end of file diff --git a/tests/graph1/edges.txt b/tests/graph1/edges.txt new file mode 100644 index 0000000..768b395 --- /dev/null +++ b/tests/graph1/edges.txt @@ -0,0 +1,3 @@ +A B 0.1 D +B C 0.1 D +B C 0.1 D diff --git a/tests/graph1/sources.txt b/tests/graph1/sources.txt new file mode 100644 index 0000000..8c7e5a6 --- /dev/null +++ b/tests/graph1/sources.txt @@ -0,0 +1 @@ +A \ No newline at end of file diff --git a/tests/graph1/targets.txt 
b/tests/graph1/targets.txt new file mode 100644 index 0000000..96d80cd --- /dev/null +++ b/tests/graph1/targets.txt @@ -0,0 +1 @@ +C \ No newline at end of file diff --git a/tests/graph2/edges.txt b/tests/graph2/edges.txt new file mode 100644 index 0000000..8ff596c --- /dev/null +++ b/tests/graph2/edges.txt @@ -0,0 +1,4 @@ +A B 0.2 D +B C 0.2 D +A B 0.1 D +B C 0.3 D \ No newline at end of file diff --git a/tests/graph2/sources.txt b/tests/graph2/sources.txt new file mode 100644 index 0000000..8c7e5a6 --- /dev/null +++ b/tests/graph2/sources.txt @@ -0,0 +1 @@ +A \ No newline at end of file diff --git a/tests/graph2/targets.txt b/tests/graph2/targets.txt new file mode 100644 index 0000000..96d80cd --- /dev/null +++ b/tests/graph2/targets.txt @@ -0,0 +1 @@ +C \ No newline at end of file diff --git a/tests/graph3/edges.txt b/tests/graph3/edges.txt new file mode 100644 index 0000000..85d22e4 --- /dev/null +++ b/tests/graph3/edges.txt @@ -0,0 +1,4 @@ +A B 0.1 U +B C 0.1 D +A B 0.1 D +B C 0.1 U \ No newline at end of file diff --git a/tests/graph3/sources.txt b/tests/graph3/sources.txt new file mode 100644 index 0000000..8c7e5a6 --- /dev/null +++ b/tests/graph3/sources.txt @@ -0,0 +1 @@ +A \ No newline at end of file diff --git a/tests/graph3/targets.txt b/tests/graph3/targets.txt new file mode 100644 index 0000000..96d80cd --- /dev/null +++ b/tests/graph3/targets.txt @@ -0,0 +1 @@ +C \ No newline at end of file diff --git a/tests/graph4/edges.txt b/tests/graph4/edges.txt new file mode 100644 index 0000000..48feaa9 --- /dev/null +++ b/tests/graph4/edges.txt @@ -0,0 +1,3 @@ +A B 0.1 U +B C 0.1 U +B C 0.1 U diff --git a/tests/graph4/sources.txt b/tests/graph4/sources.txt new file mode 100644 index 0000000..8c7e5a6 --- /dev/null +++ b/tests/graph4/sources.txt @@ -0,0 +1 @@ +A \ No newline at end of file diff --git a/tests/graph4/targets.txt b/tests/graph4/targets.txt new file mode 100644 index 0000000..96d80cd --- /dev/null +++ b/tests/graph4/targets.txt @@ -0,0 +1 @@ +C \ 
No newline at end of file diff --git a/tests/graph5/edges.txt b/tests/graph5/edges.txt new file mode 100644 index 0000000..9b62be9 --- /dev/null +++ b/tests/graph5/edges.txt @@ -0,0 +1,6 @@ +A B 0.1 D +A B 0.1 U +B A 0.1 U +B A 0.1 D +B C 0.1 D +B C 0.1 U \ No newline at end of file diff --git a/tests/graph5/sources.txt b/tests/graph5/sources.txt new file mode 100644 index 0000000..8c7e5a6 --- /dev/null +++ b/tests/graph5/sources.txt @@ -0,0 +1 @@ +A \ No newline at end of file diff --git a/tests/graph5/targets.txt b/tests/graph5/targets.txt new file mode 100644 index 0000000..96d80cd --- /dev/null +++ b/tests/graph5/targets.txt @@ -0,0 +1 @@ +C \ No newline at end of file diff --git a/tests/graph6/edges.txt b/tests/graph6/edges.txt new file mode 100644 index 0000000..9812a6e --- /dev/null +++ b/tests/graph6/edges.txt @@ -0,0 +1,4 @@ +A B 0.2 U +B C 0.2 U +A B 0.1 U +B C 0.3 U \ No newline at end of file diff --git a/tests/graph6/sources.txt b/tests/graph6/sources.txt new file mode 100644 index 0000000..8c7e5a6 --- /dev/null +++ b/tests/graph6/sources.txt @@ -0,0 +1 @@ +A \ No newline at end of file diff --git a/tests/graph6/targets.txt b/tests/graph6/targets.txt new file mode 100644 index 0000000..96d80cd --- /dev/null +++ b/tests/graph6/targets.txt @@ -0,0 +1 @@ +C \ No newline at end of file From 3b156e0fa3173b1d77cf08172ee15b1eef11acf5 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Wed, 17 Jan 2024 12:58:23 -0800 Subject: [PATCH 10/18] added an additional test case --- test_minCostFlow.py | 2 +- tests/graph7/edges.txt | 0 tests/graph7/sources.txt | 1 + tests/graph7/targets.txt | 1 + 4 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 tests/graph7/edges.txt create mode 100644 tests/graph7/sources.txt create mode 100644 tests/graph7/targets.txt diff --git a/test_minCostFlow.py b/test_minCostFlow.py index 2ae4f36..7624697 100644 --- a/test_minCostFlow.py +++ b/test_minCostFlow.py @@ -5,7 +5,7 @@ command = "python" script = "minCostFlow.py" -for i in 
range (1,7): +for i in range (1,8): print("test: ",i) args = [ "--edges_file", f"tests/graph{i}/edges.txt", diff --git a/tests/graph7/edges.txt b/tests/graph7/edges.txt new file mode 100644 index 0000000..e69de29 diff --git a/tests/graph7/sources.txt b/tests/graph7/sources.txt new file mode 100644 index 0000000..8c7e5a6 --- /dev/null +++ b/tests/graph7/sources.txt @@ -0,0 +1 @@ +A \ No newline at end of file diff --git a/tests/graph7/targets.txt b/tests/graph7/targets.txt new file mode 100644 index 0000000..96d80cd --- /dev/null +++ b/tests/graph7/targets.txt @@ -0,0 +1 @@ +C \ No newline at end of file From 001dc5a84a76acdc93d158271eb9553cfb7dd094 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Thu, 18 Jan 2024 17:05:54 -0800 Subject: [PATCH 11/18] rename files and add correctness files --- .../edges.txt => graphs/correct_outputs.txt | 0 minCostFlow.py | 2 +- test_minCostFlow.py | 25 ++++++++++++++++--- tests/correct_outputs.txt | 0 tests/{graph1 => test1}/edges.txt | 0 tests/{graph1 => test1}/sources.txt | 0 tests/{graph1 => test1}/targets.txt | 0 tests/{graph2 => test2}/edges.txt | 0 tests/{graph2 => test2}/sources.txt | 0 tests/{graph2 => test2}/targets.txt | 0 tests/{graph3 => test3}/edges.txt | 0 tests/{graph3 => test3}/sources.txt | 0 tests/{graph3 => test3}/targets.txt | 0 tests/{graph4 => test4}/edges.txt | 0 tests/{graph4 => test4}/sources.txt | 0 tests/{graph4 => test4}/targets.txt | 0 tests/{graph5 => test5}/edges.txt | 0 tests/{graph5 => test5}/sources.txt | 0 tests/{graph5 => test5}/targets.txt | 0 tests/{graph6 => test6}/edges.txt | 0 tests/{graph6 => test6}/sources.txt | 0 tests/{graph6 => test6}/targets.txt | 0 tests/test7/edges.txt | 0 tests/{graph7 => test7}/sources.txt | 0 tests/{graph7 => test7}/targets.txt | 0 25 files changed, 22 insertions(+), 5 deletions(-) rename tests/graph7/edges.txt => graphs/correct_outputs.txt (100%) create mode 100644 tests/correct_outputs.txt rename tests/{graph1 => test1}/edges.txt (100%) rename tests/{graph1 => 
test1}/sources.txt (100%) rename tests/{graph1 => test1}/targets.txt (100%) rename tests/{graph2 => test2}/edges.txt (100%) rename tests/{graph2 => test2}/sources.txt (100%) rename tests/{graph2 => test2}/targets.txt (100%) rename tests/{graph3 => test3}/edges.txt (100%) rename tests/{graph3 => test3}/sources.txt (100%) rename tests/{graph3 => test3}/targets.txt (100%) rename tests/{graph4 => test4}/edges.txt (100%) rename tests/{graph4 => test4}/sources.txt (100%) rename tests/{graph4 => test4}/targets.txt (100%) rename tests/{graph5 => test5}/edges.txt (100%) rename tests/{graph5 => test5}/sources.txt (100%) rename tests/{graph5 => test5}/targets.txt (100%) rename tests/{graph6 => test6}/edges.txt (100%) rename tests/{graph6 => test6}/sources.txt (100%) rename tests/{graph6 => test6}/targets.txt (100%) create mode 100644 tests/test7/edges.txt rename tests/{graph7 => test7}/sources.txt (100%) rename tests/{graph7 => test7}/targets.txt (100%) diff --git a/tests/graph7/edges.txt b/graphs/correct_outputs.txt similarity index 100% rename from tests/graph7/edges.txt rename to graphs/correct_outputs.txt diff --git a/minCostFlow.py b/minCostFlow.py index eec70b3..a1efb99 100644 --- a/minCostFlow.py +++ b/minCostFlow.py @@ -46,7 +46,7 @@ def construct_digraph(edges_file, cap): idDict[node2] = curID curID += 1 # Google's solver can only handle int weights, so round to the 100th - w = int((1-(float(tokens[2])))*100) + w = int((1-(float(tokens[2])))*100) # lower the weight from token[2], higher the cost d = tokens[3] edge = (node1, node2) sorted_edge = tuple(sorted(edge)) diff --git a/test_minCostFlow.py b/test_minCostFlow.py index 7624697..1482d59 100644 --- a/test_minCostFlow.py +++ b/test_minCostFlow.py @@ -5,13 +5,30 @@ command = "python" script = "minCostFlow.py" +print("testing code functionality") for i in range (1,8): + print("test: ",i) args = [ - "--edges_file", f"tests/graph{i}/edges.txt", - "--sources_file", f"tests/graph{i}/sources.txt", - "--targets_file", 
f"tests/graph{i}/targets.txt", - "--output", f"test_graph{i}" + "--edges_file", f"tests/test{i}/edges.txt", + "--sources_file", f"tests/test{i}/sources.txt", + "--targets_file", f"tests/test{i}/targets.txt", + "--output", f"test{i}" + ] + cmd = [command, script] + args + + # Run the command + subprocess.run(cmd) + + +print("\ntesting code correctness") +for i in range (1,12): + print("graph: ",i) + args = [ + "--edges_file", f"graphs/graph{i}/edges.txt", + "--sources_file", f"graphs/graph{i}/sources.txt", + "--targets_file", f"graphs/graph{i}/targets.txt", + "--output", f"graph{i}" ] cmd = [command, script] + args diff --git a/tests/correct_outputs.txt b/tests/correct_outputs.txt new file mode 100644 index 0000000..e69de29 diff --git a/tests/graph1/edges.txt b/tests/test1/edges.txt similarity index 100% rename from tests/graph1/edges.txt rename to tests/test1/edges.txt diff --git a/tests/graph1/sources.txt b/tests/test1/sources.txt similarity index 100% rename from tests/graph1/sources.txt rename to tests/test1/sources.txt diff --git a/tests/graph1/targets.txt b/tests/test1/targets.txt similarity index 100% rename from tests/graph1/targets.txt rename to tests/test1/targets.txt diff --git a/tests/graph2/edges.txt b/tests/test2/edges.txt similarity index 100% rename from tests/graph2/edges.txt rename to tests/test2/edges.txt diff --git a/tests/graph2/sources.txt b/tests/test2/sources.txt similarity index 100% rename from tests/graph2/sources.txt rename to tests/test2/sources.txt diff --git a/tests/graph2/targets.txt b/tests/test2/targets.txt similarity index 100% rename from tests/graph2/targets.txt rename to tests/test2/targets.txt diff --git a/tests/graph3/edges.txt b/tests/test3/edges.txt similarity index 100% rename from tests/graph3/edges.txt rename to tests/test3/edges.txt diff --git a/tests/graph3/sources.txt b/tests/test3/sources.txt similarity index 100% rename from tests/graph3/sources.txt rename to tests/test3/sources.txt diff --git 
a/tests/graph3/targets.txt b/tests/test3/targets.txt similarity index 100% rename from tests/graph3/targets.txt rename to tests/test3/targets.txt diff --git a/tests/graph4/edges.txt b/tests/test4/edges.txt similarity index 100% rename from tests/graph4/edges.txt rename to tests/test4/edges.txt diff --git a/tests/graph4/sources.txt b/tests/test4/sources.txt similarity index 100% rename from tests/graph4/sources.txt rename to tests/test4/sources.txt diff --git a/tests/graph4/targets.txt b/tests/test4/targets.txt similarity index 100% rename from tests/graph4/targets.txt rename to tests/test4/targets.txt diff --git a/tests/graph5/edges.txt b/tests/test5/edges.txt similarity index 100% rename from tests/graph5/edges.txt rename to tests/test5/edges.txt diff --git a/tests/graph5/sources.txt b/tests/test5/sources.txt similarity index 100% rename from tests/graph5/sources.txt rename to tests/test5/sources.txt diff --git a/tests/graph5/targets.txt b/tests/test5/targets.txt similarity index 100% rename from tests/graph5/targets.txt rename to tests/test5/targets.txt diff --git a/tests/graph6/edges.txt b/tests/test6/edges.txt similarity index 100% rename from tests/graph6/edges.txt rename to tests/test6/edges.txt diff --git a/tests/graph6/sources.txt b/tests/test6/sources.txt similarity index 100% rename from tests/graph6/sources.txt rename to tests/test6/sources.txt diff --git a/tests/graph6/targets.txt b/tests/test6/targets.txt similarity index 100% rename from tests/graph6/targets.txt rename to tests/test6/targets.txt diff --git a/tests/test7/edges.txt b/tests/test7/edges.txt new file mode 100644 index 0000000..e69de29 diff --git a/tests/graph7/sources.txt b/tests/test7/sources.txt similarity index 100% rename from tests/graph7/sources.txt rename to tests/test7/sources.txt diff --git a/tests/graph7/targets.txt b/tests/test7/targets.txt similarity index 100% rename from tests/graph7/targets.txt rename to tests/test7/targets.txt From dab0a3e80edcb901929324965e8cb46589159ae6 
Mon Sep 17 00:00:00 2001 From: ntalluri Date: Tue, 23 Jan 2024 14:11:37 -0600 Subject: [PATCH 12/18] cleaned up code and added correct_outputs.txt for test graphs --- graphs/correct_outputs.txt | 53 ++++++++++++++++++++++++++++++++++++++ graphs/graph12/edges.txt | 4 +++ graphs/graph12/sources.txt | 1 + graphs/graph12/targets.txt | 1 + graphs/graph13/edges.txt | 4 +++ graphs/graph13/sources.txt | 1 + graphs/graph13/targets.txt | 1 + test_minCostFlow.py | 2 +- tests/correct_outputs.txt | 31 ++++++++++++++++++++++ 9 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 graphs/graph12/edges.txt create mode 100644 graphs/graph12/sources.txt create mode 100644 graphs/graph12/targets.txt create mode 100644 graphs/graph13/edges.txt create mode 100644 graphs/graph13/sources.txt create mode 100644 graphs/graph13/targets.txt diff --git a/graphs/correct_outputs.txt b/graphs/correct_outputs.txt index e69de29..7bd3a5b 100644 --- a/graphs/correct_outputs.txt +++ b/graphs/correct_outputs.txt @@ -0,0 +1,53 @@ +The graphs series of graphs are used to check the code's correctness. Except for internal tiebreaking by the solver, each result is deterministic. 
+ +graph1: +A B D +B D D + +graph2: +A B D +B D D + +graph3: deals with tiebreaking +A C D +C D D + +graph4: +A B D +B D D + +graph5: +A B U +B D U + +graph6: +A B U +B D U + +graph7: deals with tiebreaking +A B U +B D U + +graph8: +A B U +B D U + +graph9: +A B D +B D U + +graph10: +A B D +B D D + +graph11: +A B U +B D U + +graph12: +A B D +B D D + +graph13: +A B U +B D U diff --git a/graphs/graph12/edges.txt b/graphs/graph12/edges.txt new file mode 100644 index 0000000..828be53 --- /dev/null +++ b/graphs/graph12/edges.txt @@ -0,0 +1,4 @@ +A B 0.9 D +A C 0.1 D +B D 0.8 D +C D 0.2 D diff --git a/graphs/graph12/sources.txt b/graphs/graph12/sources.txt new file mode 100644 index 0000000..8c7e5a6 --- /dev/null +++ b/graphs/graph12/sources.txt @@ -0,0 +1 @@ +A \ No newline at end of file diff --git a/graphs/graph12/targets.txt b/graphs/graph12/targets.txt new file mode 100644 index 0000000..02358d2 --- /dev/null +++ b/graphs/graph12/targets.txt @@ -0,0 +1 @@ +D \ No newline at end of file diff --git a/graphs/graph13/edges.txt b/graphs/graph13/edges.txt new file mode 100644 index 0000000..5b5ce23 --- /dev/null +++ b/graphs/graph13/edges.txt @@ -0,0 +1,4 @@ +A B 0.9 U +A C 0.1 U +B D 0.8 U +C D 0.2 U diff --git a/graphs/graph13/sources.txt b/graphs/graph13/sources.txt new file mode 100644 index 0000000..8c7e5a6 --- /dev/null +++ b/graphs/graph13/sources.txt @@ -0,0 +1 @@ +A \ No newline at end of file diff --git a/graphs/graph13/targets.txt b/graphs/graph13/targets.txt new file mode 100644 index 0000000..02358d2 --- /dev/null +++ b/graphs/graph13/targets.txt @@ -0,0 +1 @@ +D \ No newline at end of file diff --git a/test_minCostFlow.py b/test_minCostFlow.py index 1482d59..77b1382 100644 --- a/test_minCostFlow.py +++ b/test_minCostFlow.py @@ -22,7 +22,7 @@ print("\ntesting code correctness") -for i in range (1,12): +for i in range (1,14): print("graph: ",i) args = [ "--edges_file", f"graphs/graph{i}/edges.txt", diff --git a/tests/correct_outputs.txt 
b/tests/correct_outputs.txt index e69de29..6ac896b 100644 --- a/tests/correct_outputs.txt +++ b/tests/correct_outputs.txt @@ -0,0 +1,31 @@ +The tests series of graphs are used to verify whether the code is executing appropriately depending on distinct edge cases. + +test 1: check if unique directed edges are added to directed_dict +Output: +A B D +B C D + +test2: check if higher edge weight is selected for the same directed edge from the input +Output: +A B D +B D D + +test3: If a directed edge is present in the input and an undirected edge from that edge already exists, the directed edge is prioritized and added to directed_dict and the undirected edge is deleted from undirected_dict. +A C D +C D D + +test4: check if unique undirected edges are added to undirected_dict +A B D +B D D + +test5: check that an undirected edge is not added if a directed edge of that edge already exists +A B U +B D U + +test6: check if higher edge weight is selected for the same undirected edge from the input +A B U +B D U + +test7: check that code still runs and outputs an error message with an empty edges.txt +A B U +B D U From 5d1f66e9fd61116766f4c9f75eb7fec7585056ba Mon Sep 17 00:00:00 2001 From: ntalluri Date: Mon, 29 Jan 2024 13:31:54 -0600 Subject: [PATCH 13/18] updated mcf code --- README.md | 17 +++++++++++++++++ minCostFlow.py | 8 ++++---- test_minCostFlow.py | 4 ++-- tests/correct_outputs.txt | 24 ++++++++++++++---------- tests/test2/edges.txt | 4 ++-- tests/test5/edges.txt | 1 - tests/test6/edges.txt | 4 ++-- 7 files changed, 41 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 049a4d8..27dd3a1 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,18 @@ More details of the algorithm can be found in: Chris S Magnano, Anthony Gitter. *npj Systems Biology and Applications*, 7:12, 2021. 
+## Edge Handling +The code is designed to process both undirected and directed edges, prioritizing directed edges in scenarios where an equivalent undirected edge exists and selecting higher edge weights in the case of duplicate edges. + +## Input Format Example +The input should be formatted as follows, with columns for node1, node2, rank, and direction: +``` +A B 0.9 U +B A 0.1 D +... +``` +In this format, "U" represents an undirected edge, and "D" represents a directed edge. + ## Dependencies Google's [OR-Tools library](https://developers.google.com/optimization/flow/mincostflow) is required to run this script. @@ -32,3 +44,8 @@ Python 3 is required to run this script > --output Prefix for all output files. > > --capacity The amount of flow which can pass through a single edge. + +## Testing +`python test_minCostFlow.py` + +The code executes two sets of graph series, namely the 'graph series' and the 'test series'. The graphs series of graphs are used to check the code's correctness. Except for internal tiebreaking by the solver, each result is deterministic. The tests series of graphs are used to verify whether the code is executing appropriately depending on distinct edge cases. The expected results for both series can be found in graphs/correct_outputs.txt for the graph series and tests/correct_outputs.txt for the test series. 
\ No newline at end of file diff --git a/minCostFlow.py b/minCostFlow.py index a1efb99..37875c7 100644 --- a/minCostFlow.py +++ b/minCostFlow.py @@ -53,7 +53,7 @@ def construct_digraph(edges_file, cap): if d == "D": if edge in directed_dict: - if w > directed_dict[edge]: + if w < directed_dict[edge]: directed_dict[edge] = w elif sorted_edge in undirected_dict: del undirected_dict[sorted_edge] @@ -65,13 +65,13 @@ def construct_digraph(edges_file, cap): if edge not in directed_dict and sorted_edge not in directed_dict and sorted_edge not in undirected_dict: undirected_dict[sorted_edge] = w elif sorted_edge in undirected_dict: - if w > undirected_dict[sorted_edge]: + if w < undirected_dict[sorted_edge]: undirected_dict[sorted_edge] = w else: raise ValueError (f"Cannot add edge: d = {d}") - # print("undirected_dict: ", undirected_dict) - # print("directed_dict: ", directed_dict) + print("undirected_dict: ", undirected_dict) + print("directed_dict: ", directed_dict) # go through and add the edges from directed_dict and undirected_dict to G for key, value in directed_dict.items(): G.add_arc_with_capacity_and_unit_cost(idDict[key[0]],idDict[key[1]], default_capacity, int(value)) diff --git a/test_minCostFlow.py b/test_minCostFlow.py index 77b1382..55d22d8 100644 --- a/test_minCostFlow.py +++ b/test_minCostFlow.py @@ -5,7 +5,7 @@ command = "python" script = "minCostFlow.py" -print("testing code functionality") +print("TEST SERIES") for i in range (1,8): print("test: ",i) @@ -21,7 +21,7 @@ subprocess.run(cmd) -print("\ntesting code correctness") +print("\nGRAPHS SERIES") for i in range (1,14): print("graph: ",i) args = [ diff --git a/tests/correct_outputs.txt b/tests/correct_outputs.txt index 6ac896b..fd5ea5f 100644 --- a/tests/correct_outputs.txt +++ b/tests/correct_outputs.txt @@ -8,24 +8,28 @@ B C D test2: check if higher edge weight is selected for the same directed edge from the input Output: A B D -B D D +B C D test3: If a directed edge is present in the input and an 
undirected edge from that edge already exists, the directed edge is prioritized and added to directed_dict and the undirected edge is deleted from undirected_dict. -A C D -C D D +Output: +B C D +A B D test4: check if unique undirected edges are added to undirected_dict -A B D -B D D +Output: +A B U +B C U test5: check that an undirected edge is not added if a directed edge of that edge already exists -A B U -B D U +Output: +A B D +B C D test6: check if higher edge weight is selected for the same undirected edge from the input +Output: A B U -B D U +B C U test7: check that code still runs and outputs an error message with an empty edges.txt -A B U -B D U +Output: +N/A diff --git a/tests/test2/edges.txt b/tests/test2/edges.txt index 8ff596c..de81734 100644 --- a/tests/test2/edges.txt +++ b/tests/test2/edges.txt @@ -1,4 +1,4 @@ A B 0.2 D B C 0.2 D -A B 0.1 D -B C 0.3 D \ No newline at end of file +A B 0.9 D +B C 0.1 D \ No newline at end of file diff --git a/tests/test5/edges.txt b/tests/test5/edges.txt index 9b62be9..1647c45 100644 --- a/tests/test5/edges.txt +++ b/tests/test5/edges.txt @@ -1,6 +1,5 @@ A B 0.1 D A B 0.1 U B A 0.1 U -B A 0.1 D B C 0.1 D B C 0.1 U \ No newline at end of file diff --git a/tests/test6/edges.txt b/tests/test6/edges.txt index 9812a6e..aad1cb9 100644 --- a/tests/test6/edges.txt +++ b/tests/test6/edges.txt @@ -1,4 +1,4 @@ A B 0.2 U B C 0.2 U -A B 0.1 U -B C 0.3 U \ No newline at end of file +A B 0.9 U +B C 0.1 U \ No newline at end of file From dcdae9438e4c00c435329e8c1bd8178e1f4c243e Mon Sep 17 00:00:00 2001 From: ntalluri Date: Mon, 29 Jan 2024 13:33:31 -0600 Subject: [PATCH 14/18] clean code --- minCostFlow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/minCostFlow.py b/minCostFlow.py index 37875c7..dc25700 100644 --- a/minCostFlow.py +++ b/minCostFlow.py @@ -70,8 +70,8 @@ def construct_digraph(edges_file, cap): else: raise ValueError (f"Cannot add edge: d = {d}") - print("undirected_dict: ", undirected_dict) - 
print("directed_dict: ", directed_dict) + # print("undirected_dict: ", undirected_dict) + # print("directed_dict: ", directed_dict) # go through and add the edges from directed_dict and undirected_dict to G for key, value in directed_dict.items(): G.add_arc_with_capacity_and_unit_cost(idDict[key[0]],idDict[key[1]], default_capacity, int(value)) From 65d37501558f5f53807cef0e21b6292eb8d240e0 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Wed, 18 Sep 2024 13:03:29 -0500 Subject: [PATCH 15/18] update .DS_Store checking and delete file --- .DS_Store | Bin 6148 -> 0 bytes .gitignore | 5 ++++- 2 files changed, 4 insertions(+), 1 deletion(-) delete mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 1b1a152d13e4c564da9c380295416588d8a60e18..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKL2uJA6n^f?nrZ^&0Maf!$>S?ll7Buk63 z*ZUzFTg@BWEvMz|IPZfqSp{WS&Wb@eealy`q|BnrdKkS}m%#I@&FMUt=9$b-5ob<|UQkLWlu%9uB`Z%izcNqZtLogMF|ujo>7KcMO|q7d5!yRjoYSUIzo>_dI{{Jr?}1#ch?v$RxP1VG{cx&8>_o4F>)7U zUr<@+^WBR3D{G1FtXT!D0@qXl^G$SiuIc>kw5 Date: Wed, 18 Sep 2024 13:50:33 -0500 Subject: [PATCH 16/18] updates to minCostFlow.py code based on comments --- minCostFlow.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/minCostFlow.py b/minCostFlow.py index dc25700..0dde89f 100644 --- a/minCostFlow.py +++ b/minCostFlow.py @@ -37,6 +37,10 @@ def construct_digraph(edges_file, cap): with open(edges_file) as edges_f: for line in edges_f: tokens = line.strip().split() + + if len(tokens) != 4 : + raise ValueError (f"Each row in the edges file {edges_file} should contain 4 values to define an edge. 
Currently a row has {len(tokens)} values.") + node1 = tokens[0] if not node1 in idDict: idDict[node1] = curID @@ -49,29 +53,31 @@ def construct_digraph(edges_file, cap): w = int((1-(float(tokens[2])))*100) # lower the weight from token[2], higher the cost d = tokens[3] edge = (node1, node2) - sorted_edge = tuple(sorted(edge)) + sorted_edge = tuple(sorted(edge, reverse=False)) # all undirected edges are sorted edges + sorted_edge_reverse = tuple(sorted(edge, reverse=True)) if d == "D": if edge in directed_dict: - if w < directed_dict[edge]: + if w < directed_dict[edge]: # if weight is lower than the current edge, replace with newer edge weight directed_dict[edge] = w - elif sorted_edge in undirected_dict: + elif sorted_edge in undirected_dict: # priorize directed edges over undirected edges del undirected_dict[sorted_edge] directed_dict[edge] = w else: # edge not in directed_dict directed_dict[edge] = w elif d == "U": - if edge not in directed_dict and sorted_edge not in directed_dict and sorted_edge not in undirected_dict: + # add new edge to undirected dict; check for edge existing in directed_edges or undirected_dict + # if edge == sorted_edge, there is a chance reverse of edge (sorted_edge_reverse) is still in the directed_dict + if edge not in directed_dict and sorted_edge not in directed_dict and sorted_edge_reverse not in directed_dict and sorted_edge not in undirected_dict: undirected_dict[sorted_edge] = w elif sorted_edge in undirected_dict: - if w < undirected_dict[sorted_edge]: + if w < undirected_dict[sorted_edge]: # if weight is lower than the current edge, replace with newer edge weight undirected_dict[sorted_edge] = w else: raise ValueError (f"Cannot add edge: d = {d}") - # print("undirected_dict: ", undirected_dict) - # print("directed_dict: ", directed_dict) + # go through and add the edges from directed_dict and undirected_dict to G for key, value in directed_dict.items(): G.add_arc_with_capacity_and_unit_cost(idDict[key[0]],idDict[key[1]], 
default_capacity, int(value)) @@ -137,9 +143,9 @@ def write_output_to_sif(G,out_file_name,idDict): sorted_edge = tuple(sorted(edge)) if edge in directed_dict: - out_file.write(node1+"\t"+node2+"\t"+"D"+"\n") + out_file.write(edge[0]+"\t"+edge[1]+"\t"+"D"+"\n") elif sorted_edge in undirected_dict: - out_file.write(node1+"\t"+node2+"\t"+"U"+"\n") + out_file.write(sorted_edge[0]+"\t"+sorted_edge[1]+"\t"+"U"+"\n") else: raise KeyError(f"edge {edge} is not in the dicts") From 461da96337de23fd26c25daf53a9bfdaed1f4913 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Wed, 18 Sep 2024 13:51:50 -0500 Subject: [PATCH 17/18] update comments in test_minCostFlow.py --- test_minCostFlow.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/test_minCostFlow.py b/test_minCostFlow.py index 55d22d8..5833cf2 100644 --- a/test_minCostFlow.py +++ b/test_minCostFlow.py @@ -1,7 +1,5 @@ import subprocess -# uncomment line 73 and 74 to check over dictionaries - command = "python" script = "minCostFlow.py" From d902a3c9635bff965911343d2c7308f74c40cf18 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Wed, 18 Sep 2024 14:00:15 -0500 Subject: [PATCH 18/18] each case is now deterministic --- graphs/correct_outputs.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/graphs/correct_outputs.txt b/graphs/correct_outputs.txt index 7bd3a5b..7a36547 100644 --- a/graphs/correct_outputs.txt +++ b/graphs/correct_outputs.txt @@ -1,4 +1,4 @@ -The graphs series of graphs are used to check the code's correctness. Except for internal tiebreaking by the solver, each result is deterministic. +The graphs series of graphs are used to check the code's correctness. Each result is deterministic. graph1: A B D @@ -8,7 +8,7 @@ graph2: A B D B D D -graph3: deals with tiebreaking +graph3: A C D C D D @@ -24,7 +24,7 @@ graph6: A B U B D U -graph7: deals with tiebreaking +graph7: A B U B D U