-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCost_Update.py
344 lines (307 loc) · 11.1 KB
/
Cost_Update.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
#!/usr/bin/env python
import Header
from Header import *
import UtilFunc
from UtilFunc import *
#----------------------------------------------------------------
"""
this function fills the support score values for individual cluster pairs
also accumulates the frequency values for individual relations (except the relation R3)
"""
def Form_Support_Score_Queue_Cluster_Pair(outfile):
no_of_clusters = len(CURRENT_CLUST_IDX_LIST)
for i in range(no_of_clusters - 1):
clust1 = CURRENT_CLUST_IDX_LIST[i]
taxa_list1 = Cluster_Info_Dict[clust1]._GetSpeciesList()
for j in range(i+1, no_of_clusters):
clust2 = CURRENT_CLUST_IDX_LIST[j]
taxa_list2 = Cluster_Info_Dict[clust2]._GetSpeciesList()
"""
compute frequency of individual relations for this cluster pair
"""
R1_freq, R2_freq, R3_freq, R4_freq = GetFreqScore_ClusterPair(taxa_list1, taxa_list2)
"""
priority of individual relations
"""
R1_priority = R1_freq + R3_freq - R2_freq - R4_freq
R2_priority = R2_freq + R3_freq - R1_freq - R4_freq
R4_priority = R4_freq - R1_freq - R2_freq
"""
support score of individual relations
"""
R1_score = R1_freq + R1_priority
R2_score = R2_freq + R2_priority
R4_score = R4_freq + R4_priority
if (DEBUG_LEVEL > 2):
fp = open(outfile, 'a')
fp.write('\n **** Examine cluster pair ' + str(clust1) \
+ ' and ' + str(clust2) + ' R1_freq: ' + str(R1_freq) + \
' R2_freq: ' + str(R2_freq) + ' R3_freq: ' \
+ str(R3_freq) + ' R4_freq: ' + str(R4_freq))
fp.close()
"""
initialize this cluster pair only if they are related by one of the
four relations r1 to r4 in at least one input tree
"""
support_tree_count = R1_freq + R2_freq + R3_freq + R4_freq
if (support_tree_count > 0):
"""
frequency of the consensus relation
"""
max_freq = max(R1_freq, R2_freq, R3_freq, R4_freq)
"""
add R4 relation if it has positive frequency
"""
if (R4_freq > 0):
subl = [clust1, clust2, RELATION_R4, R4_freq, R4_score]
Queue_Score_Cluster_Pair.append(subl)
if (R1_freq > 0):
if ((R1_freq >= PERCENT_MAX_FREQ1 * max_freq) or ((R1_freq + R3_freq) >= (PERCENT_MAX_FREQ2 * max_freq))):
if (R1_freq >= R2_freq):
subl = [clust1, clust2, RELATION_R1, R1_freq, R1_score]
Queue_Score_Cluster_Pair.append(subl)
if (R2_freq > 0):
if ((R2_freq >= PERCENT_MAX_FREQ1 * max_freq) or ((R2_freq + R3_freq) >= (PERCENT_MAX_FREQ2 * max_freq))):
if (R2_freq >= R1_freq):
subl = [clust1, clust2, RELATION_R2, R2_freq, R2_score]
Queue_Score_Cluster_Pair.append(subl)
return
#----------------------------------------------------------------
"""
here we fill the support score queues with the couplet relations and the support score values
from which the DAG will be constructed
"""
def Fill_Support_Score_Queues_Couplet_Based():
"""
here, we process all the couplets
individual couplets lead to individual cluster pairs
for individual relations between a couplet, corresponding edge between the cluster pair is established
"""
for l in TaxaPair_Reln_Dict:
# add - sourya
"""
here we clear the list of taxa indices underlying the LCA nodes for this couplet
for all the input trees
"""
TaxaPair_Reln_Dict[l]._ResetLCAUndList()
# end add - sourya
r3_freq = TaxaPair_Reln_Dict[l]._GetEdgeWeight(RELATION_R3)
r4_freq = TaxaPair_Reln_Dict[l]._GetEdgeWeight(RELATION_R4)
"""
single_edge_exist: if TRUE, means that only one type of relation
is supported (with respect to input trees) between this couplet
detection of it during setting the priority values of different relations
basically, we are looking for the consensus relation
"""
single_edge_exist_list = TaxaPair_Reln_Dict[l]._CheckNonConflictingCouplet()
single_edge_exist = single_edge_exist_list[0]
consensus_reln_type = single_edge_exist_list[1]
"""
maximum frequency among all 4 relations
"""
max_freq = TaxaPair_Reln_Dict[l]._GetConsensusFreq()
"""
for other types of couplets, not included in above criterion
"""
for reln_type in [RELATION_R4, RELATION_R1, RELATION_R2, RELATION_R3]:
curr_reln_freq = TaxaPair_Reln_Dict[l]._GetEdgeWeight(reln_type)
if (curr_reln_freq > 0):
if (reln_type == RELATION_R4):
"""
non zero frequency of R4 relation is considered
"""
TaxaPair_Reln_Dict[l]._AddAllowedReln(reln_type)
elif (reln_type == RELATION_R1) or (reln_type == RELATION_R2):
if (r4_freq == 0) or (curr_reln_freq >= (PERCENT_MAX_FREQ2 * max_freq)):
"""
either zero frequency of R4 relation, or sufficient frequency of R1/R2 relations are considered
for their inclusion in the
"""
TaxaPair_Reln_Dict[l]._AddAllowedReln(reln_type)
else: #if (reln_type == RELATION_R3):
if (len(TaxaPair_Reln_Dict[l]._GetAllowedRelnList()) <= 1) \
and (TaxaPair_Reln_Dict[l]._CheckTargetRelnConsensus(reln_type) == True):
"""
first add relation R3 in the allowed relation list
"""
TaxaPair_Reln_Dict[l]._AddAllowedReln(reln_type)
"""
differentiate between two cases
1) When R3 is the sole relation between this couplet
2) When R3 is the majority consensus relation between this couplet
"""
if (consensus_reln_type == RELATION_R3) and (single_edge_exist == 1):
"""
we also add R1, R2, R4 relations as the allowed relations
"""
TaxaPair_Reln_Dict[l]._AddAllowedReln(RELATION_R1)
TaxaPair_Reln_Dict[l]._AddAllowedReln(RELATION_R2)
TaxaPair_Reln_Dict[l]._AddAllowedReln(RELATION_R4)
"""
add the support score relation in the queue
"""
reln_freq = TaxaPair_Reln_Dict[l]._GetEdgeWeight(consensus_reln_type)
sublist = [l[0], l[1], consensus_reln_type, reln_freq, reln_freq]
Queue_Score_R3_SingleReln.append(sublist)
elif (TaxaPair_Reln_Dict[l]._CheckTargetRelnMajorityConsensus(RELATION_R3) == True) and (r3_freq > 1):
"""
we impose one condition that the no of trees bearing this R3 relation should be > 1
add the support score relation in the queue
"""
sublist = [l[0], l[1], RELATION_R3, r3_freq, r3_freq]
Queue_Score_R3_MajCons.append(sublist)
return
#------------------------------------------------------
""" this code section implements the max priority queue """
#------------------------------------------------------
# parent node of current node
def Parent(idx):
return int((idx-1) / 2)
# left child node of current node
def Left(idx):
return (2*idx+1)
# right child node of current node
def Right(idx):
return (2*idx+2)
#------------------------------------------------------------------------
# version 1X - 15th Sep, 2015
#-------------------------
"""
this function is used for sorting the support score queue (priority queue structure)
Parameters:
1) inp_queue: Input priority queue
can be either Nonconflicting support score queue or the conflicting support score queue
2) i, j denotes indices of two different elements within the priority queue
these two elements are to be compared
output:
returns index with the higher support score measure
"""
def Higher_Score_Value(inp_queue, i, j):
key1 = (inp_queue[i][0], inp_queue[i][1])
reln1 = inp_queue[i][2]
freq1 = inp_queue[i][3]
score1 = inp_queue[i][4]
key2 = (inp_queue[j][0], inp_queue[j][1])
reln2 = inp_queue[j][2]
freq2 = inp_queue[j][3]
score2 = inp_queue[j][4]
"""
function when non-conflicting support score queue is used
"""
# case A - if both scores are strictly positive
if (score1 > 0) or (score2 > 0):
if (score1 < score2):
return j
elif (score1 > score2):
return i
else: #if (score1 == score2):
if (freq1 < freq2):
return j
elif (freq2 < freq1):
return i
#elif ((reln1 == RELATION_R1) or (reln1 == RELATION_R2)) and ((reln2 != RELATION_R1) and (reln2 != RELATION_R2)):
#return i
#elif ((reln2 == RELATION_R1) or (reln2 == RELATION_R2)) and ((reln1 != RELATION_R1) and (reln1 != RELATION_R2)):
#return j
elif (reln1 == RELATION_R4) and (reln2 != RELATION_R4):
return i
elif (reln1 != RELATION_R4) and (reln2 == RELATION_R4):
return j
else:
"""
here one or both scores are either zero or negative
"""
if (freq1 < freq2):
return j
elif (freq2 < freq1):
return i
#elif ((reln1 == RELATION_R1) or (reln1 == RELATION_R2)) and ((reln2 != RELATION_R1) and (reln2 != RELATION_R2)):
#return i
#elif ((reln2 == RELATION_R1) or (reln2 == RELATION_R2)) and ((reln1 != RELATION_R1) and (reln1 != RELATION_R2)):
#return j
elif (reln1 == RELATION_R4) and (reln2 != RELATION_R4):
return i
elif (reln1 != RELATION_R4) and (reln2 == RELATION_R4):
return j
# default return condition
return i
#------------------------------------------------------------------------
"""
this function exchanges two elements in the heap
"""
def Exchange_Elem(inp_queue, i, j):
"""
here r corresponds to different fields (of the sublists)
of the entries in the support score queue
as individual queue entry consists of 5 different fields
the copy operation should take care of the 5 fields
"""
total_elem = 5
# swap individual fields
for r in range(total_elem): #(4):
temp_key = inp_queue[i][r]
inp_queue[i][r] = inp_queue[j][r]
inp_queue[j][r] = temp_key
return
#-----------------------------------------------
"""
maintain max heap property
note: heap_size may not be the actual length of the queue
but the working length (on which the remaining sorting operation will take place)
"""
def Max_Heapify(inp_queue, idx, heap_size):
l = Left(idx)
r = Right(idx)
if (l < heap_size) and (Higher_Score_Value(inp_queue, idx, l) == l):
largest_idx = l
else:
largest_idx = idx
if (r < heap_size) and (Higher_Score_Value(inp_queue, largest_idx, r) == r):
largest_idx = r
if (largest_idx != idx):
Exchange_Elem(inp_queue, idx, largest_idx)
Max_Heapify(inp_queue, largest_idx, heap_size)
#-----------------------------------------------
"""
extract the current maximum and also pop the element from the heap
"""
def Heap_Extract_Max(inp_queue):
if (len(inp_queue) < 1):
print 'underflow of max priority queue'
"""
1st element is the maximum
"""
max_elem = list(inp_queue[0])
"""
replace the first element with the last element of the queue
"""
Exchange_Elem(inp_queue, 0, len(inp_queue) - 1)
"""
delete the last element of the queue
"""
del inp_queue[len(inp_queue) - 1]
heap_size = len(inp_queue)
"""
call the max_heapify function to maintain the heap property
0 is the starting index of the list storing the heap structure
"""
Max_Heapify(inp_queue, 0, heap_size)
return max_elem
#-----------------------------------------------
"""
this function builds the priority queue (max heap property)
"""
def Build_Max_Heap(inp_queue):
heap_size = len(inp_queue)
for idx in range(int(len(inp_queue) / 2), -1, -1):
Max_Heapify(inp_queue, idx, heap_size)
#-----------------------------------------------
"""
this is the heap sort algorithm
parameters:
1) inp_queue: Input priority queue
can be either Nonconflicting support score queue or the conflicting support score queue
"""
def Sort_Priority_Queue(inp_queue):
Build_Max_Heap(inp_queue)
heap_size = len(inp_queue)