# main.py
import copy
import pm4py
from pm4py.algo.discovery.inductive import algorithm as inductive_miner
from pm4py.objects.petri_net.importer import importer as pnml_importer
import re
import Subdue.src.Parameters as Parameters
import Subdue.src.Subdue as Subdue
import Subdue.src.Graph as Graph
import argparse
# implemented so the net does not have to be mined with ProM
def mine_petri_net(log, miner):
net, initial_marking, final_marking = miner.apply(log)
return net, initial_marking, final_marking
def import_xes(file_path):
return pm4py.read_xes(file_path)
def recursive_loop_removal(graph, vertex, path=None, back_tracking_path=None, coloring=None):
    # Based on DFS: removes loops (back edges detected via back_tracking_path) and updates the adjacency list of
    # the node. coloring is used to ensure all nodes are processed: node absent == white, "1" == grey, "2" == black.
    if path is None:
        path = []
    if back_tracking_path is None:
        back_tracking_path = []
    if coloring is None:
        coloring = {}
    path.append(str(vertex[0]))
    coloring[str(vertex[0])] = "1"  # grey
    back_tracking_path.append(str(vertex[0]))
    for neighbour in list(set(graph[str(vertex[0])])):
        if neighbour in back_tracking_path or (type(neighbour) is not str and neighbour.label in back_tracking_path):
            # remove vertex from adjacency list of predecessor
            old_neighbours = graph[str(vertex[0])]
            del old_neighbours[old_neighbours.index(neighbour)]
            graph[str(vertex[0])] = old_neighbours
            removed_label = neighbour.label if type(neighbour) is not str else neighbour
            print("Removed edge from DAG:", f'{vertex[0]}->{removed_label}')
        # this is only because the manually inserted first node is of type string and has no label
        label = neighbour.label if type(neighbour) is not str else neighbour
        if label not in coloring:
            neighbour_vertex = [label, graph[str(neighbour)]]
            path = recursive_loop_removal(graph, neighbour_vertex, path, back_tracking_path, coloring)[0]
    back_tracking_path.remove(str(vertex[0]))
    coloring[str(vertex[0])] = "2"  # black
    return path, graph
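# Illustrative sketch (string-only nodes, which the function also handles; not part of the original code):
# for graph = {"a": ["b"], "b": ["a"]} and start vertex ("a", ["b"]), the back edge b->a is removed,
# leaving {"a": ["b"], "b": []}.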
def make_graph_to_dag(graph, first_node):
    dag = graph.copy()
    # a vertex is represented as (node, neighbour_list)
    first_vertex = (first_node, graph[str(first_node)])
    dag = recursive_loop_removal(dag, first_vertex)[1]
    return dag
def find_adjacent_transitions_for_single_transition(transition):
    list_of_adjacent_transitions = recursively_scrape_neighbours_of_transition([], transition)
    if list_of_adjacent_transitions is None:
        return []
    # remove duplicates
    return list(set(list_of_adjacent_transitions))
def recursively_scrape_neighbours_of_transition(list_of_adjacent_transitions, transition):
    # for a transition, find its adjacent named transitions. For each adjacent transition, check if it is named.
    # If so, append it to the adjacency list; otherwise dig deeper into the nameless transition's neighbours.
current_neighbours = []
outgoing_arcs_of_transition = transition.out_arcs
# for each outgoing arc of adjacent places of transition
for arc in outgoing_arcs_of_transition:
targeted_place = arc.target
outgoing_arcs_from_place = targeted_place.out_arcs
if len(outgoing_arcs_from_place) == 0:
return
# for each adjacent transition of place
for outgoing_arc in outgoing_arcs_from_place:
two_step_transition = outgoing_arc.target
if two_step_transition.label is not None and is_transition_nameless(two_step_transition.label) is False:
current_neighbours.append(two_step_transition)
else:
further_down_neighbours = recursively_scrape_neighbours_of_transition(list_of_adjacent_transitions,
two_step_transition)
if further_down_neighbours is not None:
for neighbour in further_down_neighbours:
current_neighbours.append(neighbour)
return current_neighbours
def construct_graph(net):
    # for each named transition in the petri net, record every named transition that can be reached by traversing
    # only nameless (silent) transitions
adjacency_list_of_transitions = {}
for transition in net.transitions:
if transition.label is None:
transition.label = str(transition)
if transition.label is not None and is_transition_nameless(transition.label) is False:
adjacent_transitions = find_adjacent_transitions_for_single_transition(transition)
adjacency_list_of_transitions[transition.label] = adjacent_transitions
return adjacency_list_of_transitions
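# Illustrative shape of the result (hypothetical activity names, not from the original code):
# {"register request": [<Transition 'check ticket'>, <Transition 'examine casually'>], "check ticket": [...], ...}
# i.e. labels of named transitions mapped to the named Transition objects reachable through silent transitions only.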
def is_transition_nameless(label):
    # nameless (silent) transitions do not appear in the event log; their generated labels look like "n<number>"
    return re.search(r"n\d+", label) is not None
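# Examples (illustrative): is_transition_nameless("n42") -> True, is_transition_nameless("register request") -> False.
# Note that the search matches anywhere in the label, so a real activity name that happens to contain e.g. "n3"
# would also be treated as nameless.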
def build_instance_graphs(adjacency_list_of_transitions, log, pattern_belongings_of_node=None):
    # Build the instance graphs by iterating over each trace and, within it, over each activity of the trace.
    # For each activity, the neighbours that occur in the trace are added to the instance graph, i.e. the subgraph
    # induced by the trace activities and the edges between them.
    if pattern_belongings_of_node is None:
        pattern_belongings_of_node = {}
    all_instance_graphs = []
    for trace in log:
        instance_graph = {}
        trace_event_names = []
        for event in trace:
            # TODO: check whether concept:name is the standard key for activity names in XES
            trace_event_names.append(event["concept:name"])
trace_event_names = list(set(trace_event_names))
# add pattern to trace if pattern activity is in trace
for trace_event_name in trace_event_names:
if trace_event_name in pattern_belongings_of_node \
and f'Pattern-{pattern_belongings_of_node[trace_event_name]}' not in trace_event_names:
trace_event_names.append(f'Pattern-{pattern_belongings_of_node[trace_event_name]}')
for event_name in trace_event_names:
if event_name in adjacency_list_of_transitions:
neighbours = adjacency_list_of_transitions[event_name]
copy_of_neighbours = neighbours.copy()
# remove neighbour if not related to subgraph or included in pattern
for neighbour in neighbours:
if ((neighbour.label if type(neighbour) != str else neighbour) not in trace_event_names) \
or ((neighbour.label if type(neighbour) != str else neighbour)
in pattern_belongings_of_node):
copy_of_neighbours.remove(neighbour)
# filter self loops
if event_name in copy_of_neighbours:
copy_of_neighbours.remove(event_name)
instance_graph[event_name] = copy_of_neighbours
all_instance_graphs.append(instance_graph)
return all_instance_graphs
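# Illustrative result for one trace (hypothetical names): if a trace contains activities A and B and the DAG has an
# edge A->B, the corresponding instance graph is roughly {"A": [<Transition 'B'>], "B": []}; neighbours outside the
# trace or already contracted into a pattern are filtered out above.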
def parse_data(instance_graphs):
    # target format: one flat list describing all instance graphs (vertices and edges)
    # vertex = {"vertex": {"id", "attributes": {"label"}, "timestamp"}}
    # edge = {"edge": {"id", "source", "target", "directed": "false", "attributes": {"label"}, "timestamp"}}
node_id_dictionary = {}
final_graph = []
timestamp = 1
vertex_id = 1
edge_id = 1
for instance_graph in instance_graphs:
upcoming_node_id = vertex_id
for node in instance_graph:
if node not in node_id_dictionary.keys():
node_id_dictionary[node] = upcoming_node_id
upcoming_node_id += 1
for node in instance_graph:
vertex = {"vertex": {
"id": str(vertex_id),
"attributes": {"label": node},
"timestamp": str(timestamp)
}}
final_graph.append(vertex)
vertex_id += 1
for node in instance_graph:
adjacency_list_for_node = instance_graph[node]
for neighbour in adjacency_list_for_node:
edge = {"edge": {
"id": str(edge_id),
"source": str(node_id_dictionary[node]),
"target": str(node_id_dictionary[(neighbour.label if type(neighbour) != str else neighbour)]),
"attributes": {"label": f'{node}->{neighbour}'},
"directed": "false",
"timestamp": str(timestamp)
}}
final_graph.append(edge)
edge_id += 1
node_id_dictionary = {}
timestamp += 1
return final_graph
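# Illustrative entries of the returned list (hypothetical labels; the edge label embeds str(neighbour)):
# {"vertex": {"id": "1", "attributes": {"label": "A"}, "timestamp": "1"}}
# {"edge": {"id": "1", "source": "1", "target": "2", "attributes": {"label": "A->B"}, "directed": "false",
#           "timestamp": "1"}}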
def subdue(json_file, args):
    # json_file is not an actual JSON file, just a list of dicts in Subdue's graph format
    # set up graph
    graph = Graph.Graph()
    graph.load_from_json(json_file)
    # set up parameters (for all flags see Subdue.src.Parameters)
    parameters = Parameters.Parameters()
    parameters.writeCompressed = args.writecompressed
    parameters.writePattern = args.writepattern
    parameters.outputFileName = args.outputfile
    parameters.set_defaults_for_graph(graph)
    parameters.numBest = 1
    parameters.limit = 1000  # hard-coded; should be |V|^2
    parameters.maxSize = 1000  # hard-coded; should be |V|^2
    # finally, execute Subdue
    return Subdue.Subdue(parameters, graph)
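# Note (inferred from how the return value is used in main below, not from Subdue's documentation): the result is
# treated as a list of discovered patterns, with subdue_output[0][0] being the best pattern of the iteration and an
# empty list meaning that no further pattern could be found.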
def recursive_transition_traversal(place, found_neighbours=None, path=None):
    # Checks for each place whether its adjacent transitions are "nameless" (= not in the event log). If not, they
    # are added to the found neighbours of the original place; otherwise the adjacent places of the nameless
    # transition are checked instead (the transition is skipped over).
    if found_neighbours is None:
        found_neighbours = []
    if path is None:
        path = []
    path.append(place.name)
    outgoing_arcs = place.out_arcs
    for outgoing_arc in outgoing_arcs:
        # regex checks whether the transition is "nameless"
        x = re.search(r"n\d+", outgoing_arc.target.label)
        if x is not None and outgoing_arc.target.label not in path:
            outgoing_arcs_of_transition = outgoing_arc.target.out_arcs
            for outgoing_arc_of_transition in outgoing_arcs_of_transition:
                found_neighbours = recursive_transition_traversal(outgoing_arc_of_transition.target, found_neighbours,
                                                                  path)
        else:
            if outgoing_arc.target not in found_neighbours:
                found_neighbours.append(outgoing_arc.target)
    return found_neighbours
def get_start_node(net, graph):
    # get the first node in the graph, or build a new node that is adjacent to all nodes that can be reached from
    # the first place in the petri net
first_place = [place for place in list(net.places) if place.name == "n1"][0]
start_node_neighbours = recursive_transition_traversal(first_place)
if len(start_node_neighbours) > 1:
start_node = "manually_inserted_start_node"
graph[start_node] = start_node_neighbours
else:
start_node = start_node_neighbours[0]
return graph, start_node
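# Illustrative behaviour (hypothetical net): if the initial place n1 leads, via silent transitions only, to the named
# transitions A and B, a synthetic "manually_inserted_start_node" with neighbours [A, B] is added to the graph; if
# only A is reachable, A itself becomes the start node.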
def contract_discovered_nodes(current_pattern_graph, pattern_index, old_dag):
# contracts pattern to node and returns new DAG
contracted_node = f'Pattern-{pattern_index}'
adjacent_nodes_to_pattern = []
for node in current_pattern_graph:
node_neighbours = old_dag[node]
for neighbour in node_neighbours:
if (str(neighbour) not in current_pattern_graph.keys()):
adjacent_nodes_to_pattern.append(neighbour)
del old_dag[node]
for activity_key in old_dag:
activity_neighbours = old_dag[activity_key]
neighbour_labels = []
for neighbour in activity_neighbours:
neighbour_labels.append(neighbour.label if type(neighbour) != str else neighbour)
        # remove the contracted node from the adjacency list and add the pattern node as a new neighbour instead
        if node in neighbour_labels:
            if type(neighbour) != str:
                matching_neighbour = [n for n in activity_neighbours
                                      if (type(n) is not str and n.label == node)
                                      or (type(n) is str and n == node)][0]
                activity_neighbours.remove(matching_neighbour)
            else:
                activity_neighbours.remove(neighbour)
if f'Pattern-{pattern_index}' not in activity_neighbours:
activity_neighbours.append(f'Pattern-{pattern_index}')
old_dag[activity_key] = activity_neighbours
adjacent_nodes_to_pattern = list(set(adjacent_nodes_to_pattern))
old_dag[contracted_node] = adjacent_nodes_to_pattern
return old_dag
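# Illustrative sketch (string-only labels, hypothetical DAG, not from the original code): contracting
# Pattern-0 = {"A": ["B"], "B": []} in {"A": ["B"], "B": ["C"], "C": []} removes A and B, gives the remaining
# activities "Pattern-0" as a neighbour and adds "Pattern-0": ["C"], the nodes adjacent to the contracted pattern.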
def reconstruct_pattern_graph(pattern):
# reconstructs pattern adjacency list from the subdue graph object generated by subdue algorithm
pattern_graph = {}
edges = pattern.definition.edges
vertices = pattern.definition.vertices
for vertex_key in vertices:
pattern_graph[vertices[vertex_key].attributes["label"]] = []
for edge_key in edges:
edge_source = edges[edge_key].source.attributes["label"]
edge_target = edges[edge_key].target.attributes["label"]
pattern_graph[edge_source].append(edge_target)
return pattern_graph
def construct_pattern_node_edges(found_patterns, pattern_belongings_of_node, inverted_adjacency_list, dag):
    # For every found pattern, checks which of its nodes can be reached from an earlier found pattern and stores the
    # node together with its incoming edges. Additionally, it tracks which nodes in previously found patterns can be
    # reached from the current node, to save the outgoing edges to higher-up patterns.
possible_pattern_to_node_edges = {}
possible_node_to_pattern_edges = {}
for pattern in found_patterns:
        # skip the first pattern
if pattern == "Pattern-0":
continue
edges_from_pattern_node_to_higher_patterns = {}
edges_to_pattern_nodes_in_higher_patterns = {}
for node in found_patterns[pattern]:
            is_node_not_a_pattern = re.search(r"Pattern-\d+", node) is None
if is_node_not_a_pattern:
# skip nodes that have no incoming edges from other nodes
inverted_adjacency_list_keys = list(inverted_adjacency_list.keys())
string_keys = []
for key in inverted_adjacency_list_keys:
string_keys.append(str(key))
if node not in string_keys:
continue
# get all the neighbours that have an edge towards node
possible_incoming_neighbours_in_higher_patterns = inverted_adjacency_list[node]
for possible_incoming_neighbour in possible_incoming_neighbours_in_higher_patterns:
                    # if the incoming neighbour belongs to one of the patterns found before the current one
if pattern_belongings_of_node[possible_incoming_neighbour] is not None and \
pattern_belongings_of_node[possible_incoming_neighbour] < pattern_belongings_of_node[node]:
if node in edges_from_pattern_node_to_higher_patterns.keys() and \
possible_incoming_neighbour not in edges_from_pattern_node_to_higher_patterns[node]:
edges_from_pattern_node_to_higher_patterns[node].append(possible_incoming_neighbour)
else:
edges_from_pattern_node_to_higher_patterns[node] = [possible_incoming_neighbour]
                # get all the neighbours that node has an edge towards
possible_outgoing_neighbours_in_higher_patterns = dag[node]
for possible_outgoing_neighbour in possible_outgoing_neighbours_in_higher_patterns:
                    # if the outgoing neighbour belongs to one of the patterns found before the current one
if pattern_belongings_of_node[str(possible_outgoing_neighbour)] is not None and \
pattern_belongings_of_node[str(possible_outgoing_neighbour)] < pattern_belongings_of_node[node]:
if node in edges_to_pattern_nodes_in_higher_patterns.keys() and \
str(possible_outgoing_neighbour) not in edges_to_pattern_nodes_in_higher_patterns[node]:
edges_to_pattern_nodes_in_higher_patterns[node].append(possible_outgoing_neighbour.label)
else:
edges_to_pattern_nodes_in_higher_patterns[node] = [possible_outgoing_neighbour.label]
possible_pattern_to_node_edges[pattern] = edges_from_pattern_node_to_higher_patterns
possible_node_to_pattern_edges[pattern] = edges_to_pattern_nodes_in_higher_patterns
return possible_pattern_to_node_edges, possible_node_to_pattern_edges
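# Illustrative shape of the two returned dicts (hypothetical names): both are keyed by pattern, e.g.
# {"Pattern-1": {"B": ["A"]}} means that node B of Pattern-1 has an incoming edge from A (first dict) or an outgoing
# edge to A (second dict), where A belongs to an earlier found pattern.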
def main(args):
# import log
log = import_xes('testing/test_workout_log.xes')
# mine to petri net
# make sure the miner you want to use has been imported already
miner = inductive_miner
# net, initial_marking, final_marking = mine_petri_net(log, miner)
net, initial_marking, final_marking = pnml_importer.apply(
"testing/test_workout_log.pnml")
    # construct graph from the petri net - also removes transitions that are not in the event log (= nameless)
activity_graph = construct_graph(net)
# activity_graph = filter_nameless_transitions(adjacency_list_of_transitions)
activity_graph, start_node = get_start_node(net, activity_graph)
# removes loops from graph
dag = make_graph_to_dag(activity_graph, start_node)
print(dag)
# prepares input format for subdue using log and dag structure
instance_graphs = build_instance_graphs(dag, log)
json_instance_graphs = parse_data(instance_graphs)
    # construct the inverted adjacency list: for each node, which nodes have an edge towards it
inverted_adjacency_list = {}
for node in dag:
for neighbour in dag[node]:
if neighbour.label in inverted_adjacency_list.keys() and node not in inverted_adjacency_list[neighbour.label]:
inverted_adjacency_list[neighbour.label].append(node)
else:
inverted_adjacency_list[neighbour.label] = [node]
# find patterns that each activity belongs to
subdue_can_still_find_patterns = True
found_patterns = {}
pattern_belongings_of_node = {}
dag_with_contractions = copy.deepcopy(dag)
while subdue_can_still_find_patterns:
subdue_output = subdue(json_instance_graphs, args)
if len(subdue_output) == 0:
subdue_can_still_find_patterns = False
break
current_pattern_graph = reconstruct_pattern_graph(subdue_output[0][0])
pattern_index = len(found_patterns)
found_patterns[f'Pattern-{pattern_index}'] = current_pattern_graph
for node in current_pattern_graph:
pattern_belongings_of_node[node] = pattern_index
dag_with_contractions = contract_discovered_nodes(current_pattern_graph, pattern_index, dag_with_contractions)
instance_graphs = build_instance_graphs(dag_with_contractions, log, pattern_belongings_of_node)
json_instance_graphs = parse_data(instance_graphs)
    # add nodes that have not been assigned to any pattern to the belongings map
for node in dag:
if node not in pattern_belongings_of_node.keys():
pattern_belongings_of_node[node] = None
# construct the pattern-node edges for virtual data objects
possible_pattern_to_node_edges, possible_node_to_pattern_edges = construct_pattern_node_edges(found_patterns, pattern_belongings_of_node, inverted_adjacency_list, dag)
print(found_patterns)
print(pattern_belongings_of_node)
print("Virtual Dataobjects to start a fragment", possible_pattern_to_node_edges)
print("Virtual Dataobjects to end a fragment", possible_node_to_pattern_edges)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("-wc", "--writecompressed", dest="writecompressed",
action="store_true", default=False, help="writes compressed graph at each iteration")
parser.add_argument("-w", "--writepattern", dest="writepattern",
action="store_true", default=False, help="writes best pattern at each iteration")
parser.add_argument("-of", "--outputfile", dest="outputfile",
type=str, default=" ", help="file where outputs will be stored, without .json ending")
args = parser.parse_args()
main(args)
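# Example invocation (illustrative; the paths for the XES log and the PNML net are hard-coded in main above):
#   python main.py -w -of discovered_patterns
# -w / --writepattern writes the best pattern of each Subdue iteration, -of sets the output file name stem.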