-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathkMeanClusteringSklearn.py
103 lines (88 loc) · 3.66 KB
/
kMeanClusteringSklearn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# The MIT License (MIT)
#
# Copyright (C) 2019 - David Piché
###############################################################################
# This file is an example of a Python script using machine learning on trace
# data. This is the first part of a two-part Python script. This part is meant
# to be run with the Jython scripting engine, as it does an event request on
# the currently active trace. It works on a kernel trace.
#
# The second part of the script is expected to be in the same directory as this
# script
#
# @param arg1
# Number of clusters for the kmean algorithm
###############################################################################
# Modules
loadModule("/TraceCompass/Trace")
loadModule("/System/Resources")
loadModule("/System/Scripting")
import json
# Verify the arguments: one value is required, the number of clusters that is
# forwarded to the companion script.
if len(argv) < 1:
    print("Required arguments : <number of clusters for kmeans>")
    exit()

# Is the first argument an integer? Only conversion failures are caught here,
# so unrelated errors are not misreported as a bad argument.
try:
    clusterCount = int(argv[0])
except (ValueError, TypeError):
    print("First argument must be an integer")
    exit()
else:
    # Reject a cluster count below 1 now rather than after the whole trace
    # has been read and the companion script has been forked.
    if clusterCount < 1:
        print("First argument must be a positive integer")
        exit()

# Does the companion script exist? It is looked up in the same directory as
# the script currently being executed.
currentFile = getScriptEngine().getExecutedFile()
parentPath = currentFile.getParent().getFullPath()
filePath = "workspace://" + str(parentPath) + "/kMeanClusteringSklearn_py4j.py"
# NOTE: the name `file` is kept because the fork() call at the end of the
# script reads it.
file = getFile(filePath)
if file is None:
    print("Callee script not found: " + filePath)
    exit()

# Get the active trace; extractAspects() reads this module-level variable.
trace = getActiveTrace()
if trace is None:
    print("There is no active trace. Please open the trace to run this script on")
    exit()
# Assigns an index to each distinct syscall name
def distinctSyscallCount(syscallName, syscallNameCount, syscallNameIndex):
    """Register ``syscallName`` in ``syscallNameCount`` if it is not known yet.

    Args:
        syscallName: Name of the syscall; it is converted with ``str()``
            before being used as a dictionary key.
        syscallNameCount: Dictionary mapping syscall names to the index they
            were given. It is modified in place.
        syscallNameIndex: Index to give to the next unseen syscall name.

    Returns:
        A tuple ``(syscallNameCount, syscallNameIndex)``: the same dictionary
        object, and the next free index (incremented by one only when a new
        name was added).
    """
    key = str(syscallName)
    # A plain membership test: no sentinel value that a stored entry could
    # collide with, and an empty dictionary needs no special case.
    if key not in syscallNameCount:
        syscallNameCount[key] = syscallNameIndex
        syscallNameIndex += 1
    return syscallNameCount, syscallNameIndex
# Extract the necessary data
def extractAspects():
    """Pair syscall entry and exit events of the active trace.

    Iterates over every event of the module-level ``trace``. A syscall entry
    event (regular or compat prefix) is remembered per thread id; when a
    syscall exit event is seen for the same thread id, the pair is turned into
    one sample. An exit without a pending entry is ignored.

    Returns:
        A tuple ``(syscallNameCount, syscallNameList, durations)``:
        ``syscallNameCount`` maps each distinct syscall name to the index
        assigned by ``distinctSyscallCount``; ``syscallNameList`` holds the
        syscall name of each sample; ``durations`` holds, at the same
        position, the exit timestamp minus the entry timestamp in nanoseconds
        as a float.
    """
    layout = trace.getKernelEventLayout()
    # The layout prefixes and the TID aspect are the same for every event, so
    # they are looked up once instead of on each loop iteration.
    entryPrefix = layout.eventSyscallEntryPrefix()
    compatEntryPrefix = layout.eventCompatSyscallEntryPrefix()
    # NOTE(review): only eventSyscallExitPrefix() is matched for exits; confirm
    # whether compat syscall exits use a different prefix in this layout.
    exitPrefix = layout.eventSyscallExitPrefix()
    tidAspect = org.eclipse.tracecompass.analysis.os.linux.core.kernel.KernelTidAspect.INSTANCE

    # tid -> [entry timestamp in ns, syscall name] for syscalls still running
    pendingByTid = java.util.HashMap()
    syscallNameIndex = 0
    durations = []
    syscallNameList = []
    syscallNameCount = {}

    events = getEventIterator(trace)
    while events.hasNext():
        event = events.next()
        eventName = str(event.getName())

        # Remember which entry prefix matched so the syscall name is cut with
        # the right length; the two prefixes may differ in length.
        if eventName.startswith(entryPrefix):
            matchedEntryPrefix = entryPrefix
        elif eventName.startswith(compatEntryPrefix):
            matchedEntryPrefix = compatEntryPrefix
        else:
            matchedEntryPrefix = None

        if matchedEntryPrefix is not None:
            tid = tidAspect.resolve(event)
            if tid is None:
                # Without a thread id the entry cannot be matched to its
                # exit; storing it under None would mix unrelated threads.
                continue
            startTime = event.getTimestamp().toNanos()
            syscallName = eventName[len(matchedEntryPrefix):]
            pendingByTid.put(tid, [startTime, syscallName])
        elif eventName.startswith(exitPrefix):
            tid = tidAspect.resolve(event)
            if tid is None:
                continue
            endTime = event.getTimestamp().toNanos()
            syscallInfo = pendingByTid.remove(tid)
            if syscallInfo is not None:
                durations.append(float(endTime - syscallInfo[0]))
                syscallNameCount, syscallNameIndex = distinctSyscallCount(
                    syscallInfo[1], syscallNameCount, syscallNameIndex)
                syscallNameList.append(syscallInfo[1])
    return syscallNameCount, syscallNameList, durations
# Calculate the data for this analysis
syscallNameCount, syscallNameList, durations = extractAspects()

# Publish each result as a shared object. Every value is dumped to JSON first
# and stored under the key listed next to it.
sharedObjects = (
    ("syscallNameCount", syscallNameCount),
    ("syscallNameList", syscallNameList),
    ("durations", durations),
)
for sharedKey, sharedValue in sharedObjects:
    setSharedObject(sharedKey, json.dumps(sharedValue), False, True)

# Run the companion script with the k-means argument, wait for it to finish
# and print its result
result = fork(file, argv[0], "org.eclipse.ease.lang.python.py4j.engine")
result.waitForResult()
print(result)