-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathprofiler.py
170 lines (149 loc) · 7.55 KB
/
profiler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
from connector import Connector, LocalConnector, RemoteConnector
from event_group import EventGroup
import logging
from hperf_exception import ProfilerError
from concurrent.futures import ThreadPoolExecutor, as_completed
class Profiler:
    """
    `Profiler` is responsible for collecting raw microarchitecture performance data.
    It will collect raw performance data by other profilers (such as perf, sar, etc.) on SUTs through `Connector`.
    """

    def __init__(self, connector: Connector, configs: dict, event_groups: EventGroup):
        """
        Constructor of `Profiler`
        :param `connector`: an instance of `Connector` (`LocalConnector` or `RemoteConnector`)
        :param `configs`: a dict of parsed configurations by `Parser`
        :param `event_groups`: an instance of `EventGroup`
        """
        self.logger = logging.getLogger("hperf")
        self.connector: Connector = connector
        self.configs: dict = configs
        self.event_groups: EventGroup = event_groups

    def profile(self):
        """
        Generate and execute profiling script on SUT.
        Before profiling, static information of the SUT (CPU information and CPU topology) is collected.
        :raises:
            `ConnectorError`: for `RemoteConnector`,
            if fail to generate or execute script on remote SUT, or fail to pull raw performance data from remote SUT
            `ProfilerError`: if the returned code of executing script does not equal to 0,
            or if the connector type / ISA is not supported
        """
        self.logger.info("get static information of SUT")
        self.get_cpu_info()
        self.get_cpu_topo()

        perf_script = self.__get_perf_script()

        self.logger.info("start profiling")
        abnormal_flag = False
        # the script is submitted to a worker thread and its return code is collected when it completes
        with ThreadPoolExecutor(max_workers=1) as executor:
            perf_task = executor.submit(self.connector.run_script, perf_script, "perf.sh")
            for future in as_completed([perf_task]):
                ret_code = future.result()
                if ret_code != 0:
                    abnormal_flag = True

        # for remote SUT, pull the output files even if the script failed,
        # so that they are available locally before the error is raised.
        if isinstance(self.connector, RemoteConnector):
            self.connector.pull_remote()

        if abnormal_flag:
            raise ProfilerError("Executing profiling script on the SUT failed.")
        self.logger.info("end profiling")

    def sanity_check(self) -> bool:
        """
        Check the environment on the SUT for profiling.
        Since the collection of performance data requires exclusive usage of PMCs,
        it is necessary to check if there is any other profiler (such as VTune, perf, etc.) is already running.
        Specifically, for x86_64 platform, the NMI watchdog will occupy a generic PMC,
        so that it will also be checked.
        :return: if the SUT passes the sanity check, it will return `True`,
        else it will return `False` and record the information through `Logger`.
        :raises:
            `ConnectorError`: for `RemoteConnector`, if fail to execute command for sanity check on remote SUT
        """
        sanity_check_flag = True

        # 1. check if there is any other profiler (such as VTune, perf, etc.) is already running
        # TODO: add more pattern of profilers may interfere measurement
        process_check_list = [
            "linux-tools/.*/perf",
            "/intel/oneapi/vtune/.*/emon",
        ]  # process command pattern
        for process in process_check_list:
            # `awk '{print $8}'` keeps the 8th column of `ps -ef` (the command), which is then matched by grep.
            # the pattern is single-quoted so that the shell passes it to grep verbatim
            # instead of treating `.*` as a glob.
            process_check_cmd = f"ps -ef | awk '{{print $8}}' | grep '{process}'"
            output = self.connector.run_command(process_check_cmd)  # may raise `ConnectorError`
            if output:
                self.logger.warning(f"sanity check: process may interfere measurement exists. {output}")
                sanity_check_flag = False

        # 2. for x86_64 platform, check the NMI watchdog
        if self.event_groups.isa == "x86_64":
            nmi_watchdog_check_cmd = ["cat", "/proc/sys/kernel/nmi_watchdog"]
            output = self.connector.run_command(nmi_watchdog_check_cmd)  # may raise `ConnectorError`
            try:
                nmi_watchdog_enabled = int(output) == 1
            except (TypeError, ValueError):
                # empty or non-numeric output (e.g. the file cannot be read on the SUT):
                # the status is unknown, so report it rather than crash the whole check.
                # NOTE(review): an unknown status does not fail the sanity check - confirm this is the desired policy.
                self.logger.warning("sanity check: fail to determine the status of NMI watchdog.")
                nmi_watchdog_enabled = False
            if nmi_watchdog_enabled:
                self.logger.warning("sanity check: NMI watchdog is enabled.")
                sanity_check_flag = False

        return sanity_check_flag

    def get_cpu_info(self):
        """
        Save the output of `lscpu` on the SUT to the file `cpu_info` in the test directory on the SUT.
        :raises:
            `ProfilerError`: if the connector is neither `LocalConnector` nor `RemoteConnector`
        """
        output_dir = self._get_sut_test_dir("getting CPU information")
        self.connector.run_command("lscpu > " + f"{output_dir}/cpu_info")

    def get_cpu_topo(self):
        """
        Extract the CPU topology from `/proc/cpuinfo` on the SUT by awk
        and save it to the file `cpu_topo` in the test directory on the SUT.
        The output is tab-separated, one line for each processor.
        :raises:
            `ProfilerError`: if the connector is neither `LocalConnector` nor `RemoteConnector`,
            or if the ISA is neither `x86_64` nor `aarch64`
        """
        output_dir = self._get_sut_test_dir("getting CPU topology")

        # NOTE(review): awk does not specify the iteration order of `for (key in cpu)`,
        # so the lines of the output are not guaranteed to be sorted by processor id.
        if self.event_groups.isa == "x86_64":
            # output format:
            # processor | socket | core id in socket
            # 0         | 0      | 0
            # 1         | 0      | 1
            # 2         | 0      | 2
            # ...
            get_topo_cmd = (
                r"awk -F: 'BEGIN{i=0;j=0;k=0}"
                r"/processor/{cpu[i]=$2;i++}"
                r"/physical id/{skt[j]=$2;j++}"
                r"/core id/{phy[k]=$2;k++}"
                r'''END{OFS="\t";for(key in cpu)print cpu[key],skt[key],phy[key]}' '''
                r"/proc/cpuinfo > " + f"{output_dir}/cpu_topo"
            )
        elif self.event_groups.isa == "aarch64":
            # TODO: getting topo for arm is undone
            # the socket is always reported as 0 for now.
            # output format:
            # processor | socket
            # 0         | 0
            # 1         | 0
            # 2         | 0
            # ...
            get_topo_cmd = (
                r"awk -F: 'BEGIN{i=0}"
                r"/processor/{cpu[i]=$2;i++}"
                r'''END{OFS="\t";for(key in cpu)print cpu[key],0}' '''
                r"/proc/cpuinfo > " + f"{output_dir}/cpu_topo"
            )
        else:
            raise ProfilerError("Unsupported ISA.")

        self.connector.run_command(get_topo_cmd)

    def _get_sut_test_dir(self, action: str):
        """
        Get the path of the test directory on the SUT, where the output files are written to.
        For local SUT, it is the test directory itself.
        For remote SUT, it is the remote test directory which can be accessed on the remote SUT.
        :param `action`: a short description of what the caller is doing, which is used in the error message
        :return: `test_dir` of `LocalConnector` or `remote_test_dir` of `RemoteConnector`
        :raises:
            `ProfilerError`: if the connector is neither `LocalConnector` nor `RemoteConnector`
        """
        if isinstance(self.connector, LocalConnector):
            return self.connector.test_dir
        if isinstance(self.connector, RemoteConnector):
            return self.connector.remote_test_dir
        raise ProfilerError(f"Fail to get test directory path on SUT when {action}.")

    def __get_perf_script(self) -> str:
        """
        Based on the parsed configuration, generate the string of shell script for profiling by perf.
        The script records the start timestamp to `perf_start_timestamp`,
        then runs `perf stat` on the command in `configs["command"]`,
        where the counting results are written to `perf_result` and stderr is written to `perf_error`.
        :return: a string of shell script for profiling
        :raises:
            `ProfilerError`: if the connector is neither `LocalConnector` nor `RemoteConnector`
        """
        # for local SUT, output raw performance data to the test directory directly will be fine.
        # however, for remote SUT, raw performance data should be output to the remote temporary directory
        # which can be accessed on remote SUT, then pull the data to the local test directory.
        perf_dir = self._get_sut_test_dir("generating profiling script")

        event_groups_str = self.event_groups.get_event_groups_str()
        command = self.configs["command"]

        script_lines = [
            "#!/bin/bash",
            f"TMP_DIR={perf_dir}",
            'perf_result="$TMP_DIR"/perf_result',
            'perf_error="$TMP_DIR"/perf_error',
            # `cut -b 1-23` truncates the nanoseconds of the timestamp to milliseconds
            'date +%Y-%m-%d" "%H:%M:%S.%N | cut -b 1-23 > "$TMP_DIR"/perf_start_timestamp',
            # perf writes the counting results to fd 3 (`--log-fd 3`), which is redirected to `perf_result`.
            # the results are system-wide (`-a`), not aggregated across CPUs (`-A`), tab-separated (`-x`)
            # and printed every 1000 ms (`-I 1000`).
            f'3>"$perf_result" perf stat -e {event_groups_str} -A -a -x "\t" -I 1000 --log-fd 3 {command} 2>"$perf_error"',
        ]
        script = "\n".join(script_lines) + "\n"

        self.logger.debug("profiling script by perf: \n" + script)
        return script