-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmultimessage.py
191 lines (137 loc) · 6.45 KB
/
multimessage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#!/usr/bin/env python
#
# Copyright (c) 2013-2016, ETH Zurich.
# All rights reserved.
#
# This file is distributed under the terms in the attached LICENSE file.
# If you do not find this file, copies can be found by writing to:
# ETH Zurich D-INFK, Universitaetstr. 6, CH-8092 Zurich. Attn: Systems Group.
import sys
import re
import helpers
import logging
sys.path.append('machinedb/')
from topology_parser import on_same_node
class MultiMessage(object):
    """Parser and correction matrix for MultiMessage benchmark output.

    ``parse`` reads the benchmark output; ``init_matrix`` compares the
    measured cost of each (remote, local) send configuration with the cost
    predicted by the machine model; ``get_factor`` looks up the resulting
    factor for a given send batch.
    """

    # Patterns are compiled once instead of on every input line.  They are
    # raw strings so that "\d" reaches the regex engine unchanged instead of
    # being an invalid string escape.
    #
    # Format:
    # cores=01,02,03,04,08,12,16,20,24,28, mode=all , avg=109, stdev=13, med=106, min=98, max=355 cycles, count=1800, ignored=3200
    _RE_MEASUREMENT = re.compile(
        r'cores=([0-9,]+), mode=([^,]+), avg=(\d+), stdev=(\d+), med=(\d+), '
        r'min=(\d+), max=(\d+) cycles, count=(\d+), ignored=(\d+)')
    _RE_MEASUREMENT_FALLBACK = re.compile(
        r'r=1,l=4, mode=([^,]+), avg=(\d+), stdev=(\d+), med=(\d+), '
        r'min=(\d+), max=(\d+) cycles, count=(\d+), ignored=(\d+)')
    _RE_TSC = re.compile(r'Calibrating TSC overhead is (\d+) cycles')
    # num_cores: local=7 remote=3
    _RE_NUM_CORES = re.compile(r'num_cores: local=(\d+) remote=(\d+)')
    # sender: 12
    _RE_SENDER = re.compile(r'sender: (\d+)')

    @staticmethod
    def parse(f, topology, mode_list=('last', 'sum')):
        """Parse the given MultiMessage benchmark file.

        @param f Object providing readlines(); every line must support
            .decode() (e.g. a file opened in binary mode).
        @param topology Passed through to on_same_node() to classify each
            receiving core as local or remote relative to the sender.
        @param mode_list Modes for which result tables are allocated.
            Measurements of any other mode are ignored.
        @return Tuple (sender_core, cores_local, cores_remote, tsc_overhead,
            data, err, history).  data, err and history map a mode to a table
            indexed as [r][l] (number of remote / local cores in the batch)
            holding the avg value, the stdev value and the list of cores of
            that batch.  cores_local / cores_remote are the parsed counts
            plus one; they stay 0 if no "num_cores" line was seen.
            tsc_overhead stays -1 and sender_core stays None if the
            corresponding line was not seen.

        The "sender:" line has to precede the first measurement line
        (enforced with an assertion), and the tables only exist once a
        "num_cores:" line has been read.
        """
        sender_core = None
        data = {}
        err = {}
        history = {}
        cores_local = 0
        cores_remote = 0
        tsc_overhead = -1  # << TSC overhead read from output

        for raw_line in f.readlines():
            line = raw_line.decode().replace("\n", "")

            m = MultiMessage._RE_MEASUREMENT.match(line)
            if not m:
                # NOTE(review): this fallback pattern has no core-list group,
                # so group(1) is the mode text and the int() conversion below
                # raises ValueError unless that text happens to be numeric.
                # Behaviour kept as found -- confirm whether this format is
                # still produced.
                m = MultiMessage._RE_MEASUREMENT_FALLBACK.match(line)
            if m:
                cores = [int(i) for i in m.group(1).split(',')]
                l_cores = []
                r_cores = []
                logging.info('Found cores' + str(cores))
                assert sender_core is not None
                local = False  # we execute remote sends first
                for c in cores:
                    _local = on_same_node(topology, sender_core, c)
                    assert not local or _local  # no local message after remote ones
                    local = _local
                    if _local:
                        l_cores.append(c)
                    else:
                        r_cores.append(c)
                logging.info(('local', l_cores))
                logging.info(('remote', r_cores))
                num_l = len(l_cores)
                num_r = len(r_cores)
                mode = m.group(2).rstrip()
                logging.info(('found l=', num_l, 'r=', num_r,
                              'sender=', sender_core,
                              'cores=', cores, 'mode=', mode,
                              'value=', int(m.group(3))))
                if mode not in data:
                    # Either an unrequested mode or no table allocated yet.
                    continue
                data[mode][num_r][num_l] = int(m.group(3))  # arr[r][l]
                err[mode][num_r][num_l] = int(m.group(4))
                # remember which cores were used for that batch
                history[mode][num_r][num_l] = cores

            m = MultiMessage._RE_TSC.match(line)
            if m:
                logging.info("TSC " + m.group(1))
                tsc_overhead = int(m.group(1))

            m = MultiMessage._RE_NUM_CORES.match(line)
            if m:
                # +1 so that index 0 ("no such messages") is a valid entry.
                cores_local = int(m.group(1)) + 1
                cores_remote = int(m.group(2)) + 1
                logging.info("num_local_cores " + str(cores_local))
                logging.info("num_remote_cores " + str(cores_remote))
                for mode_name in mode_list:  # arr[r][l]
                    data[mode_name] = [[0 for i in range(cores_local)]
                                       for j in range(cores_remote)]
                    err[mode_name] = [[0 for i in range(cores_local)]
                                      for j in range(cores_remote)]
                    history[mode_name] = [[[] for i in range(cores_local)]
                                          for j in range(cores_remote)]

            m = MultiMessage._RE_SENDER.match(line)
            if m:
                sender_core = int(m.group(1))
                logging.info("sender is: " + str(sender_core))

        return (sender_core, cores_local, cores_remote, tsc_overhead,
                data, err, history)

    def __init__(self, _input, machine):
        """Initiate the multimessage parser.

        @param _input Benchmark output, handed to parse() as its file
            argument.
        @param machine Instance of the machine to create the multimessage
            benchmark for.  Its machine_topology attribute is used for
            parsing; get_send_history_cost() and on_same_numa_node() are
            called by the other methods.
        """
        self.machine = machine

        tmp = MultiMessage.parse(_input, machine.machine_topology)
        self.sender_core, self.cores_local, self.cores_remote, \
            self.tsc_overhead, self.data, self.err, self.history = tmp

        # Only the batches recorded for mode 'sum' are used afterwards.
        self.history = self.history['sum']

        self.init_matrix()

    def init_matrix(self):
        """Initialize the multimessage matrix.

        This is a "correction" matrix. For each multimessage
        configuration of sending r remote and l local messages,
        calculate how much the cost of the same send history using
        n-receive is off.

        self.matrix[r][l] is set to the predicted cost divided by the
        measured 'sum' value, or 0 where no 'sum' value was measured.
        Entry [0][0] is left at 0.  One line per r (highest first) is
        printed with the values for every l.
        """
        self.matrix = [[0 for l in range(self.cores_local)]
                       for r in range(self.cores_remote)]

        for r in range(self.cores_remote)[::-1]:
            # The row is collected and printed in one go: a trailing comma
            # inside print(...) does not suppress the newline.
            row = ['>> ']
            for l in range(self.cores_local):

                if r == 0 and l == 0:
                    row.append(' |')
                    continue

                # Determine send cost as predicted by n-receive
                send_pw = self.machine.get_send_history_cost(
                    self.sender_core, self.history[r][l])

                measured_sum = self.data['sum'][r][l]
                if measured_sum == 0:
                    rel_error = 0
                else:
                    rel_error = send_pw / float(measured_sum)

                # Only reported, not stored.
                cost_last = self.data['last'][r][l] - self.tsc_overhead

                self.matrix[r][l] = rel_error
                row.append(' %5.1f %5.0f %5.0f %5.2f |' %
                           (cost_last, measured_sum, send_pw, rel_error))
            print(''.join(row))

    def get_factor(self, sender, c_batch):
        """Determine the correction factor for the given send batch <c_batch>
        starting from core sender.

        @param sender Sending core.
        @param c_batch Iterable of receiving cores, in send order.
        @return self.matrix[c_remote][c_local], where the two indices are the
            number of remote / local cores in the batch, each reduced to the
            largest index of the matrix if it exceeds it (with a warning per
            step).

        A warning is issued for every remote core that follows a local one.
        An empty batch fails the assertion.
        """
        c_local = 0
        c_remote = 0
        for c in c_batch:
            if self.machine.on_same_numa_node(sender, c):
                c_local += 1
            else:
                c_remote += 1
                if c_local > 0:
                    helpers.warn('mm: adding remote communication AFTER local communication')

        assert c_local > 0 or c_remote > 0

        while c_local >= self.cores_local:
            helpers.warn('mm matrix local exceeded %d -> %d' % (c_local, self.cores_local))
            c_local -= 1

        while c_remote >= self.cores_remote:
            helpers.warn('mm matrix remote exceeded %d -> %d' % (c_remote, self.cores_remote))
            c_remote -= 1

        return self.matrix[c_remote][c_local]