-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
488 lines (468 loc) · 29.4 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
import os
import subprocess
import datetime
import time
import yaml
import docker
import sys
import argparse
from xinda.systems import cassandra, crdb, etcd, hbase, mapred, kafka, depfast, copilot
from xinda.configs import logging, slow_fault, tool
from xinda.configs.benchmark import *
from xinda.configs.reslim import *
import traceback
# Command-line interface for the Xinda slow-fault testing pipeline.
# Arguments are grouped: system/fault selection, init paths, then
# per-benchmark knobs (YCSB, hadoop, kafka, sysbench, etcd-official, ...).
parser = argparse.ArgumentParser(description="Xinda: A slow-fault testing pipeline for distributed systems.")
parser.add_argument('--sys_name', type=str, required=True,
                    choices=['cassandra', 'hbase', 'hadoop', 'etcd', 'crdb', 'kafka', 'depfast', 'copilot'],
                    help='Name of the distributed systems to be tested.')
parser.add_argument('--data_dir', type=str, required=True,
                    help='Name of data directory to store all the logs')
# Slow fault
parser.add_argument('--fault_type', type=str, required=True,
                    choices=['nw', 'fs', 'none'],
                    help='[Faults] Types of slow faults to be injected.')
parser.add_argument('--fault_location', type=str, required=True,
                    help='[Faults] Fault injection location')
parser.add_argument('--fault_duration', type=int, required=True,
                    help='[Faults] Fault injection duration')
parser.add_argument('--fault_severity', type=str, required=True,
                    help='[Faults] Fault injection severity')
parser.add_argument('--fault_start_time', type=int, required=True,
                    help='[Faults] Fault injection timing in seconds after the benchmark is running.')
parser.add_argument('--bench_exec_time', type=str, default='150',
                    help='[Benchmark] Benchmark duration in seconds')
parser.add_argument('--unique_identifier', type=str, default=None,
                    help='A unique identifier of current experiment')
parser.add_argument('--batch_test_log', type=str, default=None,
                    help='Path to the meta log file of batch test')
parser.add_argument('--if_restart', action='store_true', default=False,
                    help='If we need to restart the system after fault injection')
parser.add_argument('--if_iaso', type=str, default='none', choices=['reboot', 'shutdown', 'none'],
                    help='If we want to mimic IASO')
parser.add_argument('--cluster_size', type=int, default=3,
                    help='Cluster size (default: 3)')
parser.add_argument('--cpu_limit', type=str, default=None,
                    help='The number of CPU cores allocated to each container instance')
parser.add_argument('--mem_limit', type=str, default=None,
                    help='The size of memory allocated to each container instance')
# Init
parser.add_argument('--log_root_dir', type=str, default=f"{os.path.expanduser('~')}/workdir/data/default",
                    help='[Init] The root directory to store logs (data)')
parser.add_argument('--xinda_software_dir', type=str, default=f"{os.path.expanduser('~')}/workdir/xinda-software",
                    help='[Init] The path to xinda-software')
parser.add_argument('--xinda_tools_dir', type=str, default=f"{os.path.expanduser('~')}/workdir/xinda/tools",
                    help='[Init] The path to xinda/tools')
parser.add_argument('--charybdefs_mount_dir', type=str, default="/var/lib/docker/cfs_mount/tmp",
                    help='[Init] The path where docker volume and charybdefs use to mount')
parser.add_argument('--iter', type=str, default='1',
                    help='[Init] Iteration of current experiment setup')
parser.add_argument('--test_script_dir', type=str, default=f"{os.path.expanduser('~')}/workdir/xinda/test_scripts/RQ1_1",
                    help='[Init] The path to test_scripts/RQ1_1')
parser.add_argument('--version', type=str, default=None,
                    help='[Init] Version of the system to be tested')
parser.add_argument('--coverage', action='store_true', default=False,
                    help="[Init] Whether to run coverage study, supported systems: hadoop and etcd")
parser.add_argument('--change_workload', action='store_true', default=False,
                    help="[Init] Whether to change workload at runtime")
# YCSB - Benchmark
parser.add_argument('--ycsb_wkl', type=str, default='mixed',
                    help='[Benchmark] YCSB workload type.')
parser.add_argument('--ycsb_recordcount', type=str, default='10000',
                    help='[Benchmark] Number of records during ycsb-load phase')
parser.add_argument('--ycsb_operationcount', type=str, default='500000000',
                    help='[Benchmark] Number of operations during ycsb-run phase')
parser.add_argument('--ycsb_measurementtype', type=str, default='raw',
                    help='[Benchmark] YCSB measurement type.')
parser.add_argument('--ycsb_status_interval', type=str, default='1',
                    help='[Benchmark] YCSB measurement intervals (unit: seconds).')
parser.add_argument('--ycsb_columnfamily', type=str, default='family',
                    help='[Benchmark] The column family of HBase that YCSB workloads take effect on.')
parser.add_argument('--ycsb_hbase_threadcount', type=int, default=8,
                    help='[Benchmark] Number of YCSB client threads for HBase.')
parser.add_argument('--ycsb_etcd_threadcount', type=int, default=300,
                    help='[Benchmark] Number of YCSB client threads for etcd.')
parser.add_argument('--ycsb_etcd_endpoints', type=str, default='http://0.0.0.0:2379',
                    help='[Benchmark] Connection strings for the YCSB client to connect etcd.')
parser.add_argument('--ycsb_crdb_max_rate', type=str, default='0',
                    help='[Benchmark] crdb max_rate (0 for no limits).')
parser.add_argument('--ycsb_crdb_concurrency', type=str, default='50',
                    help='[Benchmark] The number of concurrent workers.')
parser.add_argument('--ycsb_crdb_load_conn_string', type=str, default='postgresql://root@roach3:26257?sslmode=disable',
                    help='[Benchmark] Connection strings during YCSB load phase')
parser.add_argument('--ycsb_crdb_run_conn_string', type=str, default='postgresql://root@roach3:26257,roach2:26257,roach1:26257?sslmode=disable',
                    help='[Benchmark] Connection strings during YCSB run phase')
# YCSB - HBASE - Two workloads
parser.add_argument('--ycsb_hbase_threadcount2', type=int, default=32,
                    help='[Benchmark] Number of YCSB client threads for HBase.')
parser.add_argument('--bench_exec_time2', type=str, default='150',
                    help='[Benchmark] Benchmark duration in seconds')
parser.add_argument('--ycsb_wkl2', type=str, default='writeonly',
                    help='[Benchmark] YCSB workload type.')
parser.add_argument('--ycsb_recordcount2', type=str, default='1000000',
                    help='[Benchmark] Number of records during ycsb-load phase')
parser.add_argument('--ycsb_columnfamily2', type=str, default='family2',
                    help='[Benchmark] The column family of HBase that YCSB workloads take effect on.')
# hadoop - Benchmark
# NOTE: 'ycsb' appeared twice in the original choices list; deduplicated.
parser.add_argument('--benchmark', type=str, required=True,
                    help='[Benchmark] Specify which benchmark to test the system',
                    choices=['ycsb', 'mrbench', 'terasort', 'perf_test', 'openmsg', 'sysbench', 'etcd-official', 'depfast', 'copilot'])
# mrbench - hadoop - Benchmark
parser.add_argument('--mrbench_num_iter', type=int, default=10,
                    help='[Benchmark] Number of mrbench jobs running iteratively')
parser.add_argument('--mrbench_num_reduce', type=str, default='3',
                    help='[Benchmark] Number of mapreduce reduce tasks')
# terasort - hadoop - Benchmark
parser.add_argument('--terasort_num_of_100_byte_rows', type=str, default='10737418',
                    help='[Benchmark] Number of 100-byte rows to sort in terasort')
parser.add_argument('--terasort_input_dir', type=str, default='/input',
                    help='[Benchmark] The input directory to store teragen data in HDFS')
parser.add_argument('--terasort_output_dir', type=str, default='/output',
                    help='[Benchmark] The output directory to store terasort results in HDFS')
# kafka - Benchmark
parser.add_argument('--kafka_replication_factor', type=str, default='3',
                    help='[Benchmark] Replication factor of performance testing in Kafka')
parser.add_argument('--kafka_topic_partition', type=str, default='10',
                    help='[Benchmark] Number of topic partitions of performance testing in Kafka')
parser.add_argument('--kafka_throughput_ub', type=int, default=10000,
                    help='[Benchmark] The upper bound (limit) of throughput in performance testing in Kafka')
parser.add_argument('--kafka_num_msg', type=int, default=14000000,
                    help='[Benchmark] The number of messages in performance testing in Kafka')
# openmsg - kafka - Benchmark
parser.add_argument('--openmsg_driver', type=str, default='kafka-latency',
                    help='[Benchmark] The yaml filename of openmsg kafka driver')
parser.add_argument('--openmsg_workload', type=str, default='simple-workload',
                    help='[Benchmark] The yaml filename of openmsg workload')
# sysbench - crdb - Benchmark
parser.add_argument('--sysbench_lua_scheme', type=str, default='oltp_write_only',
                    help='[Benchmark] The lua scheme to run sysbench workload on crdb')
                    # choices=['oltp_read_only', 'oltp_write_only', 'oltp_read_write'])
parser.add_argument('--sysbench_table_size', type=int, default=10000,
                    help='[Benchmark] The table size to run sysbench workload on crdb')
parser.add_argument('--sysbench_num_table', type=int, default=1,
                    help='[Benchmark] Number of tables in a sysbench workload to run on crdb')
parser.add_argument('--sysbench_num_thread', type=int, default=1,
                    help='[Benchmark] Number of threads to run sysbench workloads on crdb')
parser.add_argument('--sysbench_report_interval', type=int, default=1,
                    help='[Benchmark] Granularity of sysbench statistics at run-time')
# official-benchmark - etcd - Benchmark
parser.add_argument('--etcd_official_wkl', type=str, default='lease-keepalive',
                    choices=['txn-put', 'lease-keepalive', 'range', 'stm', 'watch', 'watch-get'],
                    help='[Benchmark] The benchmark from etcd official benchmarking tool to test etcd')
parser.add_argument('--etcd_official_total', type=int, default=800000,
                    help='[Benchmark] The total number of requests in an etcd official benchmark')
parser.add_argument('--etcd_official_max_execution_time', type=int, default=600,
                    help='[Benchmark] The maximum execution time of an etcd official benchmark (unit: seconds)')
parser.add_argument('--etcd_official_isolation', type=str, default='r',
                    choices=['r', 'c', 's', 'ss'],
                    help='[Benchmark] The isolation scheme of transactions in official:stm benchmark')
parser.add_argument('--etcd_official_locker', type=str, default='stm',
                    choices=['stm', 'lock-client'],
                    help='[Benchmark] The locking scheme of transactions in official:stm benchmark')
parser.add_argument('--etcd_official_num_watchers', type=int, default=1000000,
                    help='[Benchmark] Number of watchers in benchmark:official-watch-get')
# depfast
parser.add_argument('--depfast_concurrency', type=int, default=100,
                    help='[Benchmark] The number of client threads in depfast')
parser.add_argument('--depfast_scheme', type=str, default="fpga_raft",
                    choices=['fpga_raft', 'copilot'],
                    help='[Benchmark] Depfast scheme')
parser.add_argument('--depfast_nclient', type=int, default=1,
                    help='[Benchmark] Number of client machines')
# copilot
parser.add_argument('--copilot_concurrency', type=int, default=10,
                    help='[Benchmark] The number of client threads in copilot')
parser.add_argument('--copilot_scheme', type=str, default="copilot",
                    choices=['latentcopilot', 'epaxos', 'multipaxos', 'copilot'],
                    help='[Benchmark] The tested scheme')
parser.add_argument('--copilot_nclient', type=int, default=1,
                    help='[Benchmark] Number of client machines')
parser.add_argument('--copilot_trim_ratio', type=str, default="0",
                    help='[Benchmark] The proportion of data points to be trimmed as noise')
def main(args):
    """Validate CLI arguments, build fault/benchmark configs, and return
    the system-under-test object (constructed but not yet started).

    Raises SystemExit(1) on invalid argument combinations and ValueError
    on unsupported version strings.
    """
    sys_name = args.sys_name
    # ---- cross-argument sanity checks ------------------------------------
    if sys_name == 'etcd' and args.fault_location not in ['leader', 'follower']:
        print('Currently etcd only supports leader/follower faults')
        sys.exit(1)
    if args.coverage and sys_name not in ['hadoop', 'etcd', 'hbase']:
        print('Currently coverage study only supports hadoop, etcd, and hbase')
        sys.exit(1)
    if args.version is not None and sys_name not in ['hadoop', 'etcd']:
        print('Currently version study only supports hadoop and etcd')
        sys.exit(1)
    # cpu/mem limits must be set together; both unset selects the defaults.
    if args.cpu_limit is None and args.mem_limit is None:
        args.cpu_limit = "4"    # 20 cores in total
        args.mem_limit = "32G"  # 128G in total
    elif args.cpu_limit is None or args.mem_limit is None:
        print(f'At least one of cpu_limit ({args.cpu_limit}) or mem_limit ({args.mem_limit}) is None')
        sys.exit(1)
    if args.change_workload and sys_name not in ['hbase']:
        print('Currently only hbase support changing workload at runtime')
        sys.exit(1)
    if args.cluster_size not in [3, 10, 20]:
        print('Currently only support cluster size of 3 or 10 or 20')
        sys.exit(1)
    # ---- shared configuration objects ------------------------------------
    reslim = ResourceLimit(cpu_limit_=args.cpu_limit,
                           mem_limit_=args.mem_limit)
    fault = slow_fault.SlowFault(type_=args.fault_type,
                                 location_=args.fault_location,
                                 duration_=args.fault_duration,
                                 severity_=args.fault_severity,
                                 start_time_=args.fault_start_time,
                                 if_restart_=args.if_restart)
    # ---- per-system benchmark + system construction ----------------------
    if sys_name == 'cassandra':
        benchmark = YCSB_CASSANDRA(exec_time_=args.bench_exec_time,
                                   workload_=args.ycsb_wkl,
                                   recordcount_=args.ycsb_recordcount,
                                   operationcount_=args.ycsb_operationcount,
                                   measurementtype_=args.ycsb_measurementtype,
                                   status_interval_=args.ycsb_status_interval)
        tested_sys = cassandra.Cassandra(sys_name_=sys_name,
                                         fault_=fault,
                                         benchmark_=benchmark,
                                         data_dir_=args.data_dir,
                                         log_root_dir_=args.log_root_dir,
                                         iter_=args.iter,
                                         xinda_software_dir_=args.xinda_software_dir,
                                         xinda_tools_dir_=args.xinda_tools_dir,
                                         charybdefs_mount_dir_=args.charybdefs_mount_dir,
                                         reslim_=reslim,
                                         version_=args.version,
                                         if_restart_=args.if_restart)
    elif sys_name == 'hbase':
        benchmark = YCSB_HBASE(exec_time_=args.bench_exec_time,
                               workload_=args.ycsb_wkl,
                               recordcount_=args.ycsb_recordcount,
                               operationcount_=args.ycsb_operationcount,
                               measurementtype_=args.ycsb_measurementtype,
                               status_interval_=args.ycsb_status_interval,
                               columnfamily_=args.ycsb_columnfamily,
                               threadcount_=args.ycsb_hbase_threadcount)
        # Second workload used only when --change_workload is set.
        benchmark2 = YCSB_HBASE(exec_time_=args.bench_exec_time2,
                                workload_=args.ycsb_wkl2,
                                recordcount_=args.ycsb_recordcount2,
                                operationcount_=args.ycsb_operationcount,
                                measurementtype_=args.ycsb_measurementtype,
                                status_interval_=args.ycsb_status_interval,
                                columnfamily_=args.ycsb_columnfamily2,
                                threadcount_=args.ycsb_hbase_threadcount2)
        tested_sys = hbase.HBase(sys_name_=sys_name,
                                 fault_=fault,
                                 benchmark_=benchmark,
                                 data_dir_=args.data_dir,
                                 log_root_dir_=args.log_root_dir,
                                 iter_=args.iter,
                                 xinda_software_dir_=args.xinda_software_dir,
                                 xinda_tools_dir_=args.xinda_tools_dir,
                                 charybdefs_mount_dir_=args.charybdefs_mount_dir,
                                 reslim_=reslim,
                                 version_=args.version,
                                 if_restart_=args.if_restart,
                                 coverage_=args.coverage,
                                 change_workload_=args.change_workload,
                                 benchmark2_=benchmark2,
                                 if_iaso_=args.if_iaso)
    elif sys_name == 'etcd':
        version = args.version if args.version is not None else '3.5.10'
        if version not in ['3.0.0', '3.4.0', '3.5.10']:
            raise ValueError(f"Version {version} not supported for etcd")
        if args.benchmark == 'ycsb':
            benchmark = YCSB_ETCD(exec_time_=args.bench_exec_time,
                                  workload_=args.ycsb_wkl,
                                  recordcount_=args.ycsb_recordcount,
                                  operationcount_=args.ycsb_operationcount,
                                  measurementtype_=args.ycsb_measurementtype,
                                  status_interval_=args.ycsb_status_interval,
                                  threadcount_=args.ycsb_etcd_threadcount,
                                  etcd_endpoints_=args.ycsb_etcd_endpoints)
        elif args.benchmark == 'etcd-official':
            benchmark = OFFICIAL_ETCD(workload_=args.etcd_official_wkl,
                                      total_=args.etcd_official_total,
                                      max_execution_time_=args.etcd_official_max_execution_time,
                                      isolation_=args.etcd_official_isolation,
                                      stm_locker_=args.etcd_official_locker,
                                      num_watchers_=args.etcd_official_num_watchers)
        else:
            # Previously fell through with `benchmark` unbound -> NameError.
            print("Need to specify which benchmark to test etcd (--benchmark). Options: ycsb OR etcd-official.")
            sys.exit(1)
        tested_sys = etcd.Etcd(sys_name_=sys_name,
                               fault_=fault,
                               benchmark_=benchmark,
                               data_dir_=args.data_dir,
                               log_root_dir_=args.log_root_dir,
                               iter_=args.iter,
                               xinda_software_dir_=args.xinda_software_dir,
                               xinda_tools_dir_=args.xinda_tools_dir,
                               charybdefs_mount_dir_=args.charybdefs_mount_dir,
                               reslim_=reslim,
                               version_=version,
                               if_restart_=args.if_restart,
                               coverage_=args.coverage,
                               cluster_size_=args.cluster_size)
    elif sys_name == 'crdb':
        if args.benchmark is None or args.benchmark not in ['ycsb', 'sysbench']:
            print("Need to specify which benchmark to test crdb (--benchmark). Options: ycsb OR sysbench.")
            sys.exit(1)
        # Map symbolic workload names onto YCSB workload letters.
        if args.ycsb_wkl == 'readonly':
            wkl = 'C'
        elif args.ycsb_wkl == 'mixed':
            wkl = 'A'
        elif args.ycsb_wkl == 'writeonly':
            print('Currently crdb does not support ycsb:writeonly')
            sys.exit(1)
        else:
            wkl = args.ycsb_wkl
        # Larger clusters need connection strings that list every node.
        ycsb_crdb_run_conn_string = args.ycsb_crdb_run_conn_string
        ycsb_crdb_load_conn_string = args.ycsb_crdb_load_conn_string
        if args.cluster_size == 10:
            ycsb_crdb_run_conn_string = 'postgresql://root@roach10:26257,roach9:26257,roach8:26257,roach7:26257,roach6:26257,roach5:26257,roach4:26257,roach3:26257,roach2:26257,roach1:26257?sslmode=disable'
            ycsb_crdb_load_conn_string = 'postgresql://root@roach3:26257?sslmode=disable'
        if args.cluster_size == 20:
            ycsb_crdb_run_conn_string = 'postgresql://root@roach20:26257,roach19:26257,roach18:26257,roach17:26257,roach16:26257,roach15:26257,roach14:26257,roach13:26257,roach12:26257,roach11:26257,roach10:26257,roach9:26257,roach8:26257,roach7:26257,roach6:26257,roach5:26257,roach4:26257,roach3:26257,roach2:26257,roach1:26257?sslmode=disable'
            ycsb_crdb_load_conn_string = 'postgresql://root@roach3:26257?sslmode=disable'
        if args.benchmark == 'ycsb':
            benchmark = YCSB_CRDB(exec_time_=args.bench_exec_time,
                                  workload_=wkl,
                                  operationcount_=args.ycsb_operationcount,
                                  max_rate_=args.ycsb_crdb_max_rate,
                                  concurrency_=args.ycsb_crdb_concurrency,
                                  status_interval_=args.ycsb_status_interval,
                                  recordcount_=args.ycsb_recordcount,
                                  load_connection_string_=ycsb_crdb_load_conn_string,
                                  run_connection_string_=ycsb_crdb_run_conn_string)
        elif args.benchmark == 'sysbench':
            benchmark = SYSBENCH_CRDB(workload_=args.benchmark,
                                      lua_scheme_=args.sysbench_lua_scheme,
                                      table_size_=args.sysbench_table_size,
                                      num_table_=args.sysbench_num_table,
                                      num_thread_=args.sysbench_num_thread,
                                      exec_time_=args.bench_exec_time,
                                      report_interval_=args.sysbench_report_interval)
        tested_sys = crdb.Crdb(sys_name_=sys_name,
                               fault_=fault,
                               benchmark_=benchmark,
                               data_dir_=args.data_dir,
                               log_root_dir_=args.log_root_dir,
                               iter_=args.iter,
                               xinda_software_dir_=args.xinda_software_dir,
                               xinda_tools_dir_=args.xinda_tools_dir,
                               charybdefs_mount_dir_=args.charybdefs_mount_dir,
                               reslim_=reslim,
                               version_=args.version,
                               if_restart_=args.if_restart,
                               if_iaso_=args.if_iaso,
                               cluster_size_=args.cluster_size)
    elif sys_name == 'hadoop':
        if args.benchmark is None or args.benchmark not in ['terasort', 'mrbench']:
            print("Need to specify which benchmark to test hadoop (--benchmark). Options: terasort OR mrbench.")
            sys.exit(1)
        if args.benchmark == 'mrbench':
            benchmark = MRBENCH_MAPRED(num_reduces_=args.mrbench_num_reduce,
                                       num_iter_=args.mrbench_num_iter)
        elif args.benchmark == 'terasort':
            benchmark = TERASORT_MAPRED(num_of_100_byte_rows_=args.terasort_num_of_100_byte_rows,
                                        input_dir_=args.terasort_input_dir,
                                        output_dir_=args.terasort_output_dir)
        if args.version is None:
            version = '3.3.6'
        elif args.version not in ['3.3.6', '3.2.1', '3.0.0']:
            raise ValueError(f"Version {args.version} not supported for hadoop")
        else:
            version = args.version
        tested_sys = mapred.Mapred(sys_name_=sys_name,
                                   fault_=fault,
                                   benchmark_=benchmark,
                                   data_dir_=args.data_dir,
                                   log_root_dir_=args.log_root_dir,
                                   iter_=args.iter,
                                   xinda_software_dir_=args.xinda_software_dir,
                                   xinda_tools_dir_=args.xinda_tools_dir,
                                   charybdefs_mount_dir_=args.charybdefs_mount_dir,
                                   reslim_=reslim,
                                   version_=version,
                                   if_restart_=args.if_restart,
                                   coverage_=args.coverage)  # TODO: implement logic for this
    elif sys_name == 'kafka':
        if args.benchmark is None or args.benchmark not in ['perf_test', 'openmsg']:
            print("Need to specify which benchmark to test kafka (--benchmark). Options: perf_test OR openmsg.")
            sys.exit(1)
        if args.benchmark == 'perf_test':
            benchmark = PERFTEST_KAFKA(replication_factor_=args.kafka_replication_factor,
                                       topic_partition_=args.kafka_topic_partition,
                                       throughput_upper_bound_=args.kafka_throughput_ub,
                                       num_msg_=args.kafka_num_msg,
                                       exec_time_=args.bench_exec_time)
        elif args.benchmark == 'openmsg':
            benchmark = OPENMSG_KAFKA(driver_=args.openmsg_driver,
                                      workload_file_=args.openmsg_workload,
                                      exec_time_=args.bench_exec_time)
        tested_sys = kafka.Kafka(sys_name_=sys_name,
                                 fault_=fault,
                                 benchmark_=benchmark,
                                 data_dir_=args.data_dir,
                                 log_root_dir_=args.log_root_dir,
                                 iter_=args.iter,
                                 xinda_software_dir_=args.xinda_software_dir,
                                 xinda_tools_dir_=args.xinda_tools_dir,
                                 charybdefs_mount_dir_=args.charybdefs_mount_dir,
                                 reslim_=reslim,
                                 version_=args.version,
                                 if_restart_=args.if_restart)
    elif sys_name == 'depfast':
        benchmark = DEFAULT_DEPFAST(exec_time_=args.bench_exec_time,
                                    concurrency_=args.depfast_concurrency,
                                    scheme_=args.depfast_scheme,
                                    nclient_=args.depfast_nclient)
        tested_sys = depfast.Depfast(sys_name_=sys_name,
                                     fault_=fault,
                                     benchmark_=benchmark,
                                     data_dir_=args.data_dir,
                                     log_root_dir_=args.log_root_dir,
                                     iter_=args.iter,
                                     xinda_software_dir_=args.xinda_software_dir,
                                     xinda_tools_dir_=args.xinda_tools_dir,
                                     charybdefs_mount_dir_=args.charybdefs_mount_dir,
                                     reslim_=reslim,
                                     version_=args.version,
                                     if_restart_=args.if_restart)
    elif sys_name == 'copilot':
        benchmark = DEFAULT_COPILOT(exec_time_=args.bench_exec_time,
                                    concurrency_=args.copilot_concurrency,
                                    scheme_=args.copilot_scheme,
                                    nclient_=args.copilot_nclient,
                                    trim_ratio_=args.copilot_trim_ratio)
        tested_sys = copilot.Copilot(sys_name_=sys_name,
                                     fault_=fault,
                                     benchmark_=benchmark,
                                     data_dir_=args.data_dir,
                                     log_root_dir_=args.log_root_dir,
                                     iter_=args.iter,
                                     xinda_software_dir_=args.xinda_software_dir,
                                     xinda_tools_dir_=args.xinda_tools_dir,
                                     charybdefs_mount_dir_=args.charybdefs_mount_dir,
                                     reslim_=reslim,
                                     version_=args.version,
                                     if_restart_=args.if_restart)
    return tested_sys
if __name__ == "__main__":
    # Capture these before the try-block: the except handler below uses
    # them, and the original code raised NameError in the handler whenever
    # parse_args() itself failed.
    args = None
    cur_command = ' '.join(sys.argv)
    try:
        args = parser.parse_args()
        tested_sys = main(args)  # do not shadow the `sys` module
        tested_sys.info(f"Current command:\npython3 {cur_command}")
        tested_sys.test()
    except (KeyboardInterrupt, Exception):
        # Best-effort failure log: append to the batch-test meta log if one
        # was given, otherwise to a local stderr.log.
        if args is not None and args.batch_test_log is not None:
            log_file_path = args.batch_test_log
        else:
            log_file_path = './stderr.log'
        with open(log_file_path, 'a') as log_file:
            log_file.write('#' * 50 + '\n')
            cur_ts = int(time.time() * 1e9)  # nanosecond timestamp
            log_file.write(f"[{cur_ts}, {datetime.datetime.now()}]\n")
            log_file.write(f"{cur_command}\n")
            traceback.print_exc(file=log_file)
            log_file.write('#' * 50 + '\n')