diff --git a/checkpoint_scripts/dump_result.py b/checkpoint_scripts/dump_result.py
index 448d4c3..4b97e2f 100644
--- a/checkpoint_scripts/dump_result.py
+++ b/checkpoint_scripts/dump_result.py
@@ -87,7 +87,7 @@ def per_checkpoint_generate_json(profiling_log, cluster_path, app_list,
             }
         })
     with open(os.path.join(target_path), "w") as f:
-        f.write(json.dumps(result))
+        f.write(json.dumps(result, indent=4))
 
 
 def per_checkpoint_generate_worklist(cpt_path, target_path):
diff --git a/checkpoint_scripts/select_points.py b/checkpoint_scripts/select_points.py
new file mode 100644
index 0000000..344d0b4
--- /dev/null
+++ b/checkpoint_scripts/select_points.py
@@ -0,0 +1,80 @@
+'''
+Select representative checkpoints from a clustering result, keeping the
+highest-weight points until a coverage (weight) or count limit is reached.
+
+Example:
+python3 select_points.py -i /path/to/cluster-0-0.json -o name_prefix_of_output -w 0.8
+'''
+
+import json
+import argparse
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('-c', '--count-limit', type=int)
+argparser.add_argument('-w', '--weight-limit', type=float, default=1.0)
+argparser.add_argument('-t', '--test-set', type=str, default='int')
+argparser.add_argument('-i', '--input-json', required=True)
+argparser.add_argument('-o', '--output-name', type=str, default='output')
+
+args = argparser.parse_args()
+
+if args.count_limit is None:
+    # no explicit limit: effectively unlimited
+    count_limit = 10000
+    count_limit_str = ''
+else:
+    count_limit = args.count_limit
+    count_limit_str = f'_top{count_limit}'
+
+coverage_limit = args.weight_limit
+
+# SPEC version tag used in the spec_info workload list file names
+ver = '06'
+weight_file = args.input_json
+test_set = args.test_set
+
+with open(weight_file) as f:
+    js = json.load(f)
+
+new_js = {}
+files = []
+
+workloads = []
+if test_set:
+    with open(f'spec_info/spec{ver}_{test_set}.lst') as f:
+        for line in f:
+            workloads.append(line.strip())
+
+for workload, info in js.items():
+    if workload not in workloads:
+        continue
+    cumulative_weight = 0.0
+    count = 0
+
+    new_js[workload] = {'insts': info['insts'], 'points': {}}
+    # sort checkpoint ids by descending weight so the heaviest points come first
+    sorted_points = sorted(info['points'], key=lambda p: float(info['points'][p]), reverse=True)
+    for point in sorted_points:
+        weight = info['points'][point]
+
+        cumulative_weight += float(weight)
+        count += 1
+        print(workload, point, weight)
+        files.append(f'{workload}_{point} {workload}_{point}_{weight}/0/')
+        new_js[workload]['points'][point] = weight
+
+        # stop once enough points are kept or the requested coverage is reached
+        if count >= count_limit or cumulative_weight >= coverage_limit:
+            break
+
+# write the worklist, optionally split across n_chunks files
+n_chunks = 1
+chunk_size = len(files) // n_chunks
+for i in range(n_chunks):
+    chunk = files[:chunk_size]
+    with open(f'{args.output_name}_{test_set}_cover{coverage_limit:.2f}{count_limit_str}.lst.{i}', 'w') as outf:
+        outf.write('\n'.join(chunk))
+    files = files[chunk_size:]
+
+with open(f'{args.output_name}_{test_set}_cover{coverage_limit:.2f}{count_limit_str}.json', 'w') as outf:
+    json.dump(new_js, outf, indent=4)
diff --git a/checkpoint_scripts/take_checkpoint.py b/checkpoint_scripts/take_checkpoint.py
index f1dcca2..f91bc98 100644
--- a/checkpoint_scripts/take_checkpoint.py
+++ b/checkpoint_scripts/take_checkpoint.py
@@ -250,7 +250,10 @@ def cluster_command(config, is_resume_from):
     seedproj = random.randint(100000, 999999)
     mkdir(os.path.split(os.path.join(config["utils"]["buffer"], config["cluster"]["config"], config["utils"]["workload"], "simpoints0"))[0])
     bbv_path = os.path.join(config["utils"]["buffer"], config["profiling"]["config"], config["utils"]["workload"], "simpoint_bbv.gz")
-    maxK = 100 if config["utils"]["workload"] in ['xalancbmk'] else 30
+    if config["utils"]["workload"] in ['xalancbmk']:
+        maxK = 100
+    else:
+        maxK = 30
     if is_resume_from:
        # make sure bbv.gz has been generated
        assert os.path.exists(bbv_path)