Skip to content

Commit

Permalink
Set maxK for xalanc; Select cpt of given coverage (#4)
Browse files Browse the repository at this point in the history
* add special case for xalancbmk to choose larger maxK

* Add scripts to select top x% checkpoints
  • Loading branch information
shinezyy authored Sep 29, 2024
1 parent 7d5e522 commit a37557a
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 2 deletions.
2 changes: 1 addition & 1 deletion checkpoint_scripts/dump_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def per_checkpoint_generate_json(profiling_log, cluster_path, app_list,
}
})
with open(os.path.join(target_path), "w") as f:
f.write(json.dumps(result))
f.write(json.dumps(result, indent=4))


def per_checkpoint_generate_worklist(cpt_path, target_path):
Expand Down
78 changes: 78 additions & 0 deletions checkpoint_scripts/select_points.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
'''
Select the highest-weight checkpoints of each workload until a cumulative
weight (coverage) limit or a per-workload count limit is reached, then emit
a checkpoint list file (.lst.N) and a filtered weight JSON.

Example:
python3 select_points.py -i /path/to/cluster-0-0.json -o name_prefix_of_output -w 0.8
'''

import json
import argparse

argparser = argparse.ArgumentParser()
argparser.add_argument('-c', '--count-limit', type=int)
argparser.add_argument('-w', '--weight-limit', type=float, default=1.0)
argparser.add_argument('-t', '--test-set', type=str, default='int')
argparser.add_argument('-i', '--input-json', required=True)
argparser.add_argument('-o', '--output-name', type=str, default='output')

args = argparser.parse_args()

if args.count_limit is None:
    count_limit = 10000  # effectively unlimited per workload
    count_limit_str = ''
else:
    count_limit = args.count_limit
    count_limit_str = f'_top{count_limit}'

coverage_limit = args.weight_limit

ver = '06'  # SPEC CPU version used to locate the workload list file
weight_file = args.input_json
# BUG FIX: honor the -t/--test-set argument instead of hard-coding 'int',
# which silently ignored the user's choice (e.g. running with -t fp).
test_set = args.test_set

with open(weight_file) as f:
    js = json.load(f)

new_js = {}
files = []

# Restrict selection to workloads listed in the test-set file, if one is named.
workloads = []
if test_set:
    with open(f'spec_info/spec{ver}_{test_set}.lst') as f:
        for line in f:
            workloads.append(line.strip())

for workload, info in js.items():
    if workload not in workloads:
        continue
    cumm_weight = 0.0
    count = 0

    if workload not in new_js:
        new_js[workload] = {}
        new_js[workload]['insts'] = js[workload]['insts']
        new_js[workload]['points'] = {}
    # Visit points in descending weight order so the heaviest checkpoints
    # are selected first, stopping once either limit is satisfied.
    sorted_points = sorted(info['points'], key=lambda x: info['points'][x], reverse=True)
    for point in sorted_points:
        weight = info['points'][point]

        cumm_weight += float(weight)
        count += 1
        print(workload, point, weight)
        files.append(f'{workload}_{point} {workload}_{point}_{weight}/0/')
        new_js[workload]['points'][point] = weight

        if count >= count_limit or cumm_weight >= coverage_limit:
            break

# Split the selected checkpoint list into n_chunks output files.
n_chunks = 1
chunk_size = len(files) // n_chunks
for i in range(n_chunks):
    # The last chunk absorbs the remainder so no entry is dropped when
    # len(files) is not divisible by n_chunks (no-op while n_chunks == 1).
    chunk = files if i == n_chunks - 1 else files[:chunk_size]
    with open(f'{args.output_name}_{test_set}_cover{coverage_limit:.2f}{count_limit_str}.lst.{i}', 'w') \
            as outf:
        outf.write('\n'.join(chunk))
    files = files[chunk_size:]

with open(f'{args.output_name}_{test_set}_cover{coverage_limit:.2f}{count_limit_str}.json', 'w') as outf:
    json.dump(new_js, outf, indent=4)
5 changes: 4 additions & 1 deletion checkpoint_scripts/take_checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,10 @@ def cluster_command(config, is_resume_from):
seedproj = random.randint(100000, 999999)
mkdir(os.path.split(os.path.join(config["utils"]["buffer"], config["cluster"]["config"], config["utils"]["workload"], "simpoints0"))[0])
bbv_path = os.path.join(config["utils"]["buffer"], config["profiling"]["config"], config["utils"]["workload"], "simpoint_bbv.gz")
maxK = 100 if config["utils"]["workload"] in ['xalancbmk'] else 30
if config["utils"]["workload"] in ['xalancbmk']:
maxK = 100
else:
maxK = 30
if is_resume_from:
# make sure bbv.gz has been generated
assert os.path.exists(bbv_path)
Expand Down

0 comments on commit a37557a

Please sign in to comment.