Skip to content

Commit

Permalink
Set maxK for xalanc; Select cpt of given coverage (#4)
Browse files Browse the repository at this point in the history
* add special case for xalancbmk to choose larger maxK

* Add scripts to select top x% checkpoints
  • Loading branch information
shinezyy authored Sep 29, 2024
1 parent 7d5e522 commit a37557a
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 2 deletions.
2 changes: 1 addition & 1 deletion checkpoint_scripts/dump_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def per_checkpoint_generate_json(profiling_log, cluster_path, app_list,
}
})
with open(os.path.join(target_path), "w") as f:
f.write(json.dumps(result))
f.write(json.dumps(result, indent=4))


def per_checkpoint_generate_worklist(cpt_path, target_path):
Expand Down
78 changes: 78 additions & 0 deletions checkpoint_scripts/select_points.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
'''
Select the highest-weight checkpoints of each workload until a cumulative
weight (coverage) limit or a per-workload count limit is reached, then emit
a checkpoint list file (.lst.N) and a filtered weight JSON.

Example:
python3 select_points.py -i /path/to/cluster-0-0.json -o name_prefix_of_output -w 0.8
'''

import json
import argparse

argparser = argparse.ArgumentParser()
argparser.add_argument('-c', '--count-limit', type=int)
argparser.add_argument('-w', '--weight-limit', type=float, default=1.0)
argparser.add_argument('-t', '--test-set', type=str, default='int')
argparser.add_argument('-i', '--input-json', required=True)
argparser.add_argument('-o', '--output-name', type=str, default='output')

args = argparser.parse_args()

if args.count_limit is None:
    count_limit = 10000  # effectively unlimited per workload
    count_limit_str = ''
else:
    count_limit = args.count_limit
    count_limit_str = f'_top{count_limit}'

coverage_limit = args.weight_limit

ver = '06'  # SPEC CPU version used to locate the workload list file
weight_file = args.input_json
# BUG FIX: honor the -t/--test-set argument instead of hard-coding 'int',
# which silently ignored the user's choice (e.g. running with -t fp).
test_set = args.test_set

with open(weight_file) as f:
    js = json.load(f)

new_js = {}
files = []

# Restrict selection to workloads listed in the test-set file, if one is named.
workloads = []
if test_set:
    with open(f'spec_info/spec{ver}_{test_set}.lst') as f:
        for line in f:
            workloads.append(line.strip())

for workload, info in js.items():
    if workload not in workloads:
        continue
    cumm_weight = 0.0
    count = 0

    if workload not in new_js:
        new_js[workload] = {}
        new_js[workload]['insts'] = js[workload]['insts']
        new_js[workload]['points'] = {}
    # Visit points in descending weight order so the heaviest checkpoints
    # are selected first, stopping once either limit is satisfied.
    sorted_points = sorted(info['points'], key=lambda x: info['points'][x], reverse=True)
    for point in sorted_points:
        weight = info['points'][point]

        cumm_weight += float(weight)
        count += 1
        print(workload, point, weight)
        files.append(f'{workload}_{point} {workload}_{point}_{weight}/0/')
        new_js[workload]['points'][point] = weight

        if count >= count_limit or cumm_weight >= coverage_limit:
            break

# Split the selected checkpoint list into n_chunks output files.
n_chunks = 1
chunk_size = len(files) // n_chunks
for i in range(n_chunks):
    # The last chunk absorbs the remainder so no entry is dropped when
    # len(files) is not divisible by n_chunks (no-op while n_chunks == 1).
    chunk = files if i == n_chunks - 1 else files[:chunk_size]
    with open(f'{args.output_name}_{test_set}_cover{coverage_limit:.2f}{count_limit_str}.lst.{i}', 'w') \
            as outf:
        outf.write('\n'.join(chunk))
    files = files[chunk_size:]

with open(f'{args.output_name}_{test_set}_cover{coverage_limit:.2f}{count_limit_str}.json', 'w') as outf:
    json.dump(new_js, outf, indent=4)
5 changes: 4 additions & 1 deletion checkpoint_scripts/take_checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,10 @@ def cluster_command(config, is_resume_from):
seedproj = random.randint(100000, 999999)
mkdir(os.path.split(os.path.join(config["utils"]["buffer"], config["cluster"]["config"], config["utils"]["workload"], "simpoints0"))[0])
bbv_path = os.path.join(config["utils"]["buffer"], config["profiling"]["config"], config["utils"]["workload"], "simpoint_bbv.gz")
maxK = 100 if config["utils"]["workload"] in ['xalancbmk'] else 30
if config["utils"]["workload"] in ['xalancbmk']:
maxK = 100
else:
maxK = 30
if is_resume_from:
# make sure bbv.gz has been generated
assert os.path.exists(bbv_path)
Expand Down

0 comments on commit a37557a

Please sign in to comment.