Skip to content

Commit 4564280

Browse files
authored
Merge pull request #86 from converged-computing/add-paper-results
Add paper results
2 parents bda94e6 + be1a7b0 commit 4564280

File tree

228 files changed

+6788128
-547
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

228 files changed

+6788128
-547
lines changed

experiments/azure/aks/cpu/size128/results/mixbench/mixbench-iter-1-1344106659840.out

Lines changed: 0 additions & 4 deletions
This file was deleted.

paper/amg2023/1-run-analysis.py

Lines changed: 114 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,11 @@
11
#!/usr/bin/env python3
22

33
import argparse
4-
import collections
5-
import json
64
import os
75
import sys
86
import re
97

108
import matplotlib.pylab as plt
11-
import pandas
129
import seaborn as sns
1310

1411
here = os.path.dirname(os.path.abspath(__file__))
@@ -62,6 +59,12 @@ def get_parser():
6259
help="root directory with experiments",
6360
default=os.path.join(root, "experiments"),
6461
)
62+
parser.add_argument(
63+
"--non-anon",
64+
help="Generate non-anon",
65+
action="store_true",
66+
default=False,
67+
)
6568
parser.add_argument(
6669
"--out",
6770
help="directory to save parsed results",
@@ -93,7 +96,8 @@ def main():
9396

9497
# Saves raw data to file
9598
df = parse_data(indir, outdir, files)
96-
plot_results(df, outdir)
99+
plot_results(df, outdir, args.non_anon, log=False)
100+
plot_results(df, outdir, args.non_anon, log=True)
97101

98102

99103
def get_fom_line(item, name):
@@ -165,20 +169,25 @@ def parse_data(indir, outdir, files):
165169
item = ps.read_file(result)
166170

167171
# If this is a flux run, we have a jobspec and events here
168-
duration = None
169172
if "JOBSPEC" in item:
170173
item, duration, metadata = ps.parse_flux_metadata(item)
171174
data[exp.prefix].append(metadata)
172175

173-
# Slurm has the item output, and then just the start/end of the job
174-
elif "on-premises" not in filename:
176+
elif "on-premises" in filename:
177+
# Get the runtime from the err file
178+
err_file = ps.read_file(result.replace(".out", ".err"))
179+
duration = float(
180+
[x for x in err_file.split("\n") if "real" in x][0].split(" ")[-1]
181+
)
182+
else:
175183
duration = ps.parse_slurm_duration(item)
176184

177185
# Parse the FOM from the item - I see three.
178186
# This needs to throw an error if we can't find it - indicates the result file is wonky
179187
# Figure of Merit (FOM): nnz_AP / (Setup Phase Time + 3 * Solve Phase Time) 1.148604e+09
180188
fom_overall = get_fom_line(item, "Figure of Merit (FOM)")
181189
p.add_result("fom_overall", fom_overall)
190+
p.add_result("duration", duration)
182191

183192
print("Done parsing amg2023 results!")
184193

@@ -188,7 +197,7 @@ def parse_data(indir, outdir, files):
188197
return p.df
189198

190199

191-
def plot_results(df, outdir):
200+
def plot_results(df, outdir, non_anon=False, log=True):
192201
"""
193202
Plot analysis results
194203
"""
@@ -198,44 +207,118 @@ def plot_results(df, outdir):
198207
if not os.path.exists(img_outdir):
199208
os.makedirs(img_outdir)
200209

210+
ps.print_experiment_cost(df, outdir)
211+
212+
# For anonymization
213+
if not non_anon:
214+
df["experiment"] = df["experiment"].str.replace(
215+
"on-premises/lassen", "on-premises/b"
216+
)
217+
df["experiment"] = df["experiment"].str.replace(
218+
"on-premises/dane", "on-premises/a"
219+
)
220+
201221
# We are going to put the plots together, and the colors need to match!
202222
cloud_colors = {}
203223
for cloud in df.experiment.unique():
204224
cloud_colors[cloud] = ps.match_color(cloud)
205225

206226
# Within a setup, compare between experiments for GPU and cpu
227+
data_frames = {}
207228
for env in df.env_type.unique():
208229
subset = df[df.env_type == env]
209230

210-
# x axis is by gpu count for gpus
211-
x_by = "nodes"
212-
x_label = "Nodes"
213-
if env == "gpu":
214-
x_by = "gpu_count"
215-
x_label = "Number of GPU"
216-
217231
# Make a plot for seconds runtime, and each FOM set.
218232
# We can look at the metric across sizes, colored by experiment
219233
for metric in subset.metric.unique():
220234
metric_df = subset[subset.metric == metric]
221-
log_scale = False if metric == "seconds" else True
222235
title = " ".join([x.capitalize() for x in metric.split("_")])
223-
224-
# Make sure fom is always capitalized
225236
title = title.replace("Fom", "FOM")
226-
ps.make_plot(
227-
metric_df,
228-
title=f"AMG2023 {title} ({env.upper()})",
229-
ydimension="value",
230-
plotname=f"amg2023-{metric}-{env}",
231-
xdimension=x_by,
232-
palette=cloud_colors,
233-
outdir=img_outdir,
234-
hue="experiment",
235-
xlabel=x_label,
236-
ylabel=title,
237-
log_scale=log_scale,
238-
)
237+
data_frames[env] = metric_df
238+
239+
fig, axes = plt.subplots(1, 2, sharey=True, figsize=(18, 3.3))
240+
241+
fig = plt.figure(figsize=(18, 3.3))
242+
gs = plt.GridSpec(1, 3, width_ratios=[2, 2, 1])
243+
axes = []
244+
axes.append(fig.add_subplot(gs[0, 0]))
245+
axes.append(fig.add_subplot(gs[0, 1]))
246+
axes.append(fig.add_subplot(gs[0, 2]))
247+
248+
sns.set_style("whitegrid")
249+
sns.barplot(
250+
data_frames["cpu"],
251+
ax=axes[0],
252+
x="nodes",
253+
y="value",
254+
hue="experiment",
255+
err_kws={"color": "darkred"},
256+
hue_order=[
257+
"google/gke/cpu",
258+
"google/compute-engine/cpu",
259+
"aws/eks/cpu",
260+
"aws/parallel-cluster/cpu",
261+
"azure/aks/cpu",
262+
"azure/cyclecloud/cpu",
263+
"on-premises/a/cpu",
264+
],
265+
palette=cloud_colors,
266+
order=[32, 64, 128, 256],
267+
)
268+
axes[0].set_title("FOM Overall (CPU)", fontsize=14)
269+
if log:
270+
axes[0].set_ylabel("FOM Overall (logscale)", fontsize=14)
271+
else:
272+
axes[0].set_ylabel("FOM Overall", fontsize=14)
273+
axes[0].set_xlabel("Nodes", fontsize=14)
274+
275+
# Log scale for FOM
276+
if log:
277+
axes[0].set_yscale("log")
278+
279+
sns.barplot(
280+
data_frames["gpu"],
281+
ax=axes[1],
282+
x="gpu_count",
283+
y="value",
284+
err_kws={"color": "darkred"},
285+
hue="experiment",
286+
hue_order=[
287+
"google/compute-engine/gpu",
288+
"on-premises/b/gpu",
289+
"google/gke/gpu",
290+
"azure/cyclecloud/gpu",
291+
"azure/aks/gpu",
292+
"aws/eks/gpu",
293+
],
294+
palette=cloud_colors,
295+
order=[32, 64, 128, 256],
296+
)
297+
axes[1].set_title("FOM Overall (GPU)", fontsize=14)
298+
axes[1].set_xlabel("GPU Count", fontsize=14)
299+
axes[1].set_ylabel("")
300+
if log:
301+
axes[1].set_yscale("log")
302+
303+
handles, labels = axes[1].get_legend_handles_labels()
304+
labels = ["/".join(x.split("/")[0:2]) for x in labels]
305+
axes[2].legend(
306+
handles, labels, loc="center left", bbox_to_anchor=(-0.1, 0.5), frameon=False
307+
)
308+
for ax in axes[0:2]:
309+
ax.get_legend().remove()
310+
axes[2].axis("off")
311+
312+
plt.tight_layout()
313+
if log:
314+
plt.savefig(os.path.join(img_outdir, "amg-fom-overall-cpu-gpu-log.svg"))
315+
else:
316+
plt.savefig(os.path.join(img_outdir, "amg-fom-overall-cpu-gpu.svg"))
317+
plt.clf()
318+
319+
# Print the total number of data points
320+
print(f'Total number of CPU datum: {data_frames["cpu"].shape[0]}')
321+
print(f'Total number of GPU datum: {data_frames["gpu"].shape[0]}')
239322

240323

241324
if __name__ == "__main__":

0 commit comments

Comments
 (0)