11#!/usr/bin/env python3
22
33import argparse
4- import collections
5- import json
64import os
75import sys
86import re
97
108import matplotlib .pylab as plt
11- import pandas
129import seaborn as sns
1310
1411here = os .path .dirname (os .path .abspath (__file__ ))
@@ -62,6 +59,12 @@ def get_parser():
6259 help = "root directory with experiments" ,
6360 default = os .path .join (root , "experiments" ),
6461 )
62+ parser .add_argument (
63+ "--non-anon" ,
64+ help = "Generate non-anon" ,
65+ action = "store_true" ,
66+ default = False ,
67+ )
6568 parser .add_argument (
6669 "--out" ,
6770 help = "directory to save parsed results" ,
@@ -93,7 +96,8 @@ def main():
9396
9497 # Saves raw data to file
9598 df = parse_data (indir , outdir , files )
96- plot_results (df , outdir )
99+ plot_results (df , outdir , args .non_anon , log = False )
100+ plot_results (df , outdir , args .non_anon , log = True )
97101
98102
99103def get_fom_line (item , name ):
@@ -165,20 +169,25 @@ def parse_data(indir, outdir, files):
165169 item = ps .read_file (result )
166170
167171 # If this is a flux run, we have a jobspec and events here
168- duration = None
169172 if "JOBSPEC" in item :
170173 item , duration , metadata = ps .parse_flux_metadata (item )
171174 data [exp .prefix ].append (metadata )
172175
173- # Slurm has the item output, and then just the start/end of the job
174- elif "on-premises" not in filename :
176+ elif "on-premises" in filename :
177+ # Get the runtime from the err file
178+ err_file = ps .read_file (result .replace (".out" , ".err" ))
179+ duration = float (
180+ [x for x in err_file .split ("\n " ) if "real" in x ][0 ].split (" " )[- 1 ]
181+ )
182+ else :
175183 duration = ps .parse_slurm_duration (item )
176184
177185 # Parse the FOM from the item - I see three.
178186 # This needs to throw an error if we can't find it - indicates the result file is wonky
179187 # Figure of Merit (FOM): nnz_AP / (Setup Phase Time + 3 * Solve Phase Time) 1.148604e+09
180188 fom_overall = get_fom_line (item , "Figure of Merit (FOM)" )
181189 p .add_result ("fom_overall" , fom_overall )
190+ p .add_result ("duration" , duration )
182191
183192 print ("Done parsing amg2023 results!" )
184193
@@ -188,7 +197,7 @@ def parse_data(indir, outdir, files):
188197 return p .df
189198
190199
191- def plot_results (df , outdir ):
200+ def plot_results (df , outdir , non_anon = False , log = True ):
192201 """
193202 Plot analysis results
194203 """
@@ -198,44 +207,118 @@ def plot_results(df, outdir):
198207 if not os .path .exists (img_outdir ):
199208 os .makedirs (img_outdir )
200209
210+ ps .print_experiment_cost (df , outdir )
211+
212+ # For anonymization
213+ if not non_anon :
214+ df ["experiment" ] = df ["experiment" ].str .replace (
215+ "on-premises/lassen" , "on-premises/b"
216+ )
217+ df ["experiment" ] = df ["experiment" ].str .replace (
218+ "on-premises/dane" , "on-premises/a"
219+ )
220+
201221 # We are going to put the plots together, and the colors need to match!
202222 cloud_colors = {}
203223 for cloud in df .experiment .unique ():
204224 cloud_colors [cloud ] = ps .match_color (cloud )
205225
206226 # Within a setup, compare between experiments for GPU and cpu
227+ data_frames = {}
207228 for env in df .env_type .unique ():
208229 subset = df [df .env_type == env ]
209230
210- # x axis is by gpu count for gpus
211- x_by = "nodes"
212- x_label = "Nodes"
213- if env == "gpu" :
214- x_by = "gpu_count"
215- x_label = "Number of GPU"
216-
217231 # Make a plot for seconds runtime, and each FOM set.
218232 # We can look at the metric across sizes, colored by experiment
219233 for metric in subset .metric .unique ():
220234 metric_df = subset [subset .metric == metric ]
221- log_scale = False if metric == "seconds" else True
222235 title = " " .join ([x .capitalize () for x in metric .split ("_" )])
223-
224- # Make sure fom is always capitalized
225236 title = title .replace ("Fom" , "FOM" )
226- ps .make_plot (
227- metric_df ,
228- title = f"AMG2023 { title } ({ env .upper ()} )" ,
229- ydimension = "value" ,
230- plotname = f"amg2023-{ metric } -{ env } " ,
231- xdimension = x_by ,
232- palette = cloud_colors ,
233- outdir = img_outdir ,
234- hue = "experiment" ,
235- xlabel = x_label ,
236- ylabel = title ,
237- log_scale = log_scale ,
238- )
237+ data_frames [env ] = metric_df
238+
239+ fig , axes = plt .subplots (1 , 2 , sharey = True , figsize = (18 , 3.3 ))
240+
241+ fig = plt .figure (figsize = (18 , 3.3 ))
242+ gs = plt .GridSpec (1 , 3 , width_ratios = [2 , 2 , 1 ])
243+ axes = []
244+ axes .append (fig .add_subplot (gs [0 , 0 ]))
245+ axes .append (fig .add_subplot (gs [0 , 1 ]))
246+ axes .append (fig .add_subplot (gs [0 , 2 ]))
247+
248+ sns .set_style ("whitegrid" )
249+ sns .barplot (
250+ data_frames ["cpu" ],
251+ ax = axes [0 ],
252+ x = "nodes" ,
253+ y = "value" ,
254+ hue = "experiment" ,
255+ err_kws = {"color" : "darkred" },
256+ hue_order = [
257+ "google/gke/cpu" ,
258+ "google/compute-engine/cpu" ,
259+ "aws/eks/cpu" ,
260+ "aws/parallel-cluster/cpu" ,
261+ "azure/aks/cpu" ,
262+ "azure/cyclecloud/cpu" ,
263+ "on-premises/a/cpu" ,
264+ ],
265+ palette = cloud_colors ,
266+ order = [32 , 64 , 128 , 256 ],
267+ )
268+ axes [0 ].set_title ("FOM Overall (CPU)" , fontsize = 14 )
269+ if log :
270+ axes [0 ].set_ylabel ("FOM Overall (logscale)" , fontsize = 14 )
271+ else :
272+ axes [0 ].set_ylabel ("FOM Overall" , fontsize = 14 )
273+ axes [0 ].set_xlabel ("Nodes" , fontsize = 14 )
274+
275+ # Log scale for FOM
276+ if log :
277+ axes [0 ].set_yscale ("log" )
278+
279+ sns .barplot (
280+ data_frames ["gpu" ],
281+ ax = axes [1 ],
282+ x = "gpu_count" ,
283+ y = "value" ,
284+ err_kws = {"color" : "darkred" },
285+ hue = "experiment" ,
286+ hue_order = [
287+ "google/compute-engine/gpu" ,
288+ "on-premises/b/gpu" ,
289+ "google/gke/gpu" ,
290+ "azure/cyclecloud/gpu" ,
291+ "azure/aks/gpu" ,
292+ "aws/eks/gpu" ,
293+ ],
294+ palette = cloud_colors ,
295+ order = [32 , 64 , 128 , 256 ],
296+ )
297+ axes [1 ].set_title ("FOM Overall (GPU)" , fontsize = 14 )
298+ axes [1 ].set_xlabel ("GPU Count" , fontsize = 14 )
299+ axes [1 ].set_ylabel ("" )
300+ if log :
301+ axes [1 ].set_yscale ("log" )
302+
303+ handles , labels = axes [1 ].get_legend_handles_labels ()
304+ labels = ["/" .join (x .split ("/" )[0 :2 ]) for x in labels ]
305+ axes [2 ].legend (
306+ handles , labels , loc = "center left" , bbox_to_anchor = (- 0.1 , 0.5 ), frameon = False
307+ )
308+ for ax in axes [0 :2 ]:
309+ ax .get_legend ().remove ()
310+ axes [2 ].axis ("off" )
311+
312+ plt .tight_layout ()
313+ if log :
314+ plt .savefig (os .path .join (img_outdir , "amg-fom-overall-cpu-gpu-log.svg" ))
315+ else :
316+ plt .savefig (os .path .join (img_outdir , "amg-fom-overall-cpu-gpu.svg" ))
317+ plt .clf ()
318+
319+ # Print the total number of data points
320+ print (f'Total number of CPU datum: { data_frames ["cpu" ].shape [0 ]} ' )
321+ print (f'Total number of GPU datum: { data_frames ["gpu" ].shape [0 ]} ' )
239322
240323
241324if __name__ == "__main__" :
0 commit comments