From cbe9c045bc7bbe9a4e4725d3304c6ee3e9dfbd9e Mon Sep 17 00:00:00 2001
From: sunway513 <peng.sun@amd.com>
Date: Thu, 28 Jan 2021 20:47:49 +0000
Subject: [PATCH 1/2] Add more operators to the parser

---
 rccl_nccl_parser.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/rccl_nccl_parser.py b/rccl_nccl_parser.py
index e59c02d..3403965 100644
--- a/rccl_nccl_parser.py
+++ b/rccl_nccl_parser.py
@@ -3,8 +3,15 @@
 import argparse
 
 coll_op_map = {
-            "AllReduce": "all_reduce_perf",
             "Broadcast": "broadcast_perf",
+            "Reduce": "reduce_perf",
+            "AllGather": "all_gather_perf",
+            "ReduceScatter": "reduce_scatter_perf",
+            "AllReduce": "all_reduce_perf",
+            "Gather": "gather_perf",
+            "Scatter": "scatter_perf",
+            "AllToAll": "alltoall_perf",
+            "AllToAllv": "alltoallv_perf",
           }
 
 reduction_op_map = {

From 69e8b071857cfa4d6b62d0b05bf2c0b58f7c5413 Mon Sep 17 00:00:00 2001
From: Jithun Nair <jithun.nair@amd.com>
Date: Tue, 9 Feb 2021 04:24:18 +0000
Subject: [PATCH 2/2] Add support for parsing more collectives. Only AllToAllv
 is not enabled because it has a counts array argument instead of a numeric
 count argument, so one cannot reconstruct a rccl-tests command

---
 generate_summary.py | 26 +++++++++++++++++++++++---
 rccl_nccl_parser.py | 16 +++++++++-------
 2 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/generate_summary.py b/generate_summary.py
index a66f094..6412527 100644
--- a/generate_summary.py
+++ b/generate_summary.py
@@ -1,6 +1,7 @@
 import os
 import sys
 import argparse
+import re
 
 def get_script_commands(script_file):
     fs = open(script_file, 'r')
@@ -35,18 +36,37 @@ def parse_nccl_performance(useful_lines, commands):
     
     perf_lines = []
     perf_lines.append("sep=|")
-    perf_lines.append("size|count|type|redop|time-oplace(us)|algbw(gb/s)-oplace|busbw(gb/s)-oplace|error|" + \
-                        "time-iplace(us)|algbw(gb/s)-iplace|busbw(gb/s)-iplace|error|avg_bus_bw|commands")
+    header = "size|count|type|redop|root|time-oplace(us)|algbw(gb/s)-oplace|busbw(gb/s)-oplace|error|" + \
+             "time-iplace(us)|algbw(gb/s)-iplace|busbw(gb/s)-iplace|error|avg_bus_bw|commands"
+    #print(header)
+    num_fields = len(header.split("|"))
+    perf_lines.append(header)
     for j in range(len(useful_lines)):
         line = useful_lines[j]
         line = line.replace("# Avg bus bandwidth    : ", "")
         
         split_list = line.split()
         perf_line = ""
+        field_index = 0
         for i in range(len(split_list)):
             perf_line = perf_line + split_list[i] + "|"
+            # Some collectives do not involve a redop
+            if field_index==2 and "reduce" not in commands[j].lower():
+                perf_line = perf_line + "|"
+                field_index = field_index + 1
+            # Only broadcast and reduce involve a root
+            if (
+               field_index==3 and
+               re.search(r'\Wreduce_perf', commands[j]) is None and
+               re.search(r'\Wbroadcast_perf', commands[j]) is None
+            ):
+                perf_line = perf_line + "|"
+                field_index = field_index + 1
+            field_index = field_index + 1
         #print (perf_line + commands[j])
-        perf_lines.append(perf_line + commands[j])
+        perf_line = perf_line + commands[j]
+        assert len(perf_line.split("|")) == num_fields
+        perf_lines.append(perf_line)
 
     return perf_lines
 
diff --git a/rccl_nccl_parser.py b/rccl_nccl_parser.py
index 3403965..6d9689a 100644
--- a/rccl_nccl_parser.py
+++ b/rccl_nccl_parser.py
@@ -11,7 +11,9 @@
             "Gather": "gather_perf",
             "Scatter": "scatter_perf",
             "AllToAll": "alltoall_perf",
-            "AllToAllv": "alltoallv_perf",
+#            "AllToAllv": "alltoallv_perf",
+            "Send": "sendrecv_perf",
+            "Recv": "sendrecv_perf",
           }
 
 reduction_op_map = {
@@ -69,12 +71,12 @@ def parse_nccl_log(nccl_lines):
     for j in range(len(nccl_lines)):
         line = nccl_lines[j]
         split_list = line.split(" ")
-        comm = split_list[4].replace(":", "")
-        count = split_list[12]
-        datatype = split_list[14]
-        op_type = split_list[16]
-        root = split_list[18]
-        nnranks = split_list[21].split("=")[1].replace("]", "")
+        comm = split_list[split_list.index("INFO") + 1].replace(":", "")
+        count = split_list[split_list.index("count") + 1]
+        datatype = split_list[split_list.index("datatype") + 1]
+        op_type = split_list[split_list.index("op") + 1]
+        root = split_list[split_list.index("root") + 1]
+        nnranks = next(item for item in split_list if 'nranks' in item).split("=")[1].replace("]", "")
 
         #print (comm)
         #print (count)