Skip to content

Commit

Permalink
Allow nsys 2024.7 installation.
Browse files Browse the repository at this point in the history
  • Loading branch information
olupton committed Jan 10, 2025
1 parent 9dd32f5 commit dd2c902
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 14 deletions.
2 changes: 1 addition & 1 deletion .github/container/install-nsight.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ export DEBIAN_FRONTEND=noninteractive
export TZ=America/Los_Angeles

apt-get update
apt-get install -y nsight-compute nsight-systems-cli-2024.6.1
apt-get install -y nsight-compute nsight-systems-cli
apt-get clean

rm -rf /var/lib/apt/lists/*
28 changes: 17 additions & 11 deletions .github/container/nsys_jax/nsys_jax/data_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,17 +141,23 @@ def _load_nvtx_gpu_proj_trace_single(
"Stack Level": "Lvl",
"TID": "TID",
}
if set(df.columns) == alt_rename_map.keys():
tsl_prefix = ""
df = df.rename(
columns={k: v for k, v in alt_rename_map.items() if v is not None}
)
df["ProjDurMs"] = 1e-6 * (df.pop("End") - df["Start"])
df["ProjStartMs"] = 1e-6 * df.pop("Start")
df["RangeStack"] = df["RangeStack"].map(
lambda stack: ":" + ":".join(map(str, stack))
)
# TODO: add OrigDurMs, OrigStartMs
alt_rename_map_2024_7 = alt_rename_map | {
"Device ID": "DeviceId",
"Stream ID": "StreamId",
}
for rename_map in [alt_rename_map, alt_rename_map_2024_7]:
if set(df.columns) == rename_map.keys():
tsl_prefix = ""
df = df.rename(
columns={k: v for k, v in rename_map.items() if v is not None}
)
df["ProjDurMs"] = 1e-6 * (df.pop("End") - df["Start"])
df["ProjStartMs"] = 1e-6 * df.pop("Start")
df["RangeStack"] = df["RangeStack"].map(
lambda stack: ":" + ":".join(map(str, stack))
)
# TODO: add OrigDurMs, OrigStartMs
break
else:
tsl_prefix = "TSL:"
df = df.drop(columns=["Style"])
Expand Down
116 changes: 114 additions & 2 deletions .github/container/nsys_jax/nsys_jax/scripts/patch_nsys.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import shutil
import subprocess

patch_content = r"""diff --git a/nsys_recipe/lib/nvtx.py b/nsys_recipe/lib/nvtx.py
nsys_2024_5_and_6_patch_content = r"""diff --git a/nsys_recipe/lib/nvtx.py b/nsys_recipe/lib/nvtx.py
index 2470043..7abf892 100644
--- a/nsys_recipe/lib/nvtx.py
+++ b/nsys_recipe/lib/nvtx.py
Expand All @@ -29,6 +29,111 @@
"rangeId": "Range ID",
"""

nsys_2024_7_patch_content = r'''diff --git a/nsys_recipe/lib/nvtx.py b/nsys_recipe/lib/nvtx.py
index 1e958f8..d08bb99 100644
--- a/nsys_recipe/lib/nvtx.py
+++ b/nsys_recipe/lib/nvtx.py
@@ -162,7 +162,7 @@ def _compute_gpu_projection_df(nvtx_df, cuda_df, cuda_nvtx_index_map):
starts.append(start)
ends.append(end)
- return (
+ df = (
pd.DataFrame(
{
"text": nvtx_df.loc[list(nvtx_gpu_end_dict.keys()) + indices, "text"],
@@ -172,11 +172,44 @@ def _compute_gpu_projection_df(nvtx_df, cuda_df, cuda_nvtx_index_map):
"tid": nvtx_df.loc[list(nvtx_gpu_end_dict.keys()) + indices, "tid"],
}
)
- .sort_values(by=["start", "end"], ascending=[True, False])
+ ).reset_index()
+
+ return (
+ df.sort_values(by=["start", "end", "index"], ascending=[True, False, True])
+ .drop(columns=["index"])
.reset_index(drop=True)
)
+def _compute_grouped_gpu_projection_df(
+ nvtx_df, cuda_df, cuda_nvtx_index_map, per_gpu=False, per_stream=False
+):
+ group_by_elements = []
+ if per_stream:
+ group_by_elements.append("streamId")
+ if per_gpu:
+ group_by_elements.append("deviceId")
+
+ if not group_by_elements:
+ return _compute_gpu_projection_df(nvtx_df, cuda_df, cuda_nvtx_index_map)
+
+ dfs = []
+ cuda_gdf = cuda_df.groupby(group_by_elements)
+
+ for group_keys, cuda_group_df in cuda_gdf:
+ df = _compute_gpu_projection_df(nvtx_df, cuda_group_df, cuda_nvtx_index_map)
+ if df.empty:
+ continue
+
+ if per_stream:
+ df["streamId"] = group_keys[group_by_elements.index("streamId")]
+ if per_gpu:
+ df["deviceId"] = group_keys[group_by_elements.index("deviceId")]
+ dfs.append(df)
+
+ return pd.concat(dfs, ignore_index=True)
+
+
def _find_cuda_nvtx_ranges(nvtx_df, cuda_df):
"""Find the NVTX ranges that enclose each CUDA operation.
@@ -258,8 +291,8 @@ def project_nvtx_onto_gpu(nvtx_df, cuda_df):
cuda_tid_df = cuda_gdf.get_group(global_tid)
cuda_nvtx_index_map = _find_cuda_nvtx_ranges(nvtx_tid_df, cuda_tid_df)
- df = _compute_gpu_projection_df(
- filtered_nvtx_df, cuda_tid_df, cuda_nvtx_index_map
+ df = _compute_grouped_gpu_projection_df(
+ filtered_nvtx_df, cuda_tid_df, cuda_nvtx_index_map, True, True
)
dfs.append(df)
diff --git a/nsys_recipe/lib/table_config.py b/nsys_recipe/lib/table_config.py
index e412c4f..db9449e 100644
--- a/nsys_recipe/lib/table_config.py
+++ b/nsys_recipe/lib/table_config.py
@@ -48,6 +48,7 @@ def get_cuda_gpu_dict():
"deviceId",
"contextId",
"greenContextId",
+ "streamId",
],
"CUPTI_ACTIVITY_KIND_MEMSET": [
"correlationId",
@@ -57,6 +58,7 @@ def get_cuda_gpu_dict():
"deviceId",
"contextId",
"greenContextId",
+ "streamId",
],
}
diff --git a/nsys_recipe/recipes/nvtx_gpu_proj_trace/nvtx_gpu_proj_trace.py b/nsys_recipe/recipes/nvtx_gpu_proj_trace/nvtx_gpu_proj_trace.py
index 2f05d50..e52dabe 100644
--- a/nsys_recipe/recipes/nvtx_gpu_proj_trace/nvtx_gpu_proj_trace.py
+++ b/nsys_recipe/recipes/nvtx_gpu_proj_trace/nvtx_gpu_proj_trace.py
@@ -107,6 +107,8 @@ class NvtxGpuProjTrace(recipe.Recipe):
"rangeId": "Range ID",
"parentId": "Parent ID",
"rangeStack": "Range Stack",
+ "deviceId": "Device ID",
+ "streamId": "Stream ID",
}
proj_nvtx_df = proj_nvtx_df.rename(columns=name_dict)[name_dict.values()]
'''


def main():
"""
Expand All @@ -41,7 +146,14 @@ def main():
r"^NVIDIA Nsight Systems version (\d+\.\d+\.\d+)\.\d+-\d+v\d+$", nsys_version
)
assert m is not None, f"Could not parse: {nsys_version}"
if m.group(1) in {"2024.5.1", "2024.6.1"}:
match m.group(1):
case "2024.5.1" | "2024.6.1":
patch_content = nsys_2024_5_and_6_patch_content
case "2024.7.1":
patch_content = nsys_2024_7_patch_content
case _:
patch_content = None
if patch_content is not None:
print(f"Patching Nsight Systems version {m.group(1)}")
# e.g. /opt/nvidia/nsight-systems-cli/2024.7.1/target-linux-x64
tdir = os.path.dirname(os.path.realpath(nsys))
Expand Down

0 comments on commit dd2c902

Please sign in to comment.