1+ DCGM_FR_UNKNOWN , CONTACT_SUPPORT
2+ DCGM_FR_UNRECOGNIZED , CONTACT_SUPPORT
3+ DCGM_FR_PCI_REPLAY_RATE , CONTACT_SUPPORT
4+ DCGM_FR_VOLATILE_DBE_DETECTED , COMPONENT_RESET
5+ DCGM_FR_VOLATILE_SBE_DETECTED , NONE
6+ DCGM_FR_PENDING_PAGE_RETIREMENTS , NONE
7+ DCGM_FR_RETIRED_PAGES_LIMIT , CONTACT_SUPPORT
8+ DCGM_FR_RETIRED_PAGES_DBE_LIMIT , CONTACT_SUPPORT
9+ DCGM_FR_CORRUPT_INFOROM , COMPONENT_RESET
10+ DCGM_FR_CLOCKS_EVENT_THERMAL , CONTACT_SUPPORT
11+ DCGM_FR_CLOCK_THROTTLE_THERMAL , NONE
12+ DCGM_FR_POWER_UNREADABLE , RESTART_VM
13+ DCGM_FR_CLOCKS_EVENT_POWER , NONE
14+ DCGM_FR_CLOCK_THROTTLE_POWER , NONE
15+ DCGM_FR_NVLINK_ERROR_THRESHOLD , NONE
16+ DCGM_FR_NVLINK_DOWN , RESTART_VM
17+ DCGM_FR_NVSWITCH_FATAL_ERROR , CONTACT_SUPPORT
18+ DCGM_FR_NVSWITCH_NON_FATAL_ERROR , NONE
19+ DCGM_FR_NVSWITCH_DOWN , COMPONENT_RESET
20+ DCGM_FR_NO_ACCESS_TO_FILE , CONTACT_SUPPORT
21+ DCGM_FR_NVML_API , CONTACT_SUPPORT
22+ DCGM_FR_DEVICE_COUNT_MISMATCH , CONTACT_SUPPORT
23+ DCGM_FR_BAD_PARAMETER , CONTACT_SUPPORT
24+ DCGM_FR_CANNOT_OPEN_LIB , CONTACT_SUPPORT
25+ DCGM_FR_DENYLISTED_DRIVER , CONTACT_SUPPORT
26+ DCGM_FR_NVML_LIB_BAD , CONTACT_SUPPORT
27+ DCGM_FR_GRAPHICS_PROCESSES , CONTACT_SUPPORT
28+ DCGM_FR_HOSTENGINE_CONN , CONTACT_SUPPORT
29+ DCGM_FR_FIELD_QUERY , RESTART_VM
30+ DCGM_FR_BAD_CUDA_ENV , CONTACT_SUPPORT
31+ DCGM_FR_PERSISTENCE_MODE , CONTACT_SUPPORT
32+ DCGM_FR_LOW_BANDWIDTH , CONTACT_SUPPORT
33+ DCGM_FR_HIGH_LATENCY , CONTACT_SUPPORT
34+ DCGM_FR_CANNOT_GET_FIELD_TAG , CONTACT_SUPPORT
35+ DCGM_FR_FIELD_VIOLATION , RESTART_VM
36+ DCGM_FR_FIELD_THRESHOLD , RESTART_VM
37+ DCGM_FR_FIELD_VIOLATION_DBL , RESTART_VM
38+ DCGM_FR_FIELD_THRESHOLD_DBL , RESTART_VM
39+ DCGM_FR_UNSUPPORTED_FIELD_TYPE , RESTART_VM
40+ DCGM_FR_FIELD_THRESHOLD_TS , RESTART_VM
41+ DCGM_FR_FIELD_THRESHOLD_TS_DBL , RESTART_VM
42+ DCGM_FR_THERMAL_VIOLATIONS , CONTACT_SUPPORT
43+ DCGM_FR_THERMAL_VIOLATIONS_TS , CONTACT_SUPPORT
44+ DCGM_FR_TEMP_VIOLATION , CONTACT_SUPPORT
45+ DCGM_FR_CLOCKS_EVENT_VIOLATION , RESTART_VM
46+ DCGM_FR_THROTTLING_VIOLATION , RESTART_VM
47+ DCGM_FR_INTERNAL , CONTACT_SUPPORT
48+ DCGM_FR_PCIE_GENERATION , CONTACT_SUPPORT
49+ DCGM_FR_PCIE_WIDTH , CONTACT_SUPPORT
50+ DCGM_FR_ABORTED , NONE
51+ DCGM_FR_TEST_DISABLED , NONE
52+ DCGM_FR_CANNOT_GET_STAT , CONTACT_SUPPORT
53+ DCGM_FR_STRESS_LEVEL , RESTART_VM
54+ DCGM_FR_CUDA_API , CONTACT_SUPPORT
55+ DCGM_FR_FAULTY_MEMORY , CONTACT_SUPPORT
56+ DCGM_FR_CANNOT_SET_WATCHES , RESTART_VM
57+ DCGM_FR_CUDA_UNBOUND , RESTART_VM
58+ DCGM_FR_ECC_DISABLED , CONTACT_SUPPORT
59+ DCGM_FR_MEMORY_ALLOC , RESTART_VM
60+ DCGM_FR_CUDA_DBE , CONTACT_SUPPORT
61+ DCGM_FR_MEMORY_MISMATCH , CONTACT_SUPPORT
62+ DCGM_FR_CUDA_DEVICE , CONTACT_SUPPORT
63+ DCGM_FR_ECC_UNSUPPORTED , CONTACT_SUPPORT
64+ DCGM_FR_ECC_PENDING , RESTART_VM
65+ DCGM_FR_MEMORY_BANDWIDTH , RESTART_VM
66+ DCGM_FR_TARGET_POWER , NONE
67+ DCGM_FR_API_FAIL , RESTART_VM
68+ DCGM_FR_API_FAIL_GPU , RESTART_VM
69+ DCGM_FR_CUDA_CONTEXT , CONTACT_SUPPORT
70+ DCGM_FR_DCGM_API , CONTACT_SUPPORT
71+ DCGM_FR_CONCURRENT_GPUS , CONTACT_SUPPORT
72+ DCGM_FR_TOO_MANY_ERRORS , CONTACT_SUPPORT
73+ DCGM_FR_NVLINK_CRC_ERROR_THRESHOLD , CONTACT_SUPPORT
74+ DCGM_FR_NVLINK_ERROR_CRITICAL , CONTACT_SUPPORT
75+ DCGM_FR_ENFORCED_POWER_LIMIT , CONTACT_SUPPORT
76+ DCGM_FR_MEMORY_ALLOC_HOST , RESTART_VM
77+ DCGM_FR_GPU_OP_MODE , CONTACT_SUPPORT
78+ DCGM_FR_NO_MEMORY_CLOCKS , CONTACT_SUPPORT
79+ DCGM_FR_NO_GRAPHICS_CLOCKS , NONE
80+ DCGM_FR_HAD_TO_RESTORE_STATE , RESTART_VM
81+ DCGM_FR_L1TAG_UNSUPPORTED , CONTACT_SUPPORT
82+ DCGM_FR_L1TAG_MISCOMPARE , CONTACT_SUPPORT
83+ DCGM_FR_ROW_REMAP_FAILURE , CONTACT_SUPPORT
84+ DCGM_FR_UNCONTAINED_ERROR , RESTART_VM
85+ DCGM_FR_EMPTY_GPU_LIST , CONTACT_SUPPORT
86+ DCGM_FR_DBE_PENDING_PAGE_RETIREMENTS , RESTART_VM
87+ DCGM_FR_UNCORRECTABLE_ROW_REMAP , NONE
88+ DCGM_FR_PENDING_ROW_REMAP , COMPONENT_RESET
89+ DCGM_FR_BROKEN_P2P_MEMORY_DEVICE , CONTACT_SUPPORT
90+ DCGM_FR_BROKEN_P2P_WRITER_DEVICE , CONTACT_SUPPORT
91+ DCGM_FR_NVSWITCH_NVLINK_DOWN , CONTACT_SUPPORT
92+ DCGM_FR_EUD_BINARY_PERMISSIONS , CONTACT_SUPPORT
93+ DCGM_FR_EUD_NON_ROOT_USER , CONTACT_SUPPORT
94+ DCGM_FR_EUD_SPAWN_FAILURE , CONTACT_SUPPORT
95+ DCGM_FR_EUD_TIMEOUT , CONTACT_SUPPORT
96+ DCGM_FR_EUD_ZOMBIE , CONTACT_SUPPORT
97+ DCGM_FR_EUD_NON_ZERO_EXIT_CODE , CONTACT_SUPPORT
98+ DCGM_FR_EUD_TEST_FAILED , CONTACT_SUPPORT
99+ DCGM_FR_FILE_CREATE_PERMISSIONS , CONTACT_SUPPORT
100+ DCGM_FR_PAUSE_RESUME_FAILED , CONTACT_SUPPORT
101+ DCGM_FR_PCIE_H_REPLAY_VIOLATION , CONTACT_SUPPORT
102+ DCGM_FR_GPU_EXPECTED_NVLINKS_UP , CONTACT_SUPPORT
103+ DCGM_FR_NVSWITCH_EXPECTED_NVLINKS_UP , CONTACT_SUPPORT
104+ DCGM_FR_XID_ERROR , NONE
105+ DCGM_FR_SBE_VIOLATION , CONTACT_SUPPORT
106+ DCGM_FR_DBE_VIOLATION , CONTACT_SUPPORT
107+ DCGM_FR_PCIE_REPLAY_VIOLATION , CONTACT_SUPPORT
108+ DCGM_FR_SBE_THRESHOLD_VIOLATION , CONTACT_SUPPORT
109+ DCGM_FR_DBE_THRESHOLD_VIOLATION , CONTACT_SUPPORT
110+ DCGM_FR_PCIE_REPLAY_THRESHOLD_VIOLATION , CONTACT_SUPPORT
111+ DCGM_FR_CUDA_FM_NOT_INITIALIZED , CONTACT_SUPPORT
112+ DCGM_FR_SXID_ERROR , RESTART_VM
113+ DCGM_FR_GFLOPS_THRESHOLD_VIOLATION , CONTACT_SUPPORT
114+ DCGM_FR_NAN_VALUE , CONTACT_SUPPORT
115+ DCGM_FR_FABRIC_MANAGER_TRAINING_ERROR , CONTACT_SUPPORT
116+ DCGM_FR_BROKEN_P2P_PCIE_MEMORY_DEVICE , CONTACT_SUPPORT
117+ DCGM_FR_BROKEN_P2P_PCIE_WRITER_DEVICE , CONTACT_SUPPORT
118+ DCGM_FR_BROKEN_P2P_NVLINK_MEMORY_DEVICE , CONTACT_SUPPORT
119+ DCGM_FR_BROKEN_P2P_NVLINK_WRITER_DEVICE , CONTACT_SUPPORT
120+ DCGM_FR_TEST_SKIPPED , NONE
121+ DCGM_FR_ERROR_SENTINEL , NONE
0 commit comments