55
55
56
56
@dataclass
57
57
class RunOptions :
58
+ """
59
+ Configuration options for running sparse neural network benchmarks.
60
+
61
+ This class defines the parameters that control how the benchmark is executed,
62
+ including distributed training settings, batch configuration, and profiling options.
63
+
64
+ Args:
65
+ world_size (int): Number of processes/GPUs to use for distributed training.
66
+ Default is 2.
67
+ num_batches (int): Number of batches to process during the benchmark.
68
+ Default is 10.
69
+ sharding_type (ShardingType): Strategy for sharding embedding tables across devices.
70
+ Default is ShardingType.TABLE_WISE (each table is placed entirely on a single device).
71
+ input_type (str): Type of input format to use for the model.
72
+ Default is "kjt" (KeyedJaggedTensor).
73
+ profile (str): Directory to save profiling results. If empty, profiling is disabled.
74
+ Default is "" (disabled).
75
+ """
76
+
58
77
world_size : int = 2
59
78
num_batches : int = 10
60
79
sharding_type : ShardingType = ShardingType .TABLE_WISE
@@ -64,6 +83,22 @@ class RunOptions:
64
83
65
84
@dataclass
66
85
class EmbeddingTablesConfig :
86
+ """
87
+ Configuration for embedding tables used in sparse neural network benchmarks.
88
+
89
+ This class defines the parameters for generating embedding tables with both weighted
90
+ and unweighted features. It provides a method to generate the actual embedding bag
91
+ configurations that can be used to create embedding tables.
92
+
93
+ Args:
94
+ num_unweighted_features (int): Number of unweighted features to generate.
95
+ Default is 100.
96
+ num_weighted_features (int): Number of weighted features to generate.
97
+ Default is 100.
98
+ embedding_feature_dim (int): Dimension of the embedding vectors.
99
+ Default is 512.
100
+ """
101
+
67
102
num_unweighted_features : int = 100
68
103
num_weighted_features : int = 100
69
104
embedding_feature_dim : int = 512
@@ -74,6 +109,21 @@ def generate_tables(
74
109
List [EmbeddingBagConfig ],
75
110
List [EmbeddingBagConfig ],
76
111
]:
112
+ """
113
+ Generate embedding bag configurations for both unweighted and weighted features.
114
+
115
+ This method creates two lists of EmbeddingBagConfig objects:
116
+ 1. Unweighted tables: Named as "table_{i}" with feature names "feature_{i}"
117
+ 2. Weighted tables: Named as "weighted_table_{i}" with feature names "weighted_feature_{i}"
118
+
119
+ For both types, the number of embeddings is derived from the feature index i,
120
+ calculated as max(i + 1, 100) * 1000 (i.e. 100,000 until i + 1 exceeds 100).
121
+
122
+ Returns:
123
+ Tuple[List[EmbeddingBagConfig], List[EmbeddingBagConfig]]: A tuple containing
124
+ two lists - the first for unweighted embedding tables and the second for
125
+ weighted embedding tables.
126
+ """
77
127
tables = [
78
128
EmbeddingBagConfig (
79
129
num_embeddings = max (i + 1 , 100 ) * 1000 ,
@@ -97,12 +147,50 @@ def generate_tables(
97
147
98
148
@dataclass
99
149
class PipelineConfig :
150
+ """
151
+ Configuration for training pipelines used in sparse neural network benchmarks.
152
+
153
+ This class defines the parameters for configuring the training pipeline and provides
154
+ a method to generate the appropriate pipeline instance based on the configuration.
155
+
156
+ Args:
157
+ pipeline (str): The type of training pipeline to use. Options include:
158
+ - "base": Basic training pipeline
159
+ - "sparse": Pipeline optimized for sparse operations
160
+ - "fused": Pipeline with fused sparse distribution
161
+ - "semi": Semi-synchronous training pipeline
162
+ - "prefetch": Pipeline with prefetching for sparse distribution
163
+ Default is "base".
164
+ emb_lookup_stream (str): The stream to use for embedding lookups.
165
+ Only used by certain pipeline types (e.g., "fused").
166
+ Default is "data_dist".
167
+ """
168
+
100
169
pipeline : str = "base"
101
170
emb_lookup_stream : str = "data_dist"
102
171
103
172
def generate_pipeline (
104
173
self , model : nn .Module , opt : torch .optim .Optimizer , device : torch .device
105
174
) -> Union [TrainPipelineBase , TrainPipelineSparseDist ]:
175
+ """
176
+ Generate a training pipeline instance based on the configuration.
177
+
178
+ This method creates and returns the appropriate training pipeline object
179
+ based on the pipeline type specified in the configuration. Different
180
+ pipeline types are optimized for different training scenarios.
181
+
182
+ Args:
183
+ model (nn.Module): The model to be trained.
184
+ opt (torch.optim.Optimizer): The optimizer to use for training.
185
+ device (torch.device): The device to run the training on.
186
+
187
+ Returns:
188
+ Union[TrainPipelineBase, TrainPipelineSparseDist]: An instance of the
189
+ appropriate training pipeline class based on the configuration.
190
+
191
+ Raises:
192
+ RuntimeError: If an unknown pipeline type is specified.
193
+ """
106
194
_pipeline_cls : Dict [
107
195
str , Type [Union [TrainPipelineBase , TrainPipelineSparseDist ]]
108
196
] = {
@@ -228,6 +316,10 @@ def runner(
228
316
input_config : TestSparseNNInputConfig ,
229
317
pipeline_config : PipelineConfig ,
230
318
) -> None :
319
+ # Ensure GPUs are available and we have enough of them
320
+ assert (
321
+ torch .cuda .is_available () and torch .cuda .device_count () >= world_size
322
+ ), "CUDA not available or insufficient GPUs for the requested world_size"
231
323
232
324
torch .autograd .set_detect_anomaly (True )
233
325
with MultiProcessContext (
0 commit comments