
Commit f7c74e4

emlinmeta-codesync[bot] authored and committed
enable feature score auto collection in EBC (#3475)
Summary:
X-link: pytorch/FBGEMM#5031
Pull Request resolved: #3475
X-link: https://github.com/facebookresearch/FBGEMM/pull/2044

Enable feature score auto collection for EBC in the same way as for EC. The embedding table config is no different from EC's:

    virtual_table_eviction_policy=FeatureScoreBasedEvictionPolicy(
        training_id_eviction_trigger_count=260_000_000,  # 260M
        training_id_keep_count=160_000_000,  # 160M
        enable_auto_feature_score_collection=True,
        feature_score_mapping={
            "sparse_public_original_content_creator": 1.0,
        },
        feature_score_default_value=0.5,
    ),

Reviewed By: EddyLXJ

Differential Revision: D85017179

fbshipit-source-id: 3d62f8adbe201d6e30c445aaed88710bbbcd6557
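For context, a hypothetical sketch (not part of this commit) of how the policy above could be attached to an EBC table config. Only the eviction-policy kwargs are taken from the summary; the table name, dims, feature list, and the exact EmbeddingBagConfig fields are assumptions.

    # Hypothetical EBC table config; only the eviction-policy kwargs come from the summary above.
    from torchrec.modules.embedding_configs import (
        EmbeddingBagConfig,
        FeatureScoreBasedEvictionPolicy,
    )

    example_table = EmbeddingBagConfig(
        name="content_creator_table",  # made-up name
        embedding_dim=128,  # made-up dim
        num_embeddings=1_000_000,  # made-up size
        feature_names=["sparse_public_original_content_creator"],
        use_virtual_table=True,  # assumed field; mirrors the table.use_virtual_table check in embedding_lookup.py below
        virtual_table_eviction_policy=FeatureScoreBasedEvictionPolicy(
            training_id_eviction_trigger_count=260_000_000,  # 260M
            training_id_keep_count=160_000_000,  # 160M
            enable_auto_feature_score_collection=True,
            feature_score_mapping={
                "sparse_public_original_content_creator": 1.0,
            },
            feature_score_default_value=0.5,
        ),
    )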
1 parent 7c7daaf · commit f7c74e4


6 files changed: +339 -8 lines changed


torchrec/distributed/batched_embedding_kernel.py

Lines changed: 39 additions & 1 deletion
@@ -326,6 +326,8 @@ def _populate_zero_collision_tbe_params(
             meta_header_lens[i] = table.virtual_table_eviction_policy.get_meta_header_len()
             if not isinstance(table.virtual_table_eviction_policy, NoEvictionPolicy):
                 enabled = True
+
+    fs_eviction_enabled: bool = False
     if enabled:
         counter_thresholds = [0] * len(config.embedding_tables)
         ttls_in_mins = [0] * len(config.embedding_tables)

@@ -384,6 +386,7 @@ def _populate_zero_collision_tbe_params(
                 raise ValueError(
                     f"Do not support multiple eviction strategy in one tbe {eviction_strategy} and 5 for tables {table_names}"
                 )
+            fs_eviction_enabled = True
         elif isinstance(policy_t, TimestampBasedEvictionPolicy):
             training_id_eviction_trigger_count[i] = (
                 policy_t.training_id_eviction_trigger_count

@@ -459,6 +462,7 @@ def _populate_zero_collision_tbe_params(
         backend_return_whole_row=(backend_type == BackendType.DRAM),
         eviction_policy=eviction_policy,
         embedding_cache_mode=embedding_cache_mode_,
+        feature_score_collection_enabled=fs_eviction_enabled,
     )


@@ -2901,6 +2905,7 @@ def __init__(
         _populate_zero_collision_tbe_params(
             ssd_tbe_params, self._bucket_spec, config, backend_type
         )
+        self._kv_zch_params: KVZCHParams = ssd_tbe_params["kv_zch_params"]
         compute_kernel = config.embedding_tables[0].compute_kernel
         embedding_location = compute_kernel_to_embedding_location(compute_kernel)

@@ -3185,7 +3190,40 @@ def forward(self, features: KeyedJaggedTensor) -> torch.Tensor:
             self._split_weights_res = None
             self._optim.set_sharded_embedding_weight_ids(sharded_embedding_weight_ids=None)

-        return super().forward(features)
+        weights = features.weights_or_none()
+        per_sample_weights = None
+        score_weights = None
+        if weights is not None and weights.dtype == torch.float64:
+            fp32_weights = weights.view(torch.float32)
+            per_sample_weights = fp32_weights[:, 0]
+            score_weights = fp32_weights[:, 1]
+        elif weights is not None and weights.dtype == torch.float32:
+            if self._kv_zch_params.feature_score_collection_enabled:
+                score_weights = weights.view(-1)
+            else:
+                per_sample_weights = weights.view(-1)
+        if features.variable_stride_per_key() and isinstance(
+            self.emb_module,
+            (
+                SplitTableBatchedEmbeddingBagsCodegen,
+                DenseTableBatchedEmbeddingBagsCodegen,
+                SSDTableBatchedEmbeddingBags,
+            ),
+        ):
+            return self.emb_module(
+                indices=features.values().long(),
+                offsets=features.offsets().long(),
+                weights=score_weights,
+                per_sample_weights=per_sample_weights,
+                batch_size_per_feature_per_rank=features.stride_per_key_per_rank(),
+            )
+        else:
+            return self.emb_module(
+                indices=features.values().long(),
+                offsets=features.offsets().long(),
+                weights=score_weights,
+                per_sample_weights=per_sample_weights,
+            )


 class BatchedFusedEmbeddingBag(
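The rewritten forward above unpacks a single float64 weights column into two float32 columns (per-sample weight and feature score); the lookup code in embedding_lookup.py below packs them with torch.cat(...).view(torch.float64). A minimal standalone sketch of that bit-level round trip, using plain PyTorch with illustrative names (not the TorchRec code path):

    import torch

    n = 4
    per_sample_weights = torch.rand(n, 1, dtype=torch.float32)
    feature_scores = torch.full((n, 1), 0.5, dtype=torch.float32)

    # Pack: [n, 2] float32 -> [n, 1] float64 (bitwise reinterpretation, no rounding).
    packed = torch.cat([per_sample_weights, feature_scores], dim=1).view(torch.float64)

    # Unpack (kernel side): [n, 1] float64 -> [n, 2] float32, then split the columns,
    # mirroring fp32_weights[:, 0] / fp32_weights[:, 1] in the forward above.
    unpacked = packed.view(torch.float32)
    assert torch.equal(unpacked[:, 0], per_sample_weights.view(-1))
    assert torch.equal(unpacked[:, 1], feature_scores.view(-1))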

torchrec/distributed/embedding_lookup.py

Lines changed: 35 additions & 4 deletions
@@ -66,6 +66,7 @@
     QuantBatchedEmbeddingBag,
 )
 from torchrec.distributed.types import rank_device, ShardedTensor, ShardingType
+from torchrec.modules.embedding_configs import FeatureScoreBasedEvictionPolicy
 from torchrec.sparse.jagged_tensor import KeyedJaggedTensor

 logger: logging.Logger = logging.getLogger(__name__)

@@ -515,6 +516,23 @@ def __init__(
     ) -> None:
         super().__init__()
         self._emb_modules: nn.ModuleList = nn.ModuleList()
+        self._feature_score_auto_collections: List[bool] = []
+        for config in grouped_configs:
+            collection = False
+            for table in config.embedding_tables:
+                if table.use_virtual_table and isinstance(
+                    table.virtual_table_eviction_policy, FeatureScoreBasedEvictionPolicy
+                ):
+                    if (
+                        table.virtual_table_eviction_policy.enable_auto_feature_score_collection
+                    ):
+                        collection = True
+            self._feature_score_auto_collections.append(collection)
+
+        logger.info(
+            f"GroupedPooledEmbeddingsLookup: {self._feature_score_auto_collections=}"
+        )
+
         for config in grouped_configs:
             self._emb_modules.append(
                 self._create_embedding_kernel(config, device, pg, sharding_type)

@@ -692,8 +710,11 @@ def forward(
         features_by_group = sparse_features.split(
             self._feature_splits,
         )
-        for config, emb_op, features in zip(
-            self.grouped_configs, self._emb_modules, features_by_group
+        for config, emb_op, features, fs_auto_collection in zip(
+            self.grouped_configs,
+            self._emb_modules,
+            features_by_group,
+            self._feature_score_auto_collections,
         ):
             if (
                 config.has_feature_processor

@@ -703,9 +724,19 @@
                 features = self._feature_processor(features)

             if config.is_weighted:
-                features._weights = CommOpGradientScaling.apply(
+                feature_weights = CommOpGradientScaling.apply(
                     features._weights, self._scale_gradient_factor
-                )
+                ).float()
+
+                if fs_auto_collection and features.weights_or_none() is not None:
+                    score_weights = features.weights().float()
+                    assert (
+                        feature_weights.numel() == score_weights.numel()
+                    ), f"feature_weights.numel() {feature_weights.numel()} != score_weights.numel() {score_weights.numel()}"
+                    cat_weights = torch.cat(
+                        [feature_weights, score_weights], dim=1
+                    ).view(torch.float64)
+                    features._weights = cat_weights

             lookup = emb_op(features)
             embeddings.append(lookup)

torchrec/distributed/embeddingbag.py

Lines changed: 27 additions & 0 deletions
@@ -51,6 +51,10 @@
     KJTList,
     ShardedEmbeddingModule,
 )
+from torchrec.distributed.feature_score_utils import (
+    create_sharding_type_to_feature_score_mapping,
+    may_collect_feature_scores,
+)
 from torchrec.distributed.fused_params import (
     FUSED_PARAM_IS_SSD_TABLE,
     FUSED_PARAM_SSD_TABLE_LIST,

@@ -565,6 +569,24 @@ def __init__(
         # forward pass flow control
         self._has_uninitialized_input_dist: bool = True
         self._has_features_permute: bool = True
+
+        self._enable_feature_score_weight_accumulation: bool = False
+        self._enabled_feature_score_auto_collection: bool = False
+        self._sharding_type_feature_score_mapping: Dict[str, Dict[str, float]] = {}
+        (
+            self._enable_feature_score_weight_accumulation,
+            self._enabled_feature_score_auto_collection,
+            self._sharding_type_feature_score_mapping,
+        ) = create_sharding_type_to_feature_score_mapping(
+            self._embedding_bag_configs, self.sharding_type_to_sharding_infos
+        )
+
+        logger.info(
+            f"EBC feature score weight accumulation enabled: {self._enable_feature_score_weight_accumulation}, "
+            f"auto collection enabled: {self._enabled_feature_score_auto_collection}, "
+            f"sharding type to feature score mapping: {self._sharding_type_feature_score_mapping}"
+        )
+
         # Get all fused optimizers and combine them.
         optims = []
         for lookup in self._lookups:

@@ -1565,6 +1587,11 @@ def input_dist(
         features_by_shards = features.split(
             self._feature_splits,
         )
+        features_by_shards = may_collect_feature_scores(
+            features_by_shards,
+            self._enabled_feature_score_auto_collection,
+            self._sharding_type_feature_score_mapping,
+        )
         awaitables = []
         for input_dist, features_by_shard, sharding_type in zip(
             self._input_dists,
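The real may_collect_feature_scores helper lives in torchrec/distributed/feature_score_utils.py and is not part of this diff. Purely as a rough illustration of the idea from the commit summary (score each id from feature_score_mapping, falling back to feature_score_default_value, and carry the scores as KJT weights), a hypothetical, simplified stand-in might look like the following; the function name and the flattened mapping are made up, and the real helper works per sharding type and preserves existing per-sample weights:

    from typing import Dict, List

    import torch
    from torchrec.sparse.jagged_tensor import KeyedJaggedTensor

    def attach_feature_scores(
        kjt: KeyedJaggedTensor,
        feature_score_mapping: Dict[str, float],
        default_value: float = 0.5,
    ) -> KeyedJaggedTensor:
        # Build one score per id, keyed by feature name, in the KJT's key-major value order.
        scores: List[torch.Tensor] = []
        for key, jt in kjt.to_dict().items():
            score = feature_score_mapping.get(key, default_value)
            scores.append(torch.full_like(jt.values(), score, dtype=torch.float32))
        # Re-create the KJT with the scores attached as weights so they flow to the lookup.
        return KeyedJaggedTensor(
            keys=kjt.keys(),
            values=kjt.values(),
            lengths=kjt.lengths(),
            weights=torch.cat(scores) if scores else None,
        )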

torchrec/distributed/feature_score_utils.py

Lines changed: 2 additions & 2 deletions
@@ -17,7 +17,7 @@
 from torchrec.distributed.embedding_types import ShardingType

 from torchrec.modules.embedding_configs import (
-    EmbeddingConfig,
+    BaseEmbeddingConfig,
     FeatureScoreBasedEvictionPolicy,
 )
 from torchrec.sparse.jagged_tensor import KeyedJaggedTensor

@@ -26,7 +26,7 @@


 def create_sharding_type_to_feature_score_mapping(
-    embedding_configs: Sequence[EmbeddingConfig],
+    embedding_configs: Sequence[BaseEmbeddingConfig],
     sharding_type_to_sharding_infos: Dict[str, List[EmbeddingShardingInfo]],
 ) -> Tuple[bool, bool, Dict[str, Dict[str, float]]]:
     enable_feature_score_weight_accumulation = False
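(Widening the annotation from EmbeddingConfig to BaseEmbeddingConfig lets the same helper accept EBC table configs as well as EC ones: EmbeddingBagConfig and EmbeddingConfig both derive from BaseEmbeddingConfig.)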
