
Commit 714d996

Caner Gocmen authored and facebook-github-bot committed
Add hashing for ParameterConstraints (#3009)
Summary:
Pull Request resolved: #3009

We are adding a hashing method for ParameterConstraints. We will later use this to ensure that a previously generated sharding plan is still valid when we want to reuse it later on.

Reviewed By: iamzainhuda

Differential Revision: D75553542

fbshipit-source-id: 7192459c9f4f749e82fac844119d3035721877e8
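The reuse-validation flow mentioned in the summary is not implemented in this commit. A minimal sketch of how the new hash could gate plan reuse, assuming a hypothetical cache that stores the constraint hash alongside a generated plan (plan_is_reusable and cached_plan_hash are illustrative names, not torchrec APIs):

from typing import Dict, Optional

from torchrec.distributed.planner.types import ParameterConstraints


def plan_is_reusable(
    constraints: Dict[str, ParameterConstraints],
    cached_plan_hash: Optional[int],
) -> bool:
    # Hypothetical helper: reuse a cached sharding plan only if the
    # constraints it was generated under hash identically to the
    # current per-table constraints.
    current_hash = hash(
        tuple(sorted((name, hash(pc)) for name, pc in constraints.items()))
    )
    return cached_plan_hash == current_hash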
1 parent e0d4b6c commit 714d996

File tree

2 files changed, +114 -1 lines changed


torchrec/distributed/planner/tests/test_types.py

Lines changed: 93 additions & 1 deletion
@@ -14,12 +14,17 @@
 import torch
 from torchrec.distributed.embedding_types import EmbeddingComputeKernel
 
-from torchrec.distributed.planner.types import Shard, ShardingOption
+from torchrec.distributed.planner.types import (
+    ParameterConstraints,
+    Shard,
+    ShardingOption,
+)
 from torchrec.distributed.types import (
     BoundsCheckMode,
     CacheAlgorithm,
     CacheParams,
     DataType,
+    KeyValueParams,
     ShardingType,
 )
 from torchrec.modules.embedding_configs import EmbeddingBagConfig, EmbeddingConfig
@@ -207,3 +212,90 @@ def test_module_pooled_mch_ec(self) -> None:
             shards=[Shard(size=shard_size, offset=offset) for offset in shard_offsets],
         )
         self.assertEqual(sharding_option.is_pooled, False)
+
+
+class TestParameterConstraintsHash(unittest.TestCase):
+
+    def test_hash_equality(self) -> None:
+        # Create two identical instances
+        pc1 = ParameterConstraints(
+            sharding_types=["type1", "type2"],
+            compute_kernels=["kernel1"],
+            min_partition=4,
+            pooling_factors=[1.0, 2.0],
+            num_poolings=[1.0],
+            batch_sizes=[32],
+            is_weighted=True,
+            cache_params=CacheParams(),
+            enforce_hbm=True,
+            stochastic_rounding=False,
+            bounds_check_mode=BoundsCheckMode(1),
+            feature_names=["feature1", "feature2"],
+            output_dtype=DataType.FP32,
+            device_group="cuda",
+            key_value_params=KeyValueParams(),
+        )
+
+        pc2 = ParameterConstraints(
+            sharding_types=["type1", "type2"],
+            compute_kernels=["kernel1"],
+            min_partition=4,
+            pooling_factors=[1.0, 2.0],
+            num_poolings=[1.0],
+            batch_sizes=[32],
+            is_weighted=True,
+            cache_params=CacheParams(),
+            enforce_hbm=True,
+            stochastic_rounding=False,
+            bounds_check_mode=BoundsCheckMode(1),
+            feature_names=["feature1", "feature2"],
+            output_dtype=DataType.FP32,
+            device_group="cuda",
+            key_value_params=KeyValueParams(),
+        )
+
+        self.assertEqual(
+            hash(pc1), hash(pc2), "Hashes should be equal for identical instances"
+        )
+
+    def test_hash_inequality(self) -> None:
+        # Create two different instances
+        pc1 = ParameterConstraints(
+            sharding_types=["type1"],
+            compute_kernels=["kernel1"],
+            min_partition=4,
+            pooling_factors=[1.0],
+            num_poolings=[1.0],
+            batch_sizes=[32],
+            is_weighted=True,
+            cache_params=CacheParams(),
+            enforce_hbm=True,
+            stochastic_rounding=False,
+            bounds_check_mode=BoundsCheckMode(1),
+            feature_names=["feature1"],
+            output_dtype=DataType.FP32,
+            device_group="cuda",
+            key_value_params=KeyValueParams(),
+        )
+
+        pc2 = ParameterConstraints(
+            sharding_types=["type2"],
+            compute_kernels=["kernel2"],
+            min_partition=8,
+            pooling_factors=[2.0],
+            num_poolings=[2.0],
+            batch_sizes=[64],
+            is_weighted=False,
+            cache_params=CacheParams(),
+            enforce_hbm=False,
+            stochastic_rounding=True,
+            bounds_check_mode=BoundsCheckMode(1),
+            feature_names=["feature2"],
+            output_dtype=DataType.FP16,
+            device_group="cpu",
+            key_value_params=KeyValueParams(),
+        )
+
+        self.assertNotEqual(
+            hash(pc1), hash(pc2), "Hashes should be different for different instances"
+        )
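The new test class can be run on its own with the standard unittest runner, e.g.:

    python -m unittest torchrec.distributed.planner.tests.test_types.TestParameterConstraintsHash -v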

torchrec/distributed/planner/types.py

Lines changed: 21 additions & 0 deletions
@@ -703,6 +703,27 @@ class ParameterConstraints:
     device_group: Optional[str] = None
     key_value_params: Optional[KeyValueParams] = None
 
+    def __hash__(self) -> int:
+        return hash(
+            (
+                tuple(self.sharding_types) if self.sharding_types else None,
+                tuple(self.compute_kernels) if self.compute_kernels else None,
+                self.min_partition,
+                tuple(self.pooling_factors),
+                tuple(self.num_poolings) if self.num_poolings else None,
+                tuple(self.batch_sizes) if self.batch_sizes else None,
+                self.is_weighted,
+                self.cache_params,
+                self.enforce_hbm,
+                self.stochastic_rounding,
+                self.bounds_check_mode,
+                tuple(self.feature_names) if self.feature_names else None,
+                self.output_dtype,
+                self.device_group,
+                self.key_value_params,
+            )
+        )
+
 
 class PlannerErrorType(Enum):
     """
