
Commit 998f96f

Guangyu Wang authored and facebook-github-bot committed
support MTIA in DMPCollection (#3100)
Summary: Pull Request resolved: #3100
tsia
Reviewed By: jvandebon
Differential Revision: D76608091
fbshipit-source-id: 579f97c614bd2997150a7fdc315c191341c9056a
1 parent 820bc7a · commit 998f96f

File tree

1 file changed: +4, -2 lines

torchrec/distributed/model_parallel.py

Lines changed: 4 additions & 2 deletions
@@ -776,7 +776,9 @@ def __init__(
         use_inter_host_allreduce: bool = False,
         custom_all_reduce: Optional[Callable[[List[torch.Tensor]], None]] = None,
     ) -> None:
-        assert device.type == "cuda", "DMPCollection only supports CUDA"
+        assert (
+            device.type == "cuda" or device.type == "mtia"
+        ), "DMPCollection only supports CUDA or MTIA"
         self._device = device
         self._pg: dist.ProcessGroup = global_pg
         self._plan: ShardingPlan = plan
@@ -1013,7 +1015,7 @@ def _remap_sharding_plan(
                 else:
                     shard_rank = shard.placement._rank * step + group_start
                 shard.placement = _remote_device(
-                    f"rank:{shard_rank}/cuda:{shard_rank % get_local_size()}"
+                    f"rank:{shard_rank}/{self._device.type}:{shard_rank % get_local_size()}"
                 )
         return
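
With this change, DMPCollection accepts any device whose type is "cuda" or "mtia", and remapped shard placements follow the wrapper's device type instead of hard-coding "cuda". Below is a minimal standalone sketch of those two behaviors; check_dmp_collection_device, remapped_placement, and local_size are hypothetical names used only for illustration and are not part of torchrec's API (local_size stands in for torchrec's get_local_size()).

import torch


def check_dmp_collection_device(device: torch.device) -> None:
    # Mirrors the updated assertion: CUDA and MTIA device types are both accepted.
    assert (
        device.type == "cuda" or device.type == "mtia"
    ), "DMPCollection only supports CUDA or MTIA"


def remapped_placement(device: torch.device, shard_rank: int, local_size: int) -> str:
    # Mirrors the updated placement string in _remap_sharding_plan: the remote
    # device now uses the wrapper's device type rather than a hard-coded "cuda".
    return f"rank:{shard_rank}/{device.type}:{shard_rank % local_size}"


if __name__ == "__main__":
    check_dmp_collection_device(torch.device("cuda"))    # passes, as before
    # check_dmp_collection_device(torch.device("mtia"))  # now also passes on MTIA-enabled builds
    print(remapped_placement(torch.device("cuda"), shard_rank=9, local_size=8))
    # prints: rank:9/cuda:1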
