
Commit 60b408d

Merge branch 'pytorch:master' into generator
2 parents 80b2078 + df6798d commit 60b408d

7 files changed: +204 additions, -114 deletions

.github/workflows/build_and_test.yml

Lines changed: 8 additions & 1 deletion
@@ -42,7 +42,14 @@ jobs:
     with:
       dev-image: us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.12_tpuvm
       timeout-minutes: 45 # Takes ~20m as of 2025/5/30.
-      has_code_changes: ${{ needs.check_code_changes.outputs.has_code_changes }}
+      # We should build PyTorch and PyTorch/XLA if:
+      # 1. There are code changes.
+      # 2. This is a `push` event to `master` or release branches.
+      #
+      # The reason for (2) is that the `push-docs` job below runs precisely under condition (2).
+      # For it (the `push-docs` job) to succeed, it needs to install the PyTorch
+      # and PyTorch/XLA wheels, so this job must build those wheels.
+      has_code_changes: ${{ (needs.check_code_changes.outputs.has_code_changes == 'true' || github.event_name == 'push') && 'true' || 'false' }}
       runner: linux.24xlarge
     secrets:
       gcloud-service-key: ${{ secrets.GCLOUD_SERVICE_KEY }}
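
For reference, `cond && 'true' || 'false'` in the new expression is the GitHub Actions idiom for a ternary. A minimal Python model of the intended semantics (illustrative only; Actions evaluates the expression natively and the function below is not project code):

def should_build(has_code_changes: str, event_name: str) -> str:
  # Build the wheels when there are code changes, or on any `push` event
  # (so the `push-docs` job can install them).
  return "true" if has_code_changes == "true" or event_name == "push" else "false"

assert should_build("false", "push") == "true"
assert should_build("true", "pull_request") == "true"
assert should_build("false", "pull_request") == "false"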

README.md

Lines changed: 4 additions & 0 deletions
@@ -1,5 +1,9 @@
 # PyTorch/XLA
 
+> [!NOTE]
+> <b>10/2025</b>: Based on community feedback, we have proposed a more native direction for PyTorch on TPU. Read the RFC and comment at [#9684](https://github.com/pytorch/xla/issues/9684).
+>
+
 <b>Current CI status:</b> ![GitHub Actions
 status](https://github.com/pytorch/xla/actions/workflows/build_and_test.yml/badge.svg)
 

test/stablehlo/test_stablehlo_custom_call.py

Lines changed: 2 additions & 1 deletion
@@ -1,3 +1,4 @@
+import expecttest
 import sys
 import re
 import unittest
@@ -16,7 +17,7 @@
 m = Library("my_custom_library", "DEF")
 
 
-class StableHLOCustomCallExportTest(unittest.TestCase):
+class StableHLOCustomCallExportTest(expecttest.TestCase):
 
   def test_single_output(self):
 
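
Switching the base class from unittest.TestCase to expecttest.TestCase gives this suite access to expecttest's inline-expect assertions (the same ones used in test/test_ops_error_message.py below). A minimal sketch of that pattern, with illustrative values that are not part of this commit; as I understand it, expecttest.TestCase subclasses unittest.TestCase and can rewrite the inline expect strings when run with EXPECTTEST_ACCEPT=1:

import unittest

import expecttest


class ExampleTest(expecttest.TestCase):

  def test_inline_expect(self):
    # Compares the actual string against the inline expected literal.
    self.assertExpectedInline(str(1 + 1), """2""")


if __name__ == "__main__":
  unittest.main()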

test/test_ops_error_message.py

Lines changed: 51 additions & 0 deletions
@@ -1,3 +1,4 @@
+from typing import Callable
 import expecttest
 import os
 import torch
@@ -357,6 +358,56 @@ def gen_test_fn(kernel_size=[2, 2, 2], stride=[], padding=[0]):
         expect="""avg_pool3d(): expected argument padding [1, 2] (size: 2) to have size of 3."""
     )
 
+  def _get_custom_call_properties(self, mode):
+    match mode:
+      case "tpu":
+        return (torch_xla._XLAC._xla_tpu_custom_call, "", [])
+      case "stablehlo":
+        return (torch_xla._XLAC._xla_custom_call, "custom_op_target",
+                [False, "", 0, {}])
+
+    self.fail(f"expected `mode` ({mode}) to be either of ['tpu', 'stablehlo'].")
+
+  def _gen_custom_call_no_input(self, mode):
+    lib_custom_call, payload, args = self._get_custom_call_properties(
+        mode)  # type: ignore[attr-defined]
+    return lambda: lib_custom_call([], payload, [[1]], [torch.int8], *args)
+
+  def _gen_custom_call_output_properties_size_mismatch(self, mode):
+    lib_custom_call, payload, args = self._get_custom_call_properties(
+        mode)  # type: ignore[attr-defined]
+    input = torch.rand(10, device=torch_xla.device())
+    return lambda: lib_custom_call(
+        (input,), payload, [[1], [1]], [torch.int8], *args)
+
+  def test_stablehlo_custom_call(self):
+
+    self.assertExpectedRaisesInline(
+        exc_type=RuntimeError,
+        callable=self._gen_custom_call_no_input("stablehlo"),
+        expect="""custom_call(custom_op_target): expected at least 1 input tensor."""
+    )
+
+    self.assertExpectedRaisesInline(
+        exc_type=RuntimeError,
+        callable=self._gen_custom_call_output_properties_size_mismatch(
+            "stablehlo"),
+        expect="""custom_call(custom_op_target): expected the given output shapes (size=2) to be of the same size as the given output dtypes (size=1)."""
+    )
+
+  def test_tpu_custom_call(self):
+
+    self.assertExpectedRaisesInline(
+        exc_type=RuntimeError,
+        callable=self._gen_custom_call_no_input("tpu"),
+        expect="""tpu_custom_call(): expected at least 1 input tensor.""")
+
+    self.assertExpectedRaisesInline(
+        exc_type=RuntimeError,
+        callable=self._gen_custom_call_output_properties_size_mismatch("tpu"),
+        expect="""tpu_custom_call(): expected the given output shapes (size=2) to be of the same size as the given output dtypes (size=1)."""
+    )
+
 
 if __name__ == "__main__":
   unittest.main()

torch_xla/csrc/init_python_bindings.cpp

Lines changed: 18 additions & 30 deletions
@@ -347,21 +347,6 @@ std::vector<std::vector<int64_t>> CreateReduceGroups(const py::list& groups) {
   return replica_groups;
 }
 
-std::vector<at::Tensor> TpuCustomCall(
-    const std::vector<at::Tensor>& inputs, const std::string& payload,
-    const std::vector<std::vector<int64_t>>& output_shapes,
-    const std::vector<py::object>& output_dtypes) {
-  std::vector<at::ScalarType> dtypes;
-  dtypes.reserve(output_dtypes.size());
-  for (auto& dtype : output_dtypes) {
-    dtypes.push_back(reinterpret_cast<THPDtype*>(dtype.ptr())->scalar_type);
-  }
-  XLA_ASSIGN_OR_THROW(std::vector<absl_nonnull XLATensorPtr> xla_inputs,
-                      bridge::GetXlaTensors(inputs));
-  return bridge::AtenFromXlaTensors(tensor_methods::tpu_custom_call(
-      xla_inputs, payload, output_shapes, dtypes));
-}
-
 std::vector<std::vector<int>> ExtractXlaDotGeneralDimVectors(
     const py::tuple& dimension_numbers) {
   // Expect Python arg `dimension_numbers` to be
@@ -3116,30 +3101,33 @@ void InitXlaModuleBindings(py::module m) {
       "_xla_custom_call",
       [](const std::vector<at::Tensor>& inputs, const std::string& target,
          const std::vector<std::vector<int64_t>>& output_shapes,
-         const std::vector<py::object>& output_dtypes, bool has_side_effect,
+         const std::vector<at::ScalarType>& output_dtypes, bool has_side_effect,
          const std::string& backend_config, const int api_version,
          const std::unordered_map<std::string, std::string>&
             frontend_attributes) -> std::vector<at::Tensor> {
-        std::vector<at::ScalarType> dtypes;
-        dtypes.reserve(output_dtypes.size());
-        for (auto& dtype : output_dtypes) {
-          dtypes.push_back(
-              reinterpret_cast<THPDtype*>(dtype.ptr())->scalar_type);
-        }
 
-        XLA_ASSIGN_OR_THROW(std::vector<absl_nonnull XLATensorPtr> xla_inputs, bridge::GetXlaTensors(inputs));
-        auto xtensors = tensor_methods::custom_call(
-            xla_inputs, target,
-            output_shapes, dtypes, has_side_effect, backend_config,
-            api_version, frontend_attributes);
-        return bridge::AtenFromXlaTensors(std::move(xtensors));
+        XLA_ASSIGN_OR_THROW(std::vector<absl_nonnull XLATensorPtr> xla_inputs,
+                            bridge::GetXlaTensors(inputs));
+        XLA_ASSIGN_OR_THROW(std::vector<absl_nonnull XLATensorPtr> xla_outputs,
+                            tensor_methods::custom_call(
+                                xla_inputs, target, output_shapes, output_dtypes,
+                                has_side_effect, backend_config, api_version,
+                                frontend_attributes));
+
+        return bridge::AtenFromXlaTensors(std::move(xla_outputs));
       })
      .def("_xla_tpu_custom_call",
           [](const std::vector<at::Tensor>& inputs, const std::string& payload,
              const std::vector<std::vector<int64_t>>& output_shapes,
-             const std::vector<py::object>& output_dtypes)
+             const std::vector<at::ScalarType>& output_dtypes)
              -> std::vector<at::Tensor> {
-            return TpuCustomCall(inputs, payload, output_shapes, output_dtypes);
+
+            XLA_ASSIGN_OR_THROW(std::vector<absl_nonnull XLATensorPtr> xla_inputs,
+                                bridge::GetXlaTensors(inputs));
+            XLA_ASSIGN_OR_THROW(std::vector<absl_nonnull XLATensorPtr> xla_outputs,
+                                tensor_methods::tpu_custom_call(xla_inputs, payload, output_shapes, output_dtypes));
+
+            return bridge::AtenFromXlaTensors(std::move(xla_outputs));
           })
      .def("_xla_register_custom_call_target",
          [](const std::string& fn_name, const py::capsule& function_ptr,
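
With the dtype conversion moved into pybind11 (both bindings now take std::vector<at::ScalarType> directly instead of py::object), the Python side can pass torch dtypes straight through. A hedged usage sketch of the Python-facing signatures, inferred from the argument order exercised in test/test_ops_error_message.py above; the target name, payload, and output shapes here are illustrative, and actually executing the resulting tensors still requires a valid custom-call target or TPU payload:

import torch
import torch_xla

x = torch.rand(10, device=torch_xla.device())

# StableHLO custom call: (inputs, target, output_shapes, output_dtypes,
# has_side_effect, backend_config, api_version, frontend_attributes).
outs = torch_xla._XLAC._xla_custom_call((x,), "custom_op_target", [[10]],
                                        [torch.int8], False, "", 0, {})

# TPU custom call: (inputs, payload, output_shapes, output_dtypes).
outs = torch_xla._XLAC._xla_tpu_custom_call((x,), "", [[10]], [torch.int8])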
