GoogleCloudPlatform · syeda-anjum · Dec 16, 2025 · Dec 17, 2025 · Dec 18, 2025 · Dec 18, 2025
diff --git a/.github/workflows/dictionary/accelerated-platforms.txt b/.github/workflows/dictionary/accelerated-platforms.txt
@@ -54,6 +54,7 @@ prereqs
 psutil
 qwiklabs
 rayutil
+rtxpro
 rueth
 safetensors
 scann

diff --git a/...ke/base/use-cases/inference-ref-arch/online-inference-gpu/vllm-with-hf-model.md b/...ke/base/use-cases/inference-ref-arch/online-inference-gpu/vllm-with-hf-model.md
@@ -162,15 +162,15 @@ This example is built on top of the
 
   - Select an accelerator.
 
-    | Model                          | l4  | h100 | h200 |
-    | ------------------------------ | --- | ---- | ---- |
-    | gemma-3-1b-it                  | ✅  | ❌   | ❌   |
-    | gemma-3-4b-it                  | ✅  | ❌   | ❌   |
-    | gemma-3-27b-it                 | ✅  | ✅   | ✅   |
-    | gpt-oss-20b                    | ✅  | ✅   | ✅   |
-    | llama-3.3-70b-instruct         | ❌  | ✅   | ✅   |
-    | llama-4-scout-17b-16e-instruct | ❌  | ✅   | ✅   |
-    | qwen3-32b                      | ✅  | ✅   | ✅   |
+    | Model                          | l4  | h100 | h200 | rtx-pro-6000 |
+    | ------------------------------ | --- | ---- | ---- | ------------ |
+    | gemma-3-1b-it                  | ✅  | ❌   | ❌   | ❌           |
+    | gemma-3-4b-it                  | ✅  | ❌   | ❌   | ❌           |
+    | gemma-3-27b-it                 | ✅  | ✅   | ✅   | ✅           |
+    | gpt-oss-20b                    | ✅  | ✅   | ✅   | ✅           |
+    | llama-3.3-70b-instruct         | ❌  | ✅   | ✅   | ✅           |
+    | llama-4-scout-17b-16e-instruct | ❌  | ✅   | ✅   | ✅           |
+    | qwen3-32b                      | ✅  | ✅   | ✅   | ✅           |
 
     - **NVIDIA Tesla L4 24GB**:
 
@@ -190,6 +190,12 @@ This example is built on top of the
       export ACCELERATOR_TYPE="h200"
       ```
 
+    - **NVIDIA RTX PRO 6000 96GB**:
+
+      ```shell
+      export ACCELERATOR_TYPE="rtx-pro-6000"
+      ```
+
     Ensure that you have enough quota in your project to provision the selected
     accelerator type. For more information about viewing GPU quotas, see
     [Allocation quotas: GPU quota](https://cloud.google.com/compute/resource-usage#gpu_quota).

diff --git a/...ss/templates/manifests/gpu/rtx-pro-6000-96gb/custom-compute-gpu-rtx-pro-6000-96gb-x1.yaml b/...ss/templates/manifests/gpu/rtx-pro-6000-96gb/custom-compute-gpu-rtx-pro-6000-96gb-x1.yaml
@@ -0,0 +1,96 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: cloud.google.com/v1
+kind: ComputeClass
+metadata:
+  name: gpu-rtx-pro-6000-96gb-x1  # 1x nvidia-rtx-pro-6000 on g4-standard-48
+spec:
+  activeMigration:
+    optimizeRulePriority: true
+  nodePoolConfig:
+    imageStreaming:
+      enabled: true
+  nodePoolAutoCreation:
+    enabled: true
+  priorities:
+    # Use a specific reservation (disabled; uncomment and replace <RESERVATION_NAME>)
+    # - gpu:
+    #     count: 1
+    #     driverVersion: latest
+    #     type: nvidia-rtx-pro-6000
+    #   machineType: g4-standard-48
+    #   maxPodsPerNode: 32
+    #   reservations:
+    #     affinity: Specific
+    #     specific:
+    #       - name: nvidia-rtx-pro-6000-specific
+    #         reservationBlock:
+    #           name: <RESERVATION_NAME>
+    #   spot: false
+
+    # Use any reservation (affinity: AnyBestEffort)
+    - gpu:
+        count: 1
+        driverVersion: latest
+        type: nvidia-rtx-pro-6000
+      machineType: g4-standard-48
+      maxPodsPerNode: 32
+      reservations:
+        affinity: AnyBestEffort
+      spot: false
+
+    # Use on-demand capacity (no reservation, spot: false)
+    - gpu:
+        count: 1
+        driverVersion: latest
+        type: nvidia-rtx-pro-6000
+      machineType: g4-standard-48
+      maxPodsPerNode: 32
+      spot: false
+
+    # Use DWS FlexStart with a 7-day run limit
+    - flexStart:
+        enabled: true
+        nodeRecycling:
+          leadTimeSeconds: 3600  # 1 hour
+      gpu:
+        count: 1
+        driverVersion: latest
+        type: nvidia-rtx-pro-6000
+      machineType: g4-standard-48
+      maxPodsPerNode: 32
+      maxRunDurationSeconds: 604800  # 7 days
+
+    # Use DWS FlexStart with a 1-day run limit
+    - flexStart:
+        enabled: true
+        nodeRecycling:
+          leadTimeSeconds: 3600  # 1 hour
+      gpu:
+        count: 1
+        driverVersion: latest
+        type: nvidia-rtx-pro-6000
+      machineType: g4-standard-48
+      maxPodsPerNode: 32
+      maxRunDurationSeconds: 86400  # 1 day
+
+    # Use spot capacity (spot: true)
+    - gpu:
+        count: 1
+        driverVersion: latest
+        type: nvidia-rtx-pro-6000
+      machineType: g4-standard-48
+      maxPodsPerNode: 32
+      spot: true
diff --git a/...ss/templates/manifests/gpu/rtx-pro-6000-96gb/custom-compute-gpu-rtx-pro-6000-96gb-x2.yaml b/...ss/templates/manifests/gpu/rtx-pro-6000-96gb/custom-compute-gpu-rtx-pro-6000-96gb-x2.yaml
@@ -0,0 +1,96 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: cloud.google.com/v1
+kind: ComputeClass
+metadata:
+  name: gpu-rtx-pro-6000-96gb-x2  # 2x nvidia-rtx-pro-6000 on g4-standard-96
+spec:
+  activeMigration:
+    optimizeRulePriority: true
+  nodePoolConfig:
+    imageStreaming:
+      enabled: true
+  nodePoolAutoCreation:
+    enabled: true
+  priorities:
+    # Use a specific reservation (disabled; uncomment and replace <RESERVATION_NAME>)
+    # - gpu:
+    #     count: 2
+    #     driverVersion: latest
+    #     type: nvidia-rtx-pro-6000
+    #   machineType: g4-standard-96
+    #   maxPodsPerNode: 32
+    #   reservations:
+    #     affinity: Specific
+    #     specific:
+    #       - name: nvidia-rtx-pro-6000-specific
+    #         reservationBlock:
+    #           name: <RESERVATION_NAME>
+    #   spot: false
+
+    # Use any reservation (affinity: AnyBestEffort)
+    - gpu:
+        count: 2
+        driverVersion: latest
+        type: nvidia-rtx-pro-6000
+      machineType: g4-standard-96
+      maxPodsPerNode: 32
+      reservations:
+        affinity: AnyBestEffort
+      spot: false
+
+    # Use on-demand capacity (no reservation, spot: false)
+    - gpu:
+        count: 2
+        driverVersion: latest
+        type: nvidia-rtx-pro-6000
+      machineType: g4-standard-96
+      maxPodsPerNode: 32
+      spot: false
+
+    # Use DWS FlexStart with a 7-day run limit
+    - flexStart:
+        enabled: true
+        nodeRecycling:
+          leadTimeSeconds: 3600  # 1 hour
+      gpu:
+        count: 2
+        driverVersion: latest
+        type: nvidia-rtx-pro-6000
+      machineType: g4-standard-96
+      maxPodsPerNode: 32
+      maxRunDurationSeconds: 604800  # 7 days
+
+    # Use DWS FlexStart with a 1-day run limit
+    - flexStart:
+        enabled: true
+        nodeRecycling:
+          leadTimeSeconds: 3600  # 1 hour
+      gpu:
+        count: 2
+        driverVersion: latest
+        type: nvidia-rtx-pro-6000
+      machineType: g4-standard-96
+      maxPodsPerNode: 32
+      maxRunDurationSeconds: 86400  # 1 day
+
+    # Use spot capacity (spot: true)
+    - gpu:
+        count: 2
+        driverVersion: latest
+        type: nvidia-rtx-pro-6000
+      machineType: g4-standard-96
+      maxPodsPerNode: 32
+      spot: true
diff --git a/...ss/templates/manifests/gpu/rtx-pro-6000-96gb/custom-compute-gpu-rtx-pro-6000-96gb-x4.yaml b/...ss/templates/manifests/gpu/rtx-pro-6000-96gb/custom-compute-gpu-rtx-pro-6000-96gb-x4.yaml
@@ -0,0 +1,96 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: cloud.google.com/v1
+kind: ComputeClass
+metadata:
+  name: gpu-rtx-pro-6000-96gb-x4  # 4x nvidia-rtx-pro-6000 on g4-standard-192
+spec:
+  activeMigration:
+    optimizeRulePriority: true
+  nodePoolConfig:
+    imageStreaming:
+      enabled: true
+  nodePoolAutoCreation:
+    enabled: true
+  priorities:
+    # Use a specific reservation (disabled; uncomment and replace <RESERVATION_NAME>)
+    # - gpu:
+    #     count: 4
+    #     driverVersion: latest
+    #     type: nvidia-rtx-pro-6000
+    #   machineType: g4-standard-192
+    #   maxPodsPerNode: 32
+    #   reservations:
+    #     affinity: Specific
+    #     specific:
+    #       - name: nvidia-rtx-pro-6000-specific
+    #         reservationBlock:
+    #           name: <RESERVATION_NAME>
+    #   spot: false
+
+    # Use any reservation (affinity: AnyBestEffort)
+    - gpu:
+        count: 4
+        driverVersion: latest
+        type: nvidia-rtx-pro-6000
+      machineType: g4-standard-192
+      maxPodsPerNode: 32
+      reservations:
+        affinity: AnyBestEffort
+      spot: false
+
+    # Use on-demand capacity (no reservation, spot: false)
+    - gpu:
+        count: 4
+        driverVersion: latest
+        type: nvidia-rtx-pro-6000
+      machineType: g4-standard-192
+      maxPodsPerNode: 32
+      spot: false
+
+    # Use DWS FlexStart with a 7-day run limit
+    - flexStart:
+        enabled: true
+        nodeRecycling:
+          leadTimeSeconds: 3600  # 1 hour
+      gpu:
+        count: 4
+        driverVersion: latest
+        type: nvidia-rtx-pro-6000
+      machineType: g4-standard-192
+      maxPodsPerNode: 32
+      maxRunDurationSeconds: 604800  # 7 days
+
+    # Use DWS FlexStart with a 1-day run limit
+    - flexStart:
+        enabled: true
+        nodeRecycling:
+          leadTimeSeconds: 3600  # 1 hour
+      gpu:
+        count: 4
+        driverVersion: latest
+        type: nvidia-rtx-pro-6000
+      machineType: g4-standard-192
+      maxPodsPerNode: 32
+      maxRunDurationSeconds: 86400  # 1 day
+
+    # Use spot capacity (spot: true)
+    - gpu:
+        count: 4
+        driverVersion: latest
+        type: nvidia-rtx-pro-6000
+      machineType: g4-standard-192
+      maxPodsPerNode: 32
+      spot: true
-Original file line number
+Diff line change
@@ Expand Up / @@ -54,6 +54,7 @@ prereqs @@
     psutil
     qwiklabs
     rayutil
+    rtxpro
     rueth
     safetensors
     scann
@@ Expand Down @@