Reverting the TGI image version for the Llama multi-GPU GKE samples #1591

@@ -29,7 +29,7 @@ spec:
     spec:
       containers:
       - name: llm
-        image: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124.2-3.ubuntu2204.py311
+        image: ghcr.io/huggingface/text-generation-inference:1.4.3
         resources:
           requests:
             cpu: "10"
@@ -51,6 +51,9 @@ spec:
         volumeMounts:
        - mountPath: /dev/shm
           name: dshm
+        # mountPath is set to /data because that is where the HF_HOME environment
+        # variable points in the TGI container image, i.e. where models
+        # downloaded from the Hub are stored
         - mountPath: /data
           name: ephemeral-volume
       volumes:
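For context, the mount that the added comment describes can be sketched as a minimal Deployment fragment. This is a hypothetical illustration, not the sample file itself: the metadata names are invented, and since the volumes section of the sample is truncated in the diff above, emptyDir volumes are used here as stand-ins. The one grounded detail, per the comment in the change, is that HF_HOME points to /data inside the TGI image, so mounting a volume there is what caches models downloaded from the Hub.

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-sketch            # hypothetical name
spec:
  replicas: 1
  selector:
    matchLabels:
      app: llm-sketch
  template:
    metadata:
      labels:
        app: llm-sketch
    spec:
      containers:
      - name: llm
        image: ghcr.io/huggingface/text-generation-inference:1.4.3
        volumeMounts:
        - mountPath: /dev/shm   # shared memory for inter-process tensor exchange
          name: dshm
        - mountPath: /data      # HF_HOME target; downloaded models land here
          name: ephemeral-volume
      volumes:
      - name: dshm
        emptyDir:
          medium: Memory
      - name: ephemeral-volume
        emptyDir: {}            # stand-in; the sample uses an ephemeral volume
```

Because the mount path matches HF_HOME, the model weights survive container restarts within the pod's lifetime without re-downloading from the Hub.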
@@ -29,7 +29,7 @@ spec:
     spec:
       containers:
       - name: llm
-        image: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124.2-3.ubuntu2204.py311
+        image: ghcr.io/huggingface/text-generation-inference:1.4.3
         resources:
           requests:
             cpu: "10"
@@ -56,6 +56,9 @@ spec:
         volumeMounts:
         - mountPath: /dev/shm
           name: dshm
+        # mountPath is set to /data because that is where the HF_HOME environment
+        # variable points in the TGI container image, i.e. where models
+        # downloaded from the Hub are stored
         - mountPath: /data
           name: ephemeral-volume
       volumes:
@@ -29,7 +29,7 @@ spec:
     spec:
       containers:
       - name: llm
-        image: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124.2-3.ubuntu2204.py311
+        image: ghcr.io/huggingface/text-generation-inference:2.0.4
         resources:
           requests:
             cpu: "10"
@@ -58,6 +58,9 @@ spec:
         volumeMounts:
         - mountPath: /dev/shm
           name: dshm
+        # mountPath is set to /data because that is where the HF_HOME environment
+        # variable points in the TGI container image, i.e. where models
+        # downloaded from the Hub are stored
         - mountPath: /data
           name: ephemeral-volume
       volumes:
@@ -29,7 +29,7 @@ spec:
     spec:
       containers:
       - name: llm
-        image: us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124.2-3.ubuntu2204.py311
+        image: ghcr.io/huggingface/text-generation-inference:1.4.3
         resources:
           requests:
             cpu: "5"
@@ -56,6 +56,9 @@ spec:
         volumeMounts:
         - mountPath: /dev/shm
           name: dshm
+        # mountPath is set to /data because that is where the HF_HOME environment
+        # variable points in the TGI container image, i.e. where models
+        # downloaded from the Hub are stored
         - mountPath: /data
           name: ephemeral-volume
       volumes: