From 80725a4c19cce36f3f64091fb16825117bd0ea7b Mon Sep 17 00:00:00 2001
From: gushob21 <gushob@google.com>
Date: Thu, 13 Nov 2025 22:09:36 +0000
Subject: [PATCH] Make images optional in nano banana node and remove GCS
 dependency for r2v

terraform fmt

Make images optional in nano banana node and remove GCS dependency for r2v

fix markdown

Make images optional in nano banana node and remove GCS dependency for r2v
---
 .../google_genmedia/gemini_flash_image_api.py |   4 +-
 .../gemini_flash_image_node.py                |   6 +-
 .../src/custom_nodes/google_genmedia/utils.py |  84 +-----
 .../custom_nodes/google_genmedia/veo3_api.py  |  24 +-
 .../google_genmedia/veo3_nodes.py             |   9 -
 .../terraform/comfyui/gcs.tf                  |  17 +-
 ...ftpl.json => veo3-reference-to-video.json} | 271 +++++++++---------
 7 files changed, 163 insertions(+), 252 deletions(-)
 rename platforms/gke/base/use-cases/inference-ref-arch/terraform/comfyui/src/comfyui-workflows/{veo3-reference-to-video.tftpl.json => veo3-reference-to-video.json} (90%)

diff --git a/modules/python/src/custom_nodes/google_genmedia/gemini_flash_image_api.py b/modules/python/src/custom_nodes/google_genmedia/gemini_flash_image_api.py
index a7aea5d96..e6ce42cc5 100644
--- a/modules/python/src/custom_nodes/google_genmedia/gemini_flash_image_api.py
+++ b/modules/python/src/custom_nodes/google_genmedia/gemini_flash_image_api.py
@@ -70,7 +70,7 @@ def generate_image(
         sexually_explicit_threshold: str,
         dangerous_content_threshold: str,
         system_instruction: str,
-        image1: torch.Tensor,
+        image1: Optional[torch.Tensor] = None,
         image2: Optional[torch.Tensor] = None,
         image3: Optional[torch.Tensor] = None,
     ) -> List[Image.Image]:
@@ -89,7 +89,7 @@ def generate_image(
               content.
             dangerous_content_threshold: Safety threshold for dangerous content.
             system_instruction: System-level instructions for the model.
-            image1: The primary input image tensor for image-to-image tasks.
+            image1: An optional primary input image tensor for image-to-image tasks.
             image2: An optional second input image tensor. Defaults to None.
             image3: An optional third input image tensor. Defaults to None.
 
diff --git a/modules/python/src/custom_nodes/google_genmedia/gemini_flash_image_node.py b/modules/python/src/custom_nodes/google_genmedia/gemini_flash_image_node.py
index 68fafcb70..393fa3a74 100644
--- a/modules/python/src/custom_nodes/google_genmedia/gemini_flash_image_node.py
+++ b/modules/python/src/custom_nodes/google_genmedia/gemini_flash_image_node.py
@@ -60,7 +60,6 @@ def INPUT_TYPES(cls) -> Dict[str, Dict[str, Any]]:
                         "default": "A vivid landscape painting of a futuristic city",
                     },
                 ),
-                "image1": ("IMAGE",),
                 "aspect_ratio": (
                     [
                         "1:1",
@@ -87,6 +86,7 @@ def INPUT_TYPES(cls) -> Dict[str, Dict[str, Any]]:
                 "top_k": ("INT", {"default": 32, "min": 1, "max": 64}),
             },
             "optional": {
+                "image1": ("IMAGE",),
                 "image2": ("IMAGE",),
                 "image3": ("IMAGE",),
                 # Safety Settings
@@ -145,12 +145,12 @@ def generate_and_return_image(
         temperature: float,
         top_p: float,
         top_k: int,
-        image1: torch.Tensor,
         hate_speech_threshold: str,
         harassment_threshold: str,
         sexually_explicit_threshold: str,
         dangerous_content_threshold: str,
         system_instruction: str,
+        image1: Optional[torch.Tensor] = None,
         image2: Optional[torch.Tensor] = None,
         image3: Optional[torch.Tensor] = None,
         gcp_project_id: Optional[str] = None,
@@ -175,7 +175,7 @@ def generate_and_return_image(
               content.
             dangerous_content_threshold: Safety threshold for dangerous content.
             system_instruction: System-level instructions for the model.
-            image1: The primary input image tensor for image editing tasks.
+            image1: An optional primary input image tensor for image editing tasks.
             image2: An optional second input image tensor. Defaults to None.
             image3: An optional third input image tensor. Defaults to None.
             gcp_project_id: The GCP project ID.
diff --git a/modules/python/src/custom_nodes/google_genmedia/utils.py b/modules/python/src/custom_nodes/google_genmedia/utils.py
index 57f74ec45..e3d6e66e3 100644
--- a/modules/python/src/custom_nodes/google_genmedia/utils.py
+++ b/modules/python/src/custom_nodes/google_genmedia/utils.py
@@ -483,13 +483,15 @@ def generate_video_from_image(
 
 
 @api_error_retry
-def generate_video_from_gcs_references(
+def generate_video_from_references(
     client: genai.Client,
     model: str,
     prompt: str,
-    gcs_uris: List[str],
     image_format: str,
     aspect_ratio: str,
+    image1: torch.Tensor,
+    image2: Optional[torch.Tensor],
+    image3: Optional[torch.Tensor],
     output_resolution: Optional[str],
     compression_quality: Optional[str],
     person_generation: str,
@@ -530,16 +532,18 @@ def generate_video_from_gcs_references(
         "generate_audio": generate_audio if generate_audio is not None else False,
     }
 
-    if output_gcs_uri:
-        temp_config["output_gcs_uri"] = output_gcs_uri
-
     reference_images = []
-    for uri in gcs_uris:
-        image_part = Image(gcs_uri=uri, mime_type=mime_type)
-        reference_image = types.VideoGenerationReferenceImage(
-            image=image_part, reference_type="asset"
-        )
-        reference_images.append(reference_image)
+
+    for image_tensor in [image1, image2, image3]:
+        if image_tensor is not None:
+            image_part = Image(
+                imageBytes=tensor_to_pil_to_base64(image_tensor, image_format),
+                mime_type=mime_type,
+            )
+            reference_image = types.VideoGenerationReferenceImage(
+                image=image_part, reference_type="asset"
+            )
+            reference_images.append(reference_image)
 
     temp_config["reference_images"] = reference_images
 
@@ -966,64 +970,6 @@ def process_video_response(operation: Any) -> List[str]:
     return video_paths
 
 
-def upload_images_to_gcs(
-    images: List[Optional[torch.Tensor]], bucket_name: str, image_format: str
-) -> List[str]:
-    """
-    Uploads a list of image tensors to a GCS bucket.
-
-    Args:
-        images: A list of torch.Tensor images, which can contain None.
-        bucket_name: The name of the GCS bucket.
-        image_format: The format of the images (e.g., "PNG", "JPEG").
-
-    Returns:
-        A list of GCS URIs for the uploaded images.
-    """
-    prefix = "gs://"
-    if bucket_name.startswith(prefix):
-        bucket_name = bucket_name[len(prefix) :]
-
-    gcs_uris = []
-    storage_client = storage.Client(
-        client_info=ClientInfo(user_agent=STORAGE_USER_AGENT)
-    )
-    bucket = storage_client.bucket(bucket_name)
-
-    if not bucket.exists():
-        raise APIInputError(
-            f"GCS bucket '{bucket_name}' does not exist or is inaccessible."
-        )
-
-    for i, image_tensor in enumerate(images):
-        if image_tensor is not None:
-            try:
-                timestamp = int(time.time())
-                unique_id = random.randint(1000, 9999)
-                object_name = f"temporary-reference-images/ref_{timestamp}_{i+1}_{unique_id}.{image_format.lower()}"
-                blob = bucket.blob(object_name)
-
-                # VEO expects single images, not batches. Take the first from any potential batch.
-                single_image_tensor = image_tensor[0].unsqueeze(0)
-                image_bytes = tensor_to_pil_to_bytes(
-                    single_image_tensor, format=image_format.upper()
-                )
-
-                blob.upload_from_string(
-                    image_bytes, content_type=f"image/{image_format.lower()}"
-                )
-
-                gcs_uri = f"gs://{bucket_name}/{object_name}"
-                gcs_uris.append(gcs_uri)
-                logger.info(f"Successfully uploaded reference image {i+1} to {gcs_uri}")
-            except Exception as e:
-                raise APIExecutionError(
-                    f"Failed to upload image {i+1} to GCS: {e}"
-                ) from e
-
-    return gcs_uris
-
-
 def validate_gcs_uri_and_image(
     gcs_uri: str, check_object: bool = True
 ) -> Tuple[bool, str]:
diff --git a/modules/python/src/custom_nodes/google_genmedia/veo3_api.py b/modules/python/src/custom_nodes/google_genmedia/veo3_api.py
index d79d24bdc..974baf16d 100644
--- a/modules/python/src/custom_nodes/google_genmedia/veo3_api.py
+++ b/modules/python/src/custom_nodes/google_genmedia/veo3_api.py
@@ -231,7 +231,6 @@ def generate_video_from_references(
         self,
         model: str,
         prompt: str,
-        bucket_name: str,
         image1: torch.Tensor,
         image_format: str,
         aspect_ratio: str,
@@ -249,12 +248,11 @@ def generate_video_from_references(
         seed: Optional[int],
     ) -> List[str]:
         """
-        Uploads reference images to GCS and then generates a video.
+        Generates a video from the references.
 
         Args:
             model: Veo3 model.
             prompt: The text prompt for video generation.
-            bucket_name: The GCS bucket to upload reference images to.
             image1: The first reference image as a torch.Tensor.
             image_format: The format of the input images.
             aspect_ratio: The desired aspect ratio of the video.
@@ -278,27 +276,15 @@ def generate_video_from_references(
             raise APIInputError(
                 "Image1 is required. At least reference image must be provided."
             )
-        if not bucket_name:
-            raise APIInputError(
-                "bucket_name is required for uploading reference images."
-            )
-
-        gcs_uris = utils.upload_images_to_gcs(
-            images=[image1, image2, image3],
-            bucket_name=bucket_name,
-            image_format=image_format,
-        )
-
-        if not gcs_uris:
-            raise APIExecutionError("Failed to upload any reference images to GCS.")
-
         model_enum = Veo3Model[model]
 
-        return utils.generate_video_from_gcs_references(
+        return utils.generate_video_from_references(
             client=self.client,
             model=model_enum,
             prompt=prompt,
-            gcs_uris=gcs_uris,
+            image1=image1,
+            image2=image2,
+            image3=image3,
             image_format=image_format,
             aspect_ratio=aspect_ratio,
             output_resolution=output_resolution,
diff --git a/modules/python/src/custom_nodes/google_genmedia/veo3_nodes.py b/modules/python/src/custom_nodes/google_genmedia/veo3_nodes.py
index 6e219eaf7..3f99c5c32 100644
--- a/modules/python/src/custom_nodes/google_genmedia/veo3_nodes.py
+++ b/modules/python/src/custom_nodes/google_genmedia/veo3_nodes.py
@@ -522,13 +522,6 @@ def INPUT_TYPES(cls) -> Dict[str, Dict[str, Any]]:
                     [model.name for model in Veo3Model],
                     {"default": Veo3Model.VEO_3_1_PREVIEW.name},
                 ),
-                "bucket_name": (
-                    "STRING",
-                    {
-                        "default": "",
-                        "tooltip": "GCS bucket name to temporarily store reference images.",
-                    },
-                ),
                 "image1": ("IMAGE",),
                 "image_format": (
                     ["PNG", "JPEG"],
@@ -591,7 +584,6 @@ def INPUT_TYPES(cls) -> Dict[str, Dict[str, Any]]:
     def generate_from_references(
         self,
         model: str,
-        bucket_name: str,
         image1: torch.Tensor,
         image_format: str,
         prompt: str,
@@ -622,7 +614,6 @@ def generate_from_references(
             video_paths = api.generate_video_from_references(
                 model=model,
                 prompt=prompt,
-                bucket_name=bucket_name,
                 image1=image1,
                 image2=image2,
                 image3=image3,
diff --git a/platforms/gke/base/use-cases/inference-ref-arch/terraform/comfyui/gcs.tf b/platforms/gke/base/use-cases/inference-ref-arch/terraform/comfyui/gcs.tf
index 66fed7da8..2f2d99f5f 100644
--- a/platforms/gke/base/use-cases/inference-ref-arch/terraform/comfyui/gcs.tf
+++ b/platforms/gke/base/use-cases/inference-ref-arch/terraform/comfyui/gcs.tf
@@ -199,10 +199,9 @@ resource "google_storage_bucket_object" "workflow_veo3_itv" {
 }
 
 resource "google_storage_bucket_object" "workflow_veo3_r2v" {
-  bucket     = google_storage_bucket.comfyui_workflow.name
-  name       = "veo3-reference-to-video.json"
-  source     = "src/comfyui-workflows/veo3-reference-to-video.json"
-  depends_on = [local_file.workflow_veo2_ttv]
+  bucket = google_storage_bucket.comfyui_workflow.name
+  name   = "veo3-reference-to-video.json"
+  source = "src/comfyui-workflows/veo3-reference-to-video.json"
 }
 
 resource "google_storage_bucket_object" "workflow_veo3_ttv" {
@@ -270,13 +269,3 @@ resource "local_file" "workflow_veo3_ttv" {
   )
   filename = "${path.module}/src/comfyui-workflows/veo3-text-to-video.json"
 }
-
-resource "local_file" "workflow_veo3_r2v" {
-  content = templatefile(
-    "${path.module}/src/comfyui-workflows/veo3-reference-to-video.tftpl.json",
-    {
-      output_bucket_uri = google_storage_bucket.comfyui_output.url
-    }
-  )
-  filename = "${path.module}/src/comfyui-workflows/veo3-reference-to-video.json"
-}
diff --git a/platforms/gke/base/use-cases/inference-ref-arch/terraform/comfyui/src/comfyui-workflows/veo3-reference-to-video.tftpl.json b/platforms/gke/base/use-cases/inference-ref-arch/terraform/comfyui/src/comfyui-workflows/veo3-reference-to-video.json
similarity index 90%
rename from platforms/gke/base/use-cases/inference-ref-arch/terraform/comfyui/src/comfyui-workflows/veo3-reference-to-video.tftpl.json
rename to platforms/gke/base/use-cases/inference-ref-arch/terraform/comfyui/src/comfyui-workflows/veo3-reference-to-video.json
index 43b23b336..cf200dfb8 100644
--- a/platforms/gke/base/use-cases/inference-ref-arch/terraform/comfyui/src/comfyui-workflows/veo3-reference-to-video.tftpl.json
+++ b/platforms/gke/base/use-cases/inference-ref-arch/terraform/comfyui/src/comfyui-workflows/veo3-reference-to-video.json
@@ -1,118 +1,15 @@
 {
   "id": "dac18393-16f7-48e2-a88f-ef94ceabc45a",
   "revision": 0,
-  "last_node_id": 9,
-  "last_link_id": 7,
+  "last_node_id": 10,
+  "last_link_id": 11,
   "nodes": [
-    {
-      "id": 1,
-      "type": "Veo3ReferenceToVideo",
-      "pos": [
-        2427.0403108054265,
-        -391.62077003714944
-      ],
-      "size": [
-        503.59017692201724,
-        683.7693026098127
-      ],
-      "flags": {},
-      "order": 3,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "image1",
-          "type": "IMAGE",
-          "link": 5
-        },
-        {
-          "name": "image2",
-          "shape": 7,
-          "type": "IMAGE",
-          "link": 6
-        },
-        {
-          "name": "image3",
-          "shape": 7,
-          "type": "IMAGE",
-          "link": 7
-        }
-      ],
-      "outputs": [
-        {
-          "name": "video_paths",
-          "type": "VEO_VIDEO",
-          "links": [
-            1
-          ]
-        }
-      ],
-      "properties": {
-        "Node name for S&R": "Veo3ReferenceToVideo"
-      },
-      "widgets_values": [
-        "VEO_3_1_PREVIEW",
-        "${output_bucket_uri}",
-        "PNG",
-        "The video opens with a medium, eye-level shot of a beautiful woman with dark hair and warm brown eyes. She wears a magnificent, high-fashion flamingo dress with layers of pink and fuchsia feathers, complemented by whimsical pink, heart-shaped sunglasses. She walks with serene confidence through the crystal-clear, shallow turquoise water of a sun-drenched lagoon. The camera slowly pulls back to a medium-wide shot, revealing the breathtaking scene as the dress's long train glides and floats gracefully on the water's surface behind her. The cinematic, dreamlike atmosphere is enhanced by the vibrant colors of the dress against the serene, minimalist landscape, capturing a moment of pure elegance and high-fashion fantasy.",
-        "16:9",
-        "720p",
-        "optimized",
-        "allow_adult",
-        8,
-        true,
-        1,
-        "",
-        "",
-        103092426,
-        "randomize",
-        "",
-        ""
-      ]
-    },
-    {
-      "id": 2,
-      "type": "VeoVideoSaveAndPreview",
-      "pos": [
-        3014.188175391946,
-        -239.5717698461788
-      ],
-      "size": [
-        282.740234375,
-        154
-      ],
-      "flags": {},
-      "order": 4,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "video_paths",
-          "type": "VEO_VIDEO",
-          "link": 1
-        }
-      ],
-      "outputs": [],
-      "properties": {
-        "Node name for S&R": "VeoVideoSaveAndPreview"
-      },
-      "widgets_values": [
-        true,
-        true,
-        false,
-        false,
-        "veo_video",
-        {
-          "hidden": false,
-          "paused": false,
-          "parameters": {}
-        }
-      ]
-    },
     {
       "id": 7,
       "type": "VHS_LoadImagePath",
       "pos": [
-        1860.8935736279657,
-        -409.4608489559145
+        1513.1940777514956,
+        -409.58235958328333
       ],
       "size": [
         239.3153533935547,
@@ -134,7 +31,7 @@
           "name": "IMAGE",
           "type": "IMAGE",
           "links": [
-            5
+            8
           ]
         },
         {
@@ -166,11 +63,11 @@
       }
     },
     {
-      "id": 9,
+      "id": 8,
       "type": "VHS_LoadImagePath",
       "pos": [
-        2015.3096964165945,
-        -16.74050675143352
+        1530.2100449724562,
+        -42.76215883321408
       ],
       "size": [
         239.3153533935547,
@@ -192,7 +89,7 @@
           "name": "IMAGE",
           "type": "IMAGE",
           "links": [
-            6
+            9
           ]
         },
         {
@@ -207,7 +104,7 @@
         "Node name for S&R": "VHS_LoadImagePath"
       },
       "widgets_values": {
-        "image": "input/veo/flamingo_woman.png",
+        "image": "input/veo/flamingo_glasses.png",
         "custom_width": 0,
         "custom_height": 0,
         "videopreview": {
@@ -216,7 +113,7 @@
           "params": {
             "custom_width": 0,
             "custom_height": 0,
-            "filename": "input/veo/flamingo_woman.png",
+            "filename": "input/veo/flamingo_glasses.png",
             "type": "path",
             "format": "video/png"
           }
@@ -224,11 +121,11 @@
       }
     },
     {
-      "id": 8,
+      "id": 9,
       "type": "VHS_LoadImagePath",
       "pos": [
-        1714.7830276948869,
-        -97.21680457550596
+        1881.5675581223231,
+        30.32322878015156
       ],
       "size": [
         239.3153533935547,
@@ -250,7 +147,7 @@
           "name": "IMAGE",
           "type": "IMAGE",
           "links": [
-            7
+            10
           ]
         },
         {
@@ -265,7 +162,7 @@
         "Node name for S&R": "VHS_LoadImagePath"
       },
       "widgets_values": {
-        "image": "input/veo/flamingo_glasses.png",
+        "image": "input/veo/flamingo_woman.png",
         "custom_width": 0,
         "custom_height": 0,
         "videopreview": {
@@ -274,46 +171,148 @@
           "params": {
             "custom_width": 0,
             "custom_height": 0,
-            "filename": "input/veo/flamingo_glasses.png",
+            "filename": "input/veo/flamingo_woman.png",
             "type": "path",
             "format": "video/png"
           }
         }
       }
+    },
+    {
+      "id": 10,
+      "type": "Veo3ReferenceToVideo",
+      "pos": [
+        2165.3064619072197,
+        -445.49805395006825
+      ],
+      "size": [
+        568.2292280069219,
+        739.3445609954935
+      ],
+      "flags": {},
+      "order": 3,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "image1",
+          "type": "IMAGE",
+          "link": 8
+        },
+        {
+          "name": "image2",
+          "shape": 7,
+          "type": "IMAGE",
+          "link": 9
+        },
+        {
+          "name": "image3",
+          "shape": 7,
+          "type": "IMAGE",
+          "link": 10
+        }
+      ],
+      "outputs": [
+        {
+          "name": "video_paths",
+          "type": "VEO_VIDEO",
+          "links": [
+            11
+          ]
+        }
+      ],
+      "properties": {
+        "Node name for S&R": "Veo3ReferenceToVideo"
+      },
+      "widgets_values": [
+        "VEO_3_1_PREVIEW",
+        "PNG",
+        "The video opens with a medium, eye-level shot of a beautiful woman with dark hair and warm brown eyes. She wears a magnificent, high-fashion flamingo dress with layers of pink and fuchsia feathers, complemented by whimsical pink, heart-shaped sunglasses. She walks with serene confidence through the crystal-clear, shallow turquoise water of a sun-drenched lagoon. The camera slowly pulls back to a medium-wide shot, revealing the breathtaking scene as the dress's long train glides and floats gracefully on the water's surface behind her. The cinematic, dreamlike atmosphere is enhanced by the vibrant colors of the dress against the serene, minimalist landscape, capturing a moment of pure elegance and high-fashion fantasy.",
+        "16:9",
+        "720p",
+        "optimized",
+        "allow_adult",
+        8,
+        true,
+        1,
+        "",
+        "",
+        3242057037,
+        "randomize",
+        "",
+        ""
+      ]
+    },
+    {
+      "id": 2,
+      "type": "VeoVideoSaveAndPreview",
+      "pos": [
+        2850.199326367174,
+        -295.7914758055095
+      ],
+      "size": [
+        282.740234375,
+        355.7913818359375
+      ],
+      "flags": {},
+      "order": 4,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "video_paths",
+          "type": "VEO_VIDEO",
+          "link": 11
+        }
+      ],
+      "outputs": [],
+      "properties": {
+        "Node name for S&R": "VeoVideoSaveAndPreview"
+      },
+      "widgets_values": [
+        true,
+        true,
+        false,
+        false,
+        "veo_video",
+        {
+          "hidden": false,
+          "paused": false,
+          "parameters": {}
+        }
+      ]
     }
   ],
   "links": [
     [
-      1,
-      1,
+      8,
+      7,
       0,
-      2,
+      10,
       0,
-      "VEO_VIDEO"
+      "IMAGE"
     ],
     [
-      5,
-      7,
+      9,
+      8,
       0,
+      10,
       1,
-      0,
       "IMAGE"
     ],
     [
-      6,
+      10,
       9,
       0,
-      1,
-      1,
+      10,
+      2,
       "IMAGE"
     ],
     [
-      7,
-      8,
+      11,
+      10,
       0,
-      1,
       2,
-      "IMAGE"
+      0,
+      "VEO_VIDEO"
     ]
   ],
   "groups": [],
@@ -322,8 +321,8 @@
     "ds": {
       "scale": 0.9282559986106438,
       "offset": [
-        -1638.2093711162324,
-        488.8059078251353
+        -1479.352537903931,
+        495.5640159374671
       ]
     },
     "frontendVersion": "1.28.7",