Resolves Gemini comments.

james77777778 · james77777778 · commit 0fde3f7dca6c · 2025-10-27T21:35:32.000+08:00
diff --git a/keras_hub/src/models/dinov3/dinov3_backbone.py b/keras_hub/src/models/dinov3/dinov3_backbone.py
@@ -64,6 +64,48 @@ class DINOV3Backbone(FeaturePyramidBackbone):
             for the models computations and weights. Note that some
             computations, such as softmax and layer normalization will always
             be done a float32 precision regardless of dtype.
+
+    Example:
+    ```python
+    # Pretrained DINOV3 model.
+    input_data = {
+        "pixel_values": np.ones(shape=(1, 518, 518, 3), dtype="float32"),
+    }
+    model = keras_hub.models.DINOV3Backbone.from_preset(
+        "dinov3_vit_small_lvd1689m"
+    )
+    model(input_data)
+
+    # Pretrained DINOV3 model with custom image shape.
+    input_data = {
+        "pixel_values": np.ones(shape=(1, 224, 224, 3), dtype="float32"),
+    }
+    model = keras_hub.models.DINOV3Backbone.from_preset(
+        "dinov3_vit_small_lvd1689m", image_shape=(224, 224, 3)
+    )
+    model(input_data)
+
+    # Randomly initialized DINOV3 model with custom config.
+    model = keras_hub.models.DINOV3Backbone(
+        patch_size=14,
+        num_layers=2,
+        hidden_dim=32,
+        num_heads=2,
+        intermediate_dim=128,
+        image_shape=(224, 224, 3),
+    )
+    model(input_data)
+
+    # Accessing feature pyramid outputs.
+    backbone = keras_hub.models.DINOV3Backbone.from_preset(
+        "dinov3_vit_small_lvd1689m", image_shape=(224, 224, 3)
+    )
+    model = keras.Model(
+        inputs=backbone.inputs,
+        outputs=backbone.pyramid_outputs,
+    )
+    features = model(input_data)
+    ```
     """
 
     def __init__(
@@ -141,7 +183,7 @@ def __init__(
 
         # === Functional Model ===
         pyramid_outputs = {}
-        image_input = layers.Input(shape=image_shape, name="images")
+        image_input = layers.Input(shape=image_shape, name="pixel_values")
         x = self.embeddings(image_input)
         pyramid_outputs["stem"] = x
 
@@ -160,7 +202,7 @@ def __init__(
                 pyramid_outputs[key] = self.layernorm(pyramid_outputs[key])
         outputs = x
         super().__init__(
-            inputs={"images": image_input},
+            inputs={"pixel_values": image_input},
             outputs=outputs,
             dtype=dtype,
             name=name,
diff --git a/keras_hub/src/models/dinov3/dinov3_backbone_test.py b/keras_hub/src/models/dinov3/dinov3_backbone_test.py
@@ -23,7 +23,7 @@ def setUp(self):
             "name": "dinov3_backbone",
         }
         self.input_data = {
-            "images": ops.ones((2, 64, 64, 3)),
+            "pixel_values": ops.ones((2, 64, 64, 3)),
         }
 
     def test_backbone_basics(self):
@@ -73,7 +73,7 @@ def test_position_embedding_interpolation(self):
             image_shape=(128, 128, 3),  # From 64 to 128.
         )
         input_data = {
-            "images": ops.ones((2, 128, 128, 3)),
+            "pixel_values": ops.ones((2, 128, 128, 3)),
         }
         restored_output = restored_model(input_data)
         self.assertNotEqual(model_output.shape, restored_output.shape)
diff --git a/keras_hub/src/models/dinov3/dinov3_layers.py b/keras_hub/src/models/dinov3/dinov3_layers.py
@@ -449,6 +449,7 @@ def call(
             is_causal=False,
         )
         attn_output = ops.reshape(attn_output, (batch_size, seq_len, -1))
+        attn_output = self.dropout(attn_output, training=training)
         return self.output_dense(attn_output, training=training)
 
     def get_config(self):
@@ -815,6 +816,7 @@ def call(
         attention_mask=None,
         position_embeddings=None,
         num_prefix_tokens=0,
+        training=None,
     ):
         residual = inputs
         hidden_states = self.norm1(inputs)
@@ -823,17 +825,18 @@ def call(
             attention_mask=attention_mask,
             position_embeddings=position_embeddings,
             num_prefix_tokens=num_prefix_tokens,
+            training=training,
+        )
+        hidden_states = self.layer_scale1(hidden_states, training=training)
+        hidden_states = (
+            self.drop_path(hidden_states, training=training) + residual
         )
-        hidden_states = self.layer_scale1(hidden_states)
-        hidden_states = self.drop_path(hidden_states) + residual
 
         residual = hidden_states
-        hidden_states = self.norm2(hidden_states)
-        hidden_states = self.mlp(hidden_states)
-        hidden_states = self.layer_scale2(hidden_states)
-        hidden_states = self.drop_path(hidden_states) + residual
-
-        return hidden_states
+        hidden_states = self.norm2(hidden_states, training=training)
+        hidden_states = self.mlp(hidden_states, training=training)
+        hidden_states = self.layer_scale2(hidden_states, training=training)
+        return self.drop_path(hidden_states, training=training) + residual
 
     def get_config(self):
         config = super().get_config()
diff --git a/keras_hub/src/utils/transformers/convert_dinov3_test.py b/keras_hub/src/utils/transformers/convert_dinov3_test.py
@@ -0,0 +1,34 @@
+import numpy as np
+import pytest
+
+from keras_hub.src.models.dinov3.dinov3_backbone import DINOV3Backbone
+from keras_hub.src.tests.test_case import TestCase
+
+
+class TestTask(TestCase):
+    @pytest.mark.large
+    def test_convert_tiny_preset(self):
+        model = DINOV3Backbone.from_preset(
+            "hf://facebook/dinov3-vits16-pretrain-lvd1689m",
+            image_shape=(224, 224, 3),
+        )
+        dummy_input = {
+            "pixel_values": np.ones((1, 224, 224, 3), dtype="float32")
+        }
+        output = model.predict(dummy_input)
+        self.assertAllClose(
+            output[0, 0, :10],
+            [
+                -0.2769,
+                0.5487,
+                0.2501,
+                -1.2269,
+                0.5886,
+                0.0762,
+                0.6251,
+                0.1874,
+                -0.4259,
+                -0.4362,
+            ],
+            atol=1e-2,
+        )

Original file line number	Diff line number	Diff line change
`@@ -23,7 +23,7 @@ def setUp(self):`
`23`	`23`	`"name": "dinov3_backbone",`
`24`	`24`	`}`
`25`	`25`	`self.input_data = {`
`26`		`- "images": ops.ones((2, 64, 64, 3)),`
	`26`	`+ "pixel_values": ops.ones((2, 64, 64, 3)),`
`27`	`27`	`}`
`28`	`28`
`29`	`29`	`def test_backbone_basics(self):`
`@@ -73,7 +73,7 @@ def test_position_embedding_interpolation(self):`
`73`	`73`	`image_shape=(128, 128, 3), # From 64 to 128.`
`74`	`74`	`)`
`75`	`75`	`input_data = {`
`76`		`- "images": ops.ones((2, 128, 128, 3)),`
	`76`	`+ "pixel_values": ops.ones((2, 128, 128, 3)),`
`77`	`77`	`}`
`78`	`78`	`restored_output = restored_model(input_data)`
`79`	`79`	`self.assertNotEqual(model_output.shape, restored_output.shape)`