Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions keras_hub/api/layers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@
from keras_hub.src.models.vit.vit_image_converter import (
ViTImageConverter as ViTImageConverter,
)
from keras_hub.src.models.vit_det.vit_det_image_converter import (
ViTDetImageConverter as ViTDetImageConverter,
)
from keras_hub.src.models.whisper.whisper_audio_converter import (
WhisperAudioConverter as WhisperAudioConverter,
)
Expand Down
5 changes: 0 additions & 5 deletions keras_hub/src/models/vit_det/vit_det_backbone.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import keras
from keras import ops

from keras_hub.src.api_export import keras_hub_export
from keras_hub.src.models.backbone import Backbone
Expand Down Expand Up @@ -105,10 +104,6 @@ def __init__(
)
img_size = img_input.shape[-3]
x = img_input
# VITDet scales inputs based on the standard ImageNet mean/stddev.
x = (x - ops.array([0.485, 0.456, 0.406], dtype=x.dtype)) / (
ops.array([0.229, 0.224, 0.225], dtype=x.dtype)
)
x = ViTDetPatchingAndEmbedding(
kernel_size=(patch_size, patch_size),
strides=(patch_size, patch_size),
Expand Down
38 changes: 38 additions & 0 deletions keras_hub/src/models/vit_det/vit_det_image_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from keras_hub.src.api_export import keras_hub_export
from keras_hub.src.layers.preprocessing.image_converter import ImageConverter
from keras_hub.src.models.vit_det.vit_det_backbone import ViTDetBackbone


@keras_hub_export("keras_hub.layers.ViTDetImageConverter")
class ViTDetImageConverter(ImageConverter):
    """Image converter for ViTDet models.

    This layer resizes input images and applies ImageNet normalization
    (mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) for ViTDet
    models. Inputs are first scaled by `1/255` to map `uint8` pixel
    values into `[0, 1]` before the mean/std normalization is applied.

    Args:
        image_size: int or tuple of `(height, width)`. The output size of
            the image. Defaults to `(1024, 1024)`.

    Example:
    ```python
    import numpy as np

    converter = keras_hub.layers.ViTDetImageConverter(image_size=(1024, 1024))
    # A batch of one uint8-valued 512x512 RGB image.
    images = np.random.randint(0, 256, size=(1, 512, 512, 3))
    converter(images)  # Resizes to (1024, 1024) and normalizes.
    ```
    """

    backbone_cls = ViTDetBackbone

    def __init__(
        self,
        image_size=(1024, 1024),
        **kwargs,
    ):
        super().__init__(
            image_size=image_size,
            scale=1.0 / 255.0,  # Map uint8 pixels to [0, 1].
            offset=None,
            # Standard ImageNet channel statistics used by ViTDet.
            norm_mean=[0.485, 0.456, 0.406],
            norm_std=[0.229, 0.224, 0.225],
            **kwargs,
        )
Loading