From 88b55802314bd3769563b64aaf5a83e3a94456cb Mon Sep 17 00:00:00 2001
From: Sefik Ilkin Serengil
Date: Sun, 10 Nov 2024 19:12:38 +0000
Subject: [PATCH] target size arg added into extract faces

---
 retinaface/RetinaFace.py          | 23 +++++++++--
 retinaface/commons/postprocess.py | 63 +++++++++++++++++++++++++++++++
 tests/test_actions.py             | 10 +++++
 3 files changed, 93 insertions(+), 3 deletions(-)

diff --git a/retinaface/RetinaFace.py b/retinaface/RetinaFace.py
index 77ff2df..ead0692 100644
--- a/retinaface/RetinaFace.py
+++ b/retinaface/RetinaFace.py
@@ -1,7 +1,7 @@
 import os
 import warnings
 import logging
-from typing import Union, Any, Optional, Dict
+from typing import Union, Any, Optional, Dict, Tuple, List
 
 # this has to be set before importing tf
 os.environ["TF_USE_LEGACY_KERAS"] = "1"
@@ -220,7 +220,9 @@ def extract_faces(
     align: bool = True,
     allow_upscaling: bool = True,
     expand_face_area: int = 0,
-) -> list:
+    target_size: Optional[Tuple[int, int]] = None,
+    min_max_norm: bool = True,
+) -> List[np.ndarray]:
     """
     Extract detected and aligned faces
     Args:
@@ -230,6 +232,13 @@ def extract_faces(
         align (bool): enable or disable alignment
         allow_upscaling (bool): allowing up-scaling
         expand_face_area (int): expand detected facial area with a percentage
+        target_size (optional tuple): resize the image by padding it with black pixels
+            to fit the specified dimensions. default is None
+        min_max_norm (bool): set this to True if you want to normalize image in [0, 1].
+            this is only applied when target_size is not None.
+            for instance, matplotlib expects inputs in this scale. (default is True)
+    Returns:
+        result (List[np.ndarray]): list of extracted faces
     """
     resp = []
 
@@ -289,6 +298,14 @@ def extract_faces(
                 int(rotated_y1) : int(rotated_y2), int(rotated_x1) : int(rotated_x2)
             ]
 
-        resp.append(facial_img[:, :, ::-1])
+        if target_size is not None:
+            facial_img = postprocess.resize_image(
+                img=facial_img, target_size=target_size, min_max_norm=min_max_norm
+            )
+
+        # to rgb
+        facial_img = facial_img[:, :, ::-1]
+
+        resp.append(facial_img)
 
     return resp
diff --git a/retinaface/commons/postprocess.py b/retinaface/commons/postprocess.py
index d73f931..7b1f642 100644
--- a/retinaface/commons/postprocess.py
+++ b/retinaface/commons/postprocess.py
@@ -1,7 +1,18 @@
+# built-in dependencies
 import math
 from typing import Union, Tuple
+
+# 3rd party dependencies
 import numpy as np
 from PIL import Image
+import cv2
+import tensorflow as tf
+
+tf_major_version = int(tf.__version__.split(".", maxsplit=1)[0])
+if tf_major_version == 1:
+    from keras.preprocessing import image
+else:
+    from tensorflow.keras.preprocessing import image
 
 
 # pylint: disable=unused-argument
@@ -143,6 +154,58 @@ def rotate_facial_area(
     return (x1, y1, x2, y2)
 
 
+def resize_image(
+    img: np.ndarray, target_size: Tuple[int, int], min_max_norm: bool = True
+) -> np.ndarray:
+    """
+    Resize an image to the input size of a ml model, padding it with black pixels.
+    Ref: github.com/serengil/deepface/blob/master/deepface/modules/preprocessing.py
+    Args:
+        img (np.ndarray): pre-loaded image as numpy array
+        target_size (tuple): (height, width) input shape of ml model
+        min_max_norm (bool): set this to True if you want to normalize image in [0, 1].
+            this is only applied when the image has values greater than 1.
+            for instance, matplotlib expects inputs in this scale. (default is True)
+    Returns:
+        img (np.ndarray): resized input image
+    """
+    factor_0 = target_size[0] / img.shape[0]
+    factor_1 = target_size[1] / img.shape[1]
+    factor = min(factor_0, factor_1)
+
+    dsize = (
+        max(int(img.shape[1] * factor), 1),
+        max(int(img.shape[0] * factor), 1),
+    )
+    img = cv2.resize(img, dsize)
+
+    diff_0 = target_size[0] - img.shape[0]
+    diff_1 = target_size[1] - img.shape[1]
+
+    # Put the base image in the middle of the padded image
+    img = np.pad(
+        img,
+        (
+            (diff_0 // 2, diff_0 - diff_0 // 2),
+            (diff_1 // 2, diff_1 - diff_1 // 2),
+            (0, 0),
+        ),
+        "constant",
+    )
+
+    # double check: cv2 expects dsize as (width, height), unlike numpy's shape order.
+    if img.shape[0:2] != tuple(target_size):
+        img = cv2.resize(img, (target_size[1], target_size[0]))
+
+    # convert to a float array; the image stays 3-dimensional (height, width, channels)
+    img = image.img_to_array(img)
+
+    if min_max_norm is True and img.max() > 1:
+        img = img.astype(np.float32) / 255.0
+
+    return img
+
+
 def bbox_pred(boxes, box_deltas):
     """
     This function is copied from the following code snippet:
diff --git a/tests/test_actions.py b/tests/test_actions.py
index 3cfd950..349b068 100644
--- a/tests/test_actions.py
+++ b/tests/test_actions.py
@@ -107,3 +107,13 @@ def test_different_expanding_ratios():
                 plt.imshow(face)
                 plt.axis("off")
                 plt.show()
+
+
+def test_resize():
+    faces = RetinaFace.extract_faces(img_path="tests/dataset/img11.jpg", target_size=(224, 224))
+    for face in faces:
+        assert face.shape == (224, 224, 3)
+        if do_plotting is True:
+            plt.imshow(face)
+            plt.show()
+    logger.info("✅ resize test done")