"""
Modified from face-alignment v1.4.1 api.py
Original source: https://github.com/1adrianb/face-alignment
License: BSD-3-Clause License
"""
import torch
import warnings
from enum import IntEnum
from skimage import io
import numpy as np
from packaging import version
from tqdm import tqdm

from face_alignment.utils import *
from face_alignment.folder_data import FolderData
from face_alignment.detection import sfd


class LandmarksType(IntEnum):
    """Enum class defining the type of landmarks to detect.

    ``TWO_D`` - the detected points ``(x,y)`` are detected in a 2D space and follow the visible contour of the face
    ``TWO_HALF_D`` - these points represent the projection of the 3D points into a 2D space
    ``THREE_D`` - detect the points ``(x,y,z)`` in a 3D space

    """
    TWO_D = 1
    TWO_HALF_D = 2
    THREE_D = 3


class NetworkSize(IntEnum):
    # TINY = 1
    # SMALL = 2
    # MEDIUM = 3
    LARGE = 4


default_model_urls = {
    '2DFAN-4': 'https://www.adrianbulat.com/downloads/python-fan/2DFAN4-cd938726ad.zip',
    '3DFAN-4': 'https://www.adrianbulat.com/downloads/python-fan/3DFAN4-4a694010b9.zip',
    'depth': 'https://www.adrianbulat.com/downloads/python-fan/depth-6c4283c0e0.zip',
}

models_urls = {
    '1.6': {
        '2DFAN-4': 'https://www.adrianbulat.com/downloads/python-fan/2DFAN4_1.6-c827573f02.zip',
        '3DFAN-4': 'https://www.adrianbulat.com/downloads/python-fan/3DFAN4_1.6-ec5cf40a1d.zip',
        'depth': 'https://www.adrianbulat.com/downloads/python-fan/depth_1.6-2aa3f18772.zip',
    },
    '1.5': {
        '2DFAN-4': 'https://www.adrianbulat.com/downloads/python-fan/2DFAN4_1.5-a60332318a.zip',
        '3DFAN-4': 'https://www.adrianbulat.com/downloads/python-fan/3DFAN4_1.5-176570af4d.zip',
        'depth': 'https://www.adrianbulat.com/downloads/python-fan/depth_1.5-bc10f98e39.zip',
    },
}
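# Model weights are keyed by the torch 'major.minor' version they were traced
# with; versions without a dedicated entry fall back to default_model_urls.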


class FaceAlignment:
    def __init__(self, landmarks_type, network_size=NetworkSize.LARGE, net_path=None,
                 device='cuda', dtype=torch.float32, flip_input=False, face_detector_kwargs=None, verbose=False):
        self.device = device
        self.flip_input = flip_input
        self.landmarks_type = landmarks_type
        self.verbose = verbose
        self.dtype = dtype

        if version.parse(torch.__version__) < version.parse('1.5.0'):
            raise ImportError(
                'Unsupported pytorch version detected. Minimum supported version of pytorch: 1.5.0. '
                'Either upgrade (recommended) your pytorch setup, or downgrade to face-alignment 1.2.0.')

        network_size = int(network_size)
        pytorch_version = torch.__version__
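        # Reduce the full version string to 'major.minor' (e.g. '1.6.0' -> '1.6',
        # '1.7.0.dev20200601' -> '1.7') so it can be looked up in models_urls.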
        if 'dev' in pytorch_version:
            pytorch_version = pytorch_version.rsplit('.', 2)[0]
        else:
            pytorch_version = pytorch_version.rsplit('.', 1)[0]

        if 'cuda' in device:
            torch.backends.cudnn.benchmark = True

        # Get the face detector
        # face_detector_module = __import__('face_alignment.detection.' + face_detector,
        #                                   globals(), locals(), [face_detector], 0)
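        # This modified version always uses the SFD detector instead of importing
        # a detector module dynamically, as the upstream api.py does above.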
        face_detector_kwargs = face_detector_kwargs or {}
        self.face_detector = sfd.FaceDetector(device=device, verbose=verbose, **face_detector_kwargs)

        # Initialise the face alignment network
        if landmarks_type == LandmarksType.TWO_D:
            network_name = '2DFAN-' + str(network_size)
        else:
            network_name = '3DFAN-' + str(network_size)
        if net_path is None:
            net_path = load_file_from_url(models_urls.get(pytorch_version, default_model_urls)[network_name])
        self.face_alignment_net = torch.jit.load(net_path)

        self.face_alignment_net.to(device, dtype=dtype)
        self.face_alignment_net.eval()

        # Initialise the depth prediction network
        if landmarks_type == LandmarksType.THREE_D:
            self.depth_prediciton_net = torch.jit.load(
                load_file_from_url(models_urls.get(pytorch_version, default_model_urls)['depth']))

            self.depth_prediciton_net.to(device, dtype=dtype)
            self.depth_prediciton_net.eval()

    def get_landmarks(self, image_or_path, detected_faces=None, return_bboxes=False, return_landmark_score=False):
        """Deprecated, please use get_landmarks_from_image instead.

        Arguments:
            image_or_path {string or numpy.array or torch.tensor} -- The input image or path to it

        Keyword Arguments:
            detected_faces {list of numpy.array} -- list of bounding boxes, one for each face found
            in the image (default: {None})
            return_bboxes {boolean} -- If True, return the face bounding boxes in addition to the keypoints.
            return_landmark_score {boolean} -- If True, return the keypoint scores along with the keypoints.
        """
        return self.get_landmarks_from_image(image_or_path, detected_faces, return_bboxes, return_landmark_score)

    @torch.no_grad()
    def get_landmarks_from_image(self, image_or_path, detected_faces=None, return_bboxes=False,
                                 return_landmark_score=False):
        """Predict the landmarks for each face present in the image.

        This function predicts a set of 68 2D or 3D landmarks, one set for each face found.
        If detected_faces is None the method will also run a face detector.

        Arguments:
            image_or_path {string or numpy.array or torch.tensor} -- The input image or path to it.

        Keyword Arguments:
            detected_faces {list of numpy.array} -- list of bounding boxes, one for each face found
            in the image (default: {None})
            return_bboxes {boolean} -- If True, return the face bounding boxes in addition to the keypoints.
            return_landmark_score {boolean} -- If True, return the keypoint scores along with the keypoints.

        Return:
            result:
                1. if both return_bboxes and return_landmark_score are False, result will be:
                    landmark
                2. Otherwise, result will be one of the following, depending on the actual value of return_* arguments.
                    (landmark, landmark_score, detected_face)
                    (landmark, None,           detected_face)
                    (landmark, landmark_score, None)
        """
        image = get_image(image_or_path)

        if detected_faces is None:
            detected_faces = self.face_detector.detect_from_image(image.copy())

        if len(detected_faces) == 0:
            warnings.warn("No faces were detected.")
            if return_bboxes or return_landmark_score:
                return None, None, None
            else:
                return None

        landmarks = []
        landmarks_scores = []
        for d in detected_faces:
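            # d is a detector bounding box [x1, y1, x2, y2, (score)]; the crop is
            # centred on the box, shifted up by 12% of the box height to better
            # frame the face, and scaled against the detector's reference_scale.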
            center = torch.tensor(
                [d[2] - (d[2] - d[0]) / 2.0, d[3] - (d[3] - d[1]) / 2.0])
            center[1] = center[1] - (d[3] - d[1]) * 0.12
            scale = (d[2] - d[0] + d[3] - d[1]) / self.face_detector.reference_scale

            inp = crop(image, center, scale)
            inp = torch.from_numpy(inp.transpose((2, 0, 1))).float()

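            # HWC uint8 -> CHW float in [0, 1], with a leading batch dimension.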
            inp = inp.to(self.device, dtype=self.dtype)
            inp.div_(255.0).unsqueeze_(0)

            out = self.face_alignment_net(inp).detach()
            if self.flip_input:
                out += flip(self.face_alignment_net(flip(inp)).detach(), is_label=True)
            out = out.to(device='cpu', dtype=torch.float32).numpy()

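            # The FAN outputs 64x64 heatmaps for the 256x256 crop, so the heatmap
            # coordinates in pts are upscaled by 4; pts_img is already mapped back
            # to original-image coordinates by get_preds_fromhm.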
            pts, pts_img, scores = get_preds_fromhm(out, center.numpy(), scale)
            pts, pts_img = torch.from_numpy(pts), torch.from_numpy(pts_img)
            pts, pts_img = pts.view(68, 2) * 4, pts_img.view(68, 2)
            scores = scores.squeeze(0)

            if self.landmarks_type == LandmarksType.THREE_D:
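                # Render a Gaussian (sigma=2) at each 2D landmark; the depth network
                # takes the image crop concatenated with these 68 heatmaps as input.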
                heatmaps = np.zeros((68, 256, 256), dtype=np.float32)
                for i in range(68):
                    if pts[i, 0] > 0 and pts[i, 1] > 0:
                        heatmaps[i] = draw_gaussian(heatmaps[i], pts[i], 2)
                heatmaps = torch.from_numpy(heatmaps).unsqueeze_(0)

                heatmaps = heatmaps.to(self.device, dtype=self.dtype)
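                # The depth network predicts z in the 256-pixel crop frame; multiplying
                # by 200 * scale / 256 converts it to original-image pixel units, since
                # the crop spans roughly 200 * scale image pixels.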
                depth_pred = self.depth_prediciton_net(
                    torch.cat((inp, heatmaps), 1)).data.cpu().view(68, 1).to(dtype=torch.float32)
                pts_img = torch.cat(
                    (pts_img, depth_pred * (1.0 / (256.0 / (200.0 * scale)))), 1)

            landmarks.append(pts_img.numpy())
            landmarks_scores.append(scores)

        if not return_bboxes:
            detected_faces = None
        if not return_landmark_score:
            landmarks_scores = None
        if return_bboxes or return_landmark_score:
            return landmarks, landmarks_scores, detected_faces
        else:
            return landmarks

    @torch.no_grad()
    def get_landmarks_from_batch(self, image_batch, detected_faces=None, return_bboxes=False,
                                 return_landmark_score=False):
        """Predict the landmarks for each face present in each image of the batch.

        This function predicts a set of 68 2D or 3D landmarks for every face found in each image of the batch.
        If detected_faces is None the method will also run a face detector.

        Arguments:
            image_batch {torch.tensor} -- The input images batch (B x C x H x W)

        Keyword Arguments:
            detected_faces {list of numpy.array} -- list of bounding boxes, one for each face found
            in the image (default: {None})
            return_bboxes {boolean} -- If True, return the face bounding boxes in addition to the keypoints.
            return_landmark_score {boolean} -- If True, return the keypoint scores along with the keypoints.

        Return:
            result:
                1. if both return_bboxes and return_landmark_score are False, result will be:
                    landmarks
                2. Otherwise, result will be one of the following, depending on the actual value of return_* arguments.
                    (landmark, landmark_score, detected_face)
                    (landmark, None,           detected_face)
                    (landmark, landmark_score, None)
        """

        if detected_faces is None:
            detected_faces = self.face_detector.detect_from_batch(image_batch)

        if len(detected_faces) == 0:
            warnings.warn("No faces were detected.")
            if return_bboxes or return_landmark_score:
                return None, None, None
            else:
                return None

        landmarks = []
        landmarks_scores_list = []
        # Process each frame of the batch separately, reusing the single-image path
        for i, faces in enumerate(detected_faces):
            res = self.get_landmarks_from_image(
                image_batch[i].cpu().numpy().transpose(1, 2, 0),
                detected_faces=faces,
                return_landmark_score=return_landmark_score,
            )
            if return_landmark_score:
                landmark_set, landmarks_scores, _ = res
                landmarks_scores_list.append(landmarks_scores)
            else:
                landmark_set = res
            # Backward compatibility: flatten all faces in a frame into one array
            if landmark_set is not None:
                landmark_set = np.concatenate(landmark_set, axis=0)
            else:
                landmark_set = []
            landmarks.append(landmark_set)

        if not return_bboxes:
            detected_faces = None
        if not return_landmark_score:
            landmarks_scores_list = None
        if return_bboxes or return_landmark_score:
            return landmarks, landmarks_scores_list, detected_faces
        else:
            return landmarks

    def get_landmarks_from_directory(self, path, extensions=['.jpg', '.png'], recursive=True, show_progress_bar=True,
                                     return_bboxes=False, return_landmark_score=False):
        """Scan a directory for images with the given extension(s) and predict the landmarks for each
        face present in the images found.

        Arguments:
            path {str} -- path to the target directory containing the images

        Keyword Arguments:
            extensions {list of str} -- list containing the image extensions considered (default: ['.jpg', '.png'])
            recursive {boolean} -- If True, scans for images recursively (default: True)
            show_progress_bar {boolean} -- If True, displays a progress bar (default: True)
            return_bboxes {boolean} -- If True, return the face bounding boxes in addition to the keypoints.
            return_landmark_score {boolean} -- If True, return the keypoint scores along with the keypoints.
        """
        dataset = FolderData(path, self.face_detector.tensor_or_path_to_ndarray, extensions, recursive, self.verbose)
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=2, prefetch_factor=4)
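        # batch_size=1 because images in the folder may have different sizes; the
        # worker processes prefetch and decode images ahead of the main loop.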

        predictions = {}
        for (image_path, image) in tqdm(dataloader, disable=not show_progress_bar):
            image_path, image = image_path[0], image[0]
            bounding_boxes = self.face_detector.detect_from_image(image)
            if return_bboxes or return_landmark_score:
                preds, bbox, score = self.get_landmarks_from_image(
                    image, bounding_boxes, return_bboxes=return_bboxes, return_landmark_score=return_landmark_score)
                predictions[image_path] = (preds, bbox, score)
            else:
                preds = self.get_landmarks_from_image(image, bounding_boxes)
                predictions[image_path] = preds

        return predictions
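

# Minimal usage sketch (not part of the original module). It assumes the FAN and
# SFD weights can be downloaded on first use; 'path/to/image.jpg' is a
# placeholder, not a path from the upstream project.
if __name__ == '__main__':
    fa = FaceAlignment(LandmarksType.TWO_D,
                       device='cuda' if torch.cuda.is_available() else 'cpu')
    preds = fa.get_landmarks_from_image('path/to/image.jpg')  # placeholder path
    if preds is not None:
        # preds is a list with one (68, 2) array of (x, y) coordinates per face
        print(f'Detected {len(preds)} face(s); first set shape: {preds[0].shape}')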