Skip to content

Commit 49c2d9c

Browse files
committed
update
1 parent 3130fae commit 49c2d9c

File tree

3 files changed

+312
-209
lines changed

3 files changed

+312
-209
lines changed
Lines changed: 308 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,308 @@
1+
"""
2+
Modified from face-alignment v1.4.1 api.py
3+
Original source: https://github.com/1adrianb/face-alignment
4+
License: BSD-3-Clause License
5+
"""
6+
import torch
7+
import warnings
8+
from enum import IntEnum
9+
from skimage import io
10+
import numpy as np
11+
from packaging import version
12+
from tqdm import tqdm
13+
14+
from face_alignment.utils import *
15+
from face_alignment.folder_data import FolderData
16+
from face_alignment.detection import sfd
17+
18+
class LandmarksType(IntEnum):
    """Enum class defining the type of landmarks to detect.

    ``TWO_D`` - the detected points ``(x, y)`` lie in 2D image space and follow the visible contour of the face
    ``TWO_HALF_D`` - the detected points ``(x, y)`` are the projection of the 3D points into 2D
    ``THREE_D`` - the detected points ``(x, y, z)`` lie in 3D space

    """
    TWO_D = 1
    TWO_HALF_D = 2
    THREE_D = 3
29+
30+
31+
class NetworkSize(IntEnum):
    """Size variant of the landmark network.

    The integer value is appended to the model name (e.g. ``'2DFAN-4'``) when
    the network is looked up. Only ``LARGE`` is enabled; the smaller variants
    are kept below as disabled entries.
    """
    # TINY = 1
    # SMALL = 2
    # MEDIUM = 3
    LARGE = 4
36+
37+
38+
# Fallback download URLs, keyed by model name. Used when the running PyTorch
# "major.minor" version has no dedicated entry in ``models_urls``.
default_model_urls = {
    '2DFAN-4': 'https://www.adrianbulat.com/downloads/python-fan/2DFAN4-cd938726ad.zip',
    '3DFAN-4': 'https://www.adrianbulat.com/downloads/python-fan/3DFAN4-4a694010b9.zip',
    'depth': 'https://www.adrianbulat.com/downloads/python-fan/depth-6c4283c0e0.zip',
}

# Version-specific download URLs: outer key is the PyTorch "major.minor"
# version string, inner key is the model name.
models_urls = {
    '1.6': {
        '2DFAN-4': 'https://www.adrianbulat.com/downloads/python-fan/2DFAN4_1.6-c827573f02.zip',
        '3DFAN-4': 'https://www.adrianbulat.com/downloads/python-fan/3DFAN4_1.6-ec5cf40a1d.zip',
        'depth': 'https://www.adrianbulat.com/downloads/python-fan/depth_1.6-2aa3f18772.zip',
    },
    '1.5': {
        '2DFAN-4': 'https://www.adrianbulat.com/downloads/python-fan/2DFAN4_1.5-a60332318a.zip',
        '3DFAN-4': 'https://www.adrianbulat.com/downloads/python-fan/3DFAN4_1.5-176570af4d.zip',
        'depth': 'https://www.adrianbulat.com/downloads/python-fan/depth_1.5-bc10f98e39.zip',
    },
}
56+
57+
58+
class FaceAlignment:
    """Detect faces with the SFD detector and predict 68 landmarks for each face.

    The landmark network is a TorchScript model (2D or 3D FAN). For
    ``LandmarksType.THREE_D`` an additional depth-prediction network supplies
    the ``z`` coordinate.
    """

    def __init__(self, landmarks_type, network_size=NetworkSize.LARGE, net_path=None,
                 device='cuda', dtype=torch.float32, flip_input=False, face_detector_kwargs=None, verbose=False):
        """Load the face detector and the landmark network(s).

        Arguments:
            landmarks_type {LandmarksType} -- The kind of landmarks to predict.

        Keyword Arguments:
            network_size {NetworkSize} -- Size variant of the landmark network (default: {NetworkSize.LARGE})
            net_path {str} -- Path of a local TorchScript landmark model. When None, the model is
                              fetched from the URL tables (default: {None})
            device {str} -- Device the networks and inputs are moved to (default: {'cuda'})
            dtype {torch.dtype} -- Data type the networks and inputs are cast to (default: {torch.float32})
            flip_input {boolean} -- If True, the network is also run on the flipped crop and both
                                    outputs are summed (default: {False})
            face_detector_kwargs {dict} -- Extra keyword arguments forwarded to the face detector
                                           (default: {None})
            verbose {boolean} -- Forwarded to the face detector and to the folder dataset (default: {False})

        Raises:
            ImportError -- If the installed PyTorch is older than 1.5.0.
        """
        self.device = device
        self.flip_input = flip_input
        self.landmarks_type = landmarks_type
        self.verbose = verbose
        self.dtype = dtype

        if version.parse(torch.__version__) < version.parse('1.5.0'):
            # Adjacent literals instead of a backslash continuation, so the
            # source indentation does not end up inside the message.
            raise ImportError(
                'Unsupported pytorch version detected. Minimum supported version of pytorch: 1.5.0. '
                'Either upgrade (recommended) your pytorch setup, or downgrade to face-alignment 1.2.0')

        network_size = int(network_size)

        # Reduce the version string to the "major.minor" key used by models_urls.
        pytorch_version = torch.__version__
        if 'dev' in pytorch_version:
            pytorch_version = pytorch_version.rsplit('.', 2)[0]
        else:
            pytorch_version = pytorch_version.rsplit('.', 1)[0]

        # str() keeps the membership test valid when a torch.device is passed
        # (torch.device does not support ``in``).
        if 'cuda' in str(device):
            torch.backends.cudnn.benchmark = True

        # Get the face detector (fixed to SFD in this modified API)
        face_detector_kwargs = face_detector_kwargs or {}
        self.face_detector = sfd.FaceDetector(device=device, verbose=verbose, **face_detector_kwargs)

        # Initialise the face alignment network
        if landmarks_type == LandmarksType.TWO_D:
            network_name = '2DFAN-' + str(network_size)
        else:
            network_name = '3DFAN-' + str(network_size)
        if net_path is None:
            # Version-specific URL when available, generic URL otherwise.
            net_path = load_file_from_url(models_urls.get(pytorch_version, default_model_urls)[network_name])
        self.face_alignment_net = torch.jit.load(net_path)

        self.face_alignment_net.to(device, dtype=dtype)
        self.face_alignment_net.eval()

        # Initialise the depth prediction network (always fetched from the URL tables)
        if landmarks_type == LandmarksType.THREE_D:
            self.depth_prediciton_net = torch.jit.load(
                load_file_from_url(models_urls.get(pytorch_version, default_model_urls)['depth']))

            self.depth_prediciton_net.to(device, dtype=dtype)
            self.depth_prediciton_net.eval()

    def get_landmarks(self, image_or_path, detected_faces=None, return_bboxes=False, return_landmark_score=False):
        """Deprecated, please use get_landmarks_from_image

        Arguments:
            image_or_path {string or numpy.array or torch.tensor} -- The input image or path to it

        Keyword Arguments:
            detected_faces {list of numpy.array} -- list of bounding boxes, one for each face found
            in the image (default: {None})
            return_bboxes {boolean} -- If True, return the face bounding boxes in addition to the keypoints.
            return_landmark_score {boolean} -- If True, return the keypoint scores along with the keypoints.

        Return:
            Whatever ``get_landmarks_from_image`` returns for the same arguments.
        """
        return self.get_landmarks_from_image(image_or_path, detected_faces, return_bboxes, return_landmark_score)

    @torch.no_grad()
    def get_landmarks_from_image(self, image_or_path, detected_faces=None, return_bboxes=False,
                                 return_landmark_score=False):
        """Predict the landmarks for each face present in the image.

        This function predicts a set of 68 2D or 3D points, one set for each face present.
        If detected_faces is None the method will also run a face detector.

        Arguments:
            image_or_path {string or numpy.array or torch.tensor} -- The input image or path to it.

        Keyword Arguments:
            detected_faces {list of numpy.array} -- list of bounding boxes, one for each face found
            in the image (default: {None})
            return_bboxes {boolean} -- If True, return the face bounding boxes in addition to the keypoints.
            return_landmark_score {boolean} -- If True, return the keypoint scores along with the keypoints.

        Return:
            result:
                1. if both return_bboxes and return_landmark_score are False, result will be:
                    landmark
                2. Otherwise, result will be one of the following, depending on the actual value of return_* arguments.
                    (landmark, landmark_score, detected_face)
                    (landmark, None,           detected_face)
                    (landmark, landmark_score, None         )
            ``landmark`` is a list with one ``(68, 2)`` array per face, or ``(68, 3)`` for
            ``LandmarksType.THREE_D``. When no face is available, every returned element is None.
        """
        image = get_image(image_or_path)

        if detected_faces is None:
            detected_faces = self.face_detector.detect_from_image(image.copy())

        if len(detected_faces) == 0:
            warnings.warn("No faces were detected.")
            if return_bboxes or return_landmark_score:
                return None, None, None
            else:
                return None

        landmarks = []
        landmarks_scores = []
        for d in detected_faces:
            # Box centre, then shifted upwards by 12% of the box height.
            center = torch.tensor(
                [d[2] - (d[2] - d[0]) / 2.0, d[3] - (d[3] - d[1]) / 2.0])
            center[1] = center[1] - (d[3] - d[1]) * 0.12
            scale = (d[2] - d[0] + d[3] - d[1]) / self.face_detector.reference_scale

            inp = crop(image, center, scale)
            inp = torch.from_numpy(inp.transpose(
                (2, 0, 1))).float()

            inp = inp.to(self.device, dtype=self.dtype)
            inp.div_(255.0).unsqueeze_(0)

            out = self.face_alignment_net(inp).detach()
            if self.flip_input:
                out += flip(self.face_alignment_net(flip(inp)).detach(), is_label=True)
            out = out.to(device='cpu', dtype=torch.float32).numpy()

            pts, pts_img, scores = get_preds_fromhm(out, center.numpy(), scale)
            pts, pts_img = torch.from_numpy(pts), torch.from_numpy(pts_img)
            pts, pts_img = pts.view(68, 2) * 4, pts_img.view(68, 2)
            scores = scores.squeeze(0)

            if self.landmarks_type == LandmarksType.THREE_D:
                # One gaussian heatmap per landmark, fed to the depth network
                # together with the crop.
                heatmaps = np.zeros((68, 256, 256), dtype=np.float32)
                for k in range(68):
                    if pts[k, 0] > 0 and pts[k, 1] > 0:
                        heatmaps[k] = draw_gaussian(
                            heatmaps[k], pts[k], 2)
                heatmaps = torch.from_numpy(
                    heatmaps).unsqueeze_(0)

                heatmaps = heatmaps.to(self.device, dtype=self.dtype)
                depth_pred = self.depth_prediciton_net(
                    torch.cat((inp, heatmaps), 1)).data.cpu().view(68, 1).to(dtype=torch.float32)
                # Rescale the predicted depth and append it as the third column.
                pts_img = torch.cat(
                    (pts_img, depth_pred * (1.0 / (256.0 / (200.0 * scale)))), 1)

            landmarks.append(pts_img.numpy())
            landmarks_scores.append(scores)

        if not return_bboxes:
            detected_faces = None
        if not return_landmark_score:
            landmarks_scores = None
        if return_bboxes or return_landmark_score:
            return landmarks, landmarks_scores, detected_faces
        else:
            return landmarks

    @torch.no_grad()
    def get_landmarks_from_batch(self, image_batch, detected_faces=None, return_bboxes=False,
                                 return_landmark_score=False):
        """Predict the landmarks for each face present in each image of a batch.

        This function predicts a set of 68 2D or 3D points for every face of every image in the batch.
        If detected_faces is None the method will also run a face detector.

        Arguments:
            image_batch {torch.tensor} -- The input images batch

        Keyword Arguments:
            detected_faces {list of numpy.array} -- list of bounding boxes, one entry per image
            in the batch (default: {None})
            return_bboxes {boolean} -- If True, return the face bounding boxes in addition to the keypoints.
            return_landmark_score {boolean} -- If True, return the keypoint scores along with the keypoints.

        Return:
            result:
                1. if both return_bboxes and return_landmark_score are False, result will be:
                    landmarks
                2. Otherwise, result will be one of the following, depending on the actual value of return_* arguments.
                    (landmark, landmark_score, detected_face)
                    (landmark, None,           detected_face)
                    (landmark, landmark_score, None         )
            ``landmarks`` holds one entry per image: the landmarks of all its faces concatenated
            along axis 0, or an empty list when the image has no face.
        """

        if detected_faces is None:
            detected_faces = self.face_detector.detect_from_batch(image_batch)

        if len(detected_faces) == 0:
            warnings.warn("No faces were detected.")
            if return_bboxes or return_landmark_score:
                return None, None, None
            else:
                return None

        landmarks = []
        landmarks_scores_list = []
        # One call per frame of the batch
        for i, faces in enumerate(detected_faces):
            res = self.get_landmarks_from_image(
                # presumably CHW -> HWC for the per-image path -- TODO confirm
                image_batch[i].cpu().numpy().transpose(1, 2, 0),
                detected_faces=faces,
                return_landmark_score=return_landmark_score,
            )
            if return_landmark_score:
                landmark_set, landmarks_scores, _ = res
                landmarks_scores_list.append(landmarks_scores)
            else:
                landmark_set = res
            # Backward compatibility
            if landmark_set is not None:
                landmark_set = np.concatenate(landmark_set, axis=0)
            else:
                landmark_set = []
            landmarks.append(landmark_set)

        if not return_bboxes:
            detected_faces = None
        if not return_landmark_score:
            landmarks_scores_list = None
        if return_bboxes or return_landmark_score:
            return landmarks, landmarks_scores_list, detected_faces
        else:
            return landmarks

    def get_landmarks_from_directory(self, path, extensions=['.jpg', '.png'], recursive=True, show_progress_bar=True,
                                     return_bboxes=False, return_landmark_score=False):
        """Scan a directory for images with a given extension type(s) and predict the landmarks for each
            face present in the images found.

        Arguments:
            path {str} -- path to the target directory containing the images

        Keyword Arguments:
            extensions {list of str} -- list containing the image extensions considered (default: ['.jpg', '.png'])
            recursive {boolean} -- If True, scans for images recursively (default: True)
            show_progress_bar {boolean} -- If True displays a progress bar (default: True)
            return_bboxes {boolean} -- If True, return the face bounding boxes in addition to the keypoints.
            return_landmark_score {boolean} -- If True, return the keypoint scores along with the keypoints.

        Return:
            dict mapping each image path to the landmarks of that image, or to the tuple
            (landmark, landmark_score, detected_face) when return_bboxes or return_landmark_score is True.
        """
        dataset = FolderData(path, self.face_detector.tensor_or_path_to_ndarray, extensions, recursive, self.verbose)
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=2, prefetch_factor=4)

        predictions = {}
        for (image_path, image) in tqdm(dataloader, disable=not show_progress_bar):
            # batch_size is 1, so take the single element of each batch.
            image_path, image = image_path[0], image[0]
            bounding_boxes = self.face_detector.detect_from_image(image)
            if return_bboxes or return_landmark_score:
                # get_landmarks_from_image returns (landmarks, scores, bboxes), in that order.
                preds, scores, bboxes = self.get_landmarks_from_image(
                    image, bounding_boxes, return_bboxes=return_bboxes, return_landmark_score=return_landmark_score)
                predictions[image_path] = (preds, scores, bboxes)
            else:
                preds = self.get_landmarks_from_image(image, bounding_boxes)
                predictions[image_path] = preds

        return predictions

FunCineForge/speaker_diarization/local/vision_tools/lip_detection.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
import os
22
import cv2
3-
from face_alignment import FaceAlignment, LandmarksType
3+
from .api import FaceAlignment, LandmarksType
44
import numpy as np
55

66
class LipDetector:
77
"""
8-
在 face crop 上检测嘴巴位置。
9-
使用 face_alignment 包的 FAN 模型实现
8+
在 face crop 上检测唇部位置,
9+
基于修改的 face_alignment api 调用 FAN 模型实现
1010
"""
1111

1212
def __init__(
@@ -23,7 +23,7 @@ def __init__(
2323

2424
if model_dir is not None:
2525
model_path = os.path.join(model_dir, 'fun_2d.pth')
26-
net_path = os.path.join(model_dir, 'fun_2d.zip')
26+
net_path = os.path.join(model_dir, 'fun_2d.zip') # 使用预下载模型避免长时间下载
2727
print(f"Loading FAN model from {model_path} on {device_str}...")
2828
else:
2929
model_path = None

0 commit comments

Comments
 (0)