Skip to content

Commit

Permalink
Optimize facial detection
Browse files Browse the repository at this point in the history
  • Loading branch information
lldacing committed Feb 19, 2025
1 parent 0d3c85b commit 5f1e91d
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 49 deletions.
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ Must uninstall or disable `ComfyUI-PuLID-Flux` and other PuLID-Flux nodes before
Need upgrade ComfyUI Version>=0.3.7

## Update logs
### 2025.02.19
- Fix: when selecting a face from multiple faces as a reference, the embeddings and alignment features may not come from the same face.
### 2025.02.18
- Supported selecting a face from multiple faces as a reference. [Example workflow](examples/PuLID_select_ref_face.png).
### 2025.01.27
Expand Down Expand Up @@ -85,9 +87,15 @@ Failed to build insightface
- `small-large`: Sort the area of bbox from small to large.
- `large-small`: Sort the area of bbox from large to small.
- `input_faces_index` - The target index of the sorted bboxes.
- `input_faces_align_mode` - Choose the detection method for aligning facial features.
- `0`: Old method. When there are multiple faces in an image, the selected facial embedding and alignment features may not be consistent.
- `1`: Keeps the selected facial embedding and alignment features consistent.
- There is a slight difference between the two modes: the `align_face` result of mode `1` covers a slightly smaller area than the `embed_face` result of mode `0`.
- PulidFluxFaceDetector
- Can check the facial features applied in `ApplyPulidFlux`.
- The `embed_face` and `align_face` should be the same face, but they are generated by different detectors, and the number of faces detected may not be consistent, so they may not be the same face.
- When `input_faces_align_mode = 0`, the `embed_face` and `align_face` should be the same face, but they are generated by different detectors and the number of faces detected may not be consistent, so they may not be the same face.
- When `input_faces_align_mode = 1`, the `embed_face` and `align_face` are always the same face, because they are generated by the same detector.
- `face_bbox_image` - Draw the detected facial bounding box (the result of the `embed_face`'s detector).

## Thanks

Expand Down
10 changes: 9 additions & 1 deletion README_CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
ComfyUI主体版本需要>=0.3.7

## 更新日志
### 2025.02.19
- 解决多张人脸时选择的人脸嵌入量和对齐特征不是同一个人脸的问题。
### 2025.02.18
- 支持从含有多张脸的图片中选择一张脸作为参考。[示例工作流](examples/PuLID_select_ref_face.png).
### 2025.01.27
Expand Down Expand Up @@ -81,9 +83,15 @@ Failed to build insightface
- `small-large`: 按bbox的面积从小到大排序。
- `large-small`: 按bbox的面积从大到小排序。
- `input_faces_index` - 从排序后的bbox选取的索引号。
- `input_faces_align_mode` - 选择对齐脸部特征的检测方式。
- `0`: 旧版本方式,一张图片中有多张脸时选择的脸部嵌入量和对齐特征可能不一致。
- `1`: 保持选择的脸部嵌入量和对齐特征一致。
- 两种出图有细微差别,值`1`的`align_face`结果图比`0`的`embed_face`范围小一点。
- PulidFluxFaceDetector
- 用来检查在`ApplyPulidFlux`实际使用的面部特征。
- `embed_face`和`align_face` 理论上应该是同一张脸,但它们由不同的检测器产生,可能检测到的数量不一致,因此两张图可能不是同一张脸。
- `input_faces_align_mode = 0`时,`embed_face`和`align_face` 理论上应该是同一张脸,但它们由不同的检测器产生,可能检测到的数量不一致,因此两张图可能不是同一张脸。
- `input_faces_align_mode = 1`时,`embed_face`和`align_face` 由相同的检测器产生,两张图始终是同一张脸。
- `face_bbox_image` - 画出检测到的脸部边界框(`embed_face`的检测器结果)。

## 感谢

Expand Down
Binary file modified examples/PuLID_select_ref_face.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
40 changes: 37 additions & 3 deletions face_restoration_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ def get_face_by_index(det_faces, face_sort_rule, face_index=0):
if not 0 <= face_index < len(sorted_faces):
# 返回第一个
face_index = 0
# 返回选择的脸部、原始索引值和排序后的bbox列表
return sorted_faces[face_index][1], sorted_faces[face_index][0], [face[1].bbox if has_bbox_attr else face[1] for face in sorted_faces]
# 返回选择的脸部、原始索引值和排序后的列表
return sorted_faces[face_index][1], sorted_faces[face_index][0], [face[1] for face in sorted_faces]


def get_largest_face(det_faces, h, w):
Expand Down Expand Up @@ -172,7 +172,8 @@ def get_face_landmarks_5(self,
input_img = cv2.resize(self.input_img, (w, h), interpolation=cv2.INTER_LANCZOS4)

with torch.no_grad():
bboxes = self.face_det.detect_faces(input_img, 0.97) * scale
# use 0.5 (old value is 0.97), keep consistent with Insightface, but still cannot ensure consistent quantity of bboxes.
bboxes = self.face_det.detect_faces(input_img, 0.5) * scale
for bbox in bboxes:
# remove faces with too small eye distance: side faces or too small faces
eye_dist = np.linalg.norm([bbox[5] - bbox[7], bbox[6] - bbox[8]])
Expand Down Expand Up @@ -420,3 +421,36 @@ def clean_all(self):
self.inverse_affine_matrices = []
self.det_faces = []
self.pad_input_imgs = []

def draw_on(img, faces):
    """Return a copy of ``img`` annotated with the given faces.

    For every face, in order, this draws:

    * a rectangle around ``face.bbox`` (color ``(0, 0, 255)``, thickness 2);
    * one small circle per row of ``face.kps`` when ``face.kps`` is not
      ``None`` -- rows 0 and 3 use ``(0, 255, 0)``, all others use
      ``(0, 0, 255)``;
    * the text ``index: <i>`` just above the top-left corner of the box,
      where ``<i>`` is the face's position in ``faces``.

    Args:
        img: Image to annotate. Only ``img.copy()`` is drawn on; the input
            itself is left untouched.
            NOTE(review): presumably an HxWx3 uint8 ndarray as accepted by
            the OpenCV drawing functions -- confirm against callers.
        faces: Sequence of face objects exposing ``bbox`` (array-like whose
            first four values are used as x1, y1, x2, y2) and ``kps``
            (array of (x, y) rows, or ``None``).

    Returns:
        The annotated copy of ``img``. When ``faces`` is empty this is an
        unmodified copy.
    """
    primary_color = (0, 0, 255)
    accent_color = (0, 255, 0)

    dimg = img.copy()
    for i, face in enumerate(faces):
        # Drawing functions need integer pixel coordinates.
        box = face.bbox.astype(np.int32)
        cv2.rectangle(dimg, (box[0], box[1]), (box[2], box[3]), primary_color, 2)

        if face.kps is not None:
            kps = face.kps.astype(np.int32)
            for l, point in enumerate(kps):
                # Keypoints 0 and 3 are drawn in the accent color so they can
                # be told apart from the remaining ones.
                color = accent_color if l in (0, 3) else primary_color
                cv2.circle(dimg, (point[0], point[1]), 1, color, 2)

        # Label each box with its position in ``faces`` so a face index can
        # be read directly off the preview image.
        cv2.putText(dimg, 'index: %d' % i, (box[0] - 1, box[1] - 4),
                    cv2.FONT_HERSHEY_COMPLEX, 0.7, accent_color, 1)
    return dimg
122 changes: 79 additions & 43 deletions pulidflux.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import types
import zipfile

import cv2
import torch
from insightface.utils.download import download_file
from insightface.utils.storage import BASE_REPO_URL
from insightface.utils import face_align
from torch import nn
from torchvision import transforms
from torchvision.transforms import functional
Expand All @@ -12,7 +14,7 @@
import folder_paths
import comfy
from insightface.app import FaceAnalysis
from .face_restoration_helper import FaceRestoreHelper, get_face_by_index
from .face_restoration_helper import FaceRestoreHelper, get_face_by_index, draw_on

from comfy import model_management
from .eva_clip.constants import OPENAI_DATASET_MEAN, OPENAI_DATASET_STD
Expand Down Expand Up @@ -280,6 +282,7 @@ def apply_pulid_flux(self, model, pulid_flux, eva_clip, face_analysis, image, we

input_face_sort = options.get('input_faces_order', "large-small")
input_face_index = options.get('input_faces_index', 0)
input_face_align_mode = options.get('input_faces_align_mode', 1)
# Analyse multiple images at multiple sizes and combine largest area embeddings
for i in range(image.shape[0]):
# get insightface embeddings
Expand All @@ -289,26 +292,35 @@ def apply_pulid_flux(self, model, pulid_flux, eva_clip, face_analysis, image, we
face_analysis.det_model.input_size = size
face_info = face_analysis.get(image[i])
if face_info:
face_info, index, bboxes = get_face_by_index(face_info, face_sort_rule=input_face_sort, face_index=input_face_index)
face_info, index, sorted_faces = get_face_by_index(face_info, face_sort_rule=input_face_sort, face_index=input_face_index)
bboxes = [face.bbox for face in sorted_faces]
iface_embeds = torch.from_numpy(face_info.embedding).unsqueeze(0).to(device, dtype=dtype)
break
else:
# No face detected, skip this image
logging.warning(f'Warning: No face detected in image {str(i)}')
continue

# get eva_clip embeddings
face_helper.clean_all()
face_helper.read_image(image[i])
face_helper.get_face_landmarks_5(ref_sort_bboxes=bboxes, face_index=input_face_index)
face_helper.align_warp_face()

if len(face_helper.cropped_faces) == 0:
# No face detected, skip this image
continue

# Get aligned face image
align_face = face_helper.cropped_faces[0]
if input_face_align_mode == 1:
image_size = 512
M = face_align.estimate_norm(face_info.kps, image_size=image_size)
align_face = cv2.warpAffine(image[i], M, (image_size, image_size), borderMode=cv2.BORDER_CONSTANT,
borderValue=(135, 133, 132))
# align_face = face_align.norm_crop(image[i], landmark=face_info.kps, image_size=image_size)
del M
else:
# get eva_clip embeddings
face_helper.clean_all()
face_helper.read_image(image[i])
face_helper.get_face_landmarks_5(ref_sort_bboxes=bboxes, face_index=input_face_index)
face_helper.align_warp_face()

if len(face_helper.cropped_faces) == 0:
# No face detected, skip this image
continue

# Get aligned face image
align_face = face_helper.cropped_faces[0]
# Convert bgr face image to tensor
align_face = image_to_tensor(align_face).unsqueeze(0).permute(0, 3, 1, 2).to(device)
parsing_out = face_helper.face_parse(functional.normalize(align_face, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))[0]
Expand Down Expand Up @@ -341,6 +353,7 @@ def apply_pulid_flux(self, model, pulid_flux, eva_clip, face_analysis, image, we
if not cond:
# No faces detected, return the original model
logging.warning("PuLID warning: No faces detected in any of the given images, returning unmodified model.")
del eva_clip, face_analysis, pulid_flux, face_helper, attn_mask
return (model,)

# average embeddings
Expand Down Expand Up @@ -437,17 +450,25 @@ def INPUT_TYPES(s):
"default": 0, "min": 0, "max": 1000, "step": 1,
"tooltip": "If the value is greater than the size of bboxes, will set value to 0."
}),
"input_faces_align_mode": ("INT",
{
"default": 1, "min": 0, "max": 1, "step": 1,
"tooltip": "Align face mode.\n"
"0: align_face and embed_face use different detectors. The results maybe different.\n"
"1: align_face and embed_face use the same detector."
}),
}
}

RETURN_TYPES = ("OPTIONS",)
FUNCTION = "execute"
CATEGORY = "pulid"

def execute(self,input_faces_order, input_faces_index):
def execute(self,input_faces_order, input_faces_index, input_faces_align_mode=1):
options: dict = {
"input_faces_order": input_faces_order,
"input_faces_index": input_faces_index,
"input_faces_align_mode": input_faces_align_mode,
}
return (options, )

Expand All @@ -463,68 +484,83 @@ def INPUT_TYPES(s):
}
}

RETURN_TYPES = ("IMAGE", "IMAGE",)
RETURN_NAMES = ("embed_face", "align_face")
RETURN_TYPES = ("IMAGE", "IMAGE", "IMAGE",)
RETURN_NAMES = ("embed_face", "align_face", "face_bbox_image",)
FUNCTION = "execute"
CATEGORY = "pulid"
OUTPUT_IS_LIST = (True, True,)
OUTPUT_IS_LIST = (True, True, True,)

def execute(self, face_analysis, image, options):

device = comfy.model_management.get_torch_device()
face_helper = FaceRestoreHelper(
upscale_factor=1,
face_size=512,
crop_ratio=(1, 1),
det_model='retinaface_resnet50',
parsing_model='bisenet',
save_ext='png',
device=device,
model_rootpath=FACEXLIB_DIR
)

input_face_sort = options.get('input_faces_order', "large-small")
input_face_index = options.get('input_faces_index', 0)
input_face_align_mode = options.get('input_faces_align_mode', 1)

if input_face_align_mode == 0:
face_helper = FaceRestoreHelper(
upscale_factor=1,
face_size=512,
crop_ratio=(1, 1),
det_model='retinaface_resnet50',
parsing_model='bisenet',
save_ext='png',
device=device,
model_rootpath=FACEXLIB_DIR
)

# Analyse multiple images at multiple sizes and combine largest area embeddings
embed_faces=[]
align_faces=[]
draw_embed_face_bbox=[]
image = tensor_to_image(image)
for i in range(image.shape[0]):
bboxes = []
for size in [(size, size) for size in range(640, 256, -64)]:
face_analysis.det_model.input_size = size
face_info = face_analysis.get(image[i])
if face_info:
face_info, index, bboxes = get_face_by_index(face_info, face_sort_rule=input_face_sort,
face_info, index, sorted_faces = get_face_by_index(face_info, face_sort_rule=input_face_sort,
face_index=input_face_index)
bboxes = [face.bbox for face in sorted_faces]
embed_faces.append(crop_image(image[i], face_info.bbox, margin=10))
draw_embed_face_bbox.append(image_to_tensor(draw_on(image[i], sorted_faces)).unsqueeze(0))
break
else:
# No face detected, skip this image
logging.warning(f'Warning: No face detected in image {str(i)}')
continue

# get eva_clip embeddings
face_helper.clean_all()
face_helper.read_image(image[i])
face_helper.get_face_landmarks_5(ref_sort_bboxes=bboxes, face_index=input_face_index)
face_helper.align_warp_face()

if len(face_helper.cropped_faces) == 0:
# No face detected, skip this image
continue

# Get aligned face image
align_face = face_helper.cropped_faces[0]
if input_face_align_mode == 1:
image_size = 512
M = face_align.estimate_norm(face_info.kps, image_size=image_size)
align_face = cv2.warpAffine(image[i], M, (image_size, image_size), borderMode=cv2.BORDER_CONSTANT, borderValue=(135, 133, 132))
# align_face = face_align.norm_crop(image[i], landmark=face_info.kps, image_size=image_size)
del M
else:
# get eva_clip embeddings
face_helper.clean_all()
face_helper.read_image(image[i])
face_helper.get_face_landmarks_5(ref_sort_bboxes=bboxes, face_index=input_face_index)
face_helper.align_warp_face()

if len(face_helper.cropped_faces) == 0:
# No face detected, skip this image
continue

# Get aligned face image
align_face = face_helper.cropped_faces[0]
del face_helper
align_faces.append(image_to_tensor(align_face).unsqueeze(0))
del bboxes, align_face
del face_helper, image
del image
if len(embed_faces) == 0:
# No face detected, skip this image
logging.warning(f'Warning: No embed face detected in image')
if len(align_faces) == 0:
logging.warning(f'Warning: No align face detected in image')
return embed_faces, align_faces,
return embed_faces, align_faces, draw_embed_face_bbox,


def crop_image(image, bbox, margin=0):
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[project]
name = "comfyui_pulid_flux_ll"
description = "The implementation for PuLID-Flux, support use with TeaCache and WaveSpeed, no model pollution."
version = "1.1.3"
version = "1.1.4"
license = {file = "LICENSE"}
dependencies = ['cython', 'facexlib', 'insightface', 'onnxruntime', 'onnxruntime-gpu; sys_platform != "darwin" and (platform_machine == "x86_64" or platform_machine == "AMD64")', 'ftfy', 'timm']

Expand Down

0 comments on commit 5f1e91d

Please sign in to comment.