diff --git a/README.md b/README.md index 0377972..f3b1c93 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,8 @@ Must uninstall or disable `ComfyUI-PuLID-Flux` and other PuLID-Flux nodes before Need upgrade ComfyUI Version>=0.3.7 ## Update logs +### 2025.02.19 +- Fix: when selecting a face from multiple faces as a reference, the embeddings and alignment features may not come from the same face. ### 2025.02.18 - Supported selecting a face from multiple faces as a reference. [Example workflow](examples/PuLID_select_ref_face.png). ### 2025.01.27 @@ -85,9 +87,15 @@ Failed to build insightface - `small-large`: Sort the area of bbox from small to large. - `large-small`: Sort the area of bbox from large to small. - `input_faces_index` - The target index of the sorted bboxes. + - `input_faces_align_mode` - Choose the detection method for aligning facial features. + - `0`: Legacy method. When an image contains multiple faces, the selected face embedding and alignment features may not be consistent. + - `1`: Keep the selected face embedding and alignment features consistent. + - The two modes produce slightly different results: the `align_face` image from mode `1` covers a slightly smaller area than the `embed_face` image from mode `0`. - PulidFluxFaceDetector - Can check the facial features applied in `ApplyPulidFlux`. - - The `embed_face` and `align_face` should be the same face, but they are generated by different detectors, and the number detected may be not consistent, so they may be not the same face. + - When `input_faces_align_mode = 0`, the `embed_face` and `align_face` should be the same face, but they are generated by different detectors, and the number of faces detected may not be consistent, so they may not be the same face. + - When `input_faces_align_mode = 1`, the `embed_face` and `align_face` are always the same face because they are generated by the same detector. + - `face_bbox_image` - Draw the detected facial bounding box (the result of the `embed_face`'s detector). 
## Thanks diff --git a/README_CN.md b/README_CN.md index b452895..391c08c 100644 --- a/README_CN.md +++ b/README_CN.md @@ -10,6 +10,8 @@ ComfyUI主体版本需要>=0.3.7 ## 更新日志 +### 2025.02.19 +- 解决多张人脸时选择的人脸嵌入量和对齐特征不是同一个人脸的问题。 ### 2025.02.18 - 支持从含有多张脸的图片中选择一张脸作为参考。[示例工作流](examples/PuLID_select_ref_face.png). ### 2025.01.27 @@ -81,9 +83,15 @@ Failed to build insightface - `small-large`: 按bbox的面积从小到大排序。 - `large-small`: 按bbox的面积从大到小排序。 - `input_faces_index` - 从排序后的bbox选取的索引号。 + - `input_faces_align_mode` - 选择对齐脸部特征的检测方式。 + - `0`: 旧版本方式,一张图片中有张脸时选择的脸部嵌入量和对齐特征可能不一致。 + - `1`: 保持选择的脸部嵌入量和对齐特征一致。 + - 两种出图有细微差别,值`1`的`align_face`结果图比`0`的`embed_face`范围小一点。 - PulidFluxFaceDetector - 用来检查在`ApplyPulidFlux`实际使用的面部特征。 - - `embed_face` 和 `align_face` 理论上应该是同一张脸,但它们由不同的检测器产生,可能检测到的数量不一致,因此两张图可能不是同一张脸。 + - `input_faces_align_mode = 0`时,`embed_face` 和 `align_face` 理论上应该是同一张脸,但它们由不同的检测器产生,可能检测到的数量不一致,因此两张图可能不是同一张脸。 + - `input_faces_align_mode = 1`时,`embed_face` 和 `align_face` 由相同的检测器产生,两张图始终是同一张脸。 + - `face_bbox_image` - 画出检测到的脸部边界框(`embed_face`的检测器结果)。 ## 感谢 diff --git a/examples/PuLID_select_ref_face.png b/examples/PuLID_select_ref_face.png index 1ab9bda..d6b22fb 100644 Binary files a/examples/PuLID_select_ref_face.png and b/examples/PuLID_select_ref_face.png differ diff --git a/face_restoration_helper.py b/face_restoration_helper.py index d73ed1a..cc4e28b 100644 --- a/face_restoration_helper.py +++ b/face_restoration_helper.py @@ -37,8 +37,8 @@ def get_face_by_index(det_faces, face_sort_rule, face_index=0): if not 0 <= face_index < len(sorted_faces): # 返回第一个 face_index = 0 - # 返回选择的脸部、原始索引值和排序后的bbox列表 - return sorted_faces[face_index][1], sorted_faces[face_index][0], [face[1].bbox if has_bbox_attr else face[1] for face in sorted_faces] + # 返回选择的脸部、原始索引值和排序后的列表 + return sorted_faces[face_index][1], sorted_faces[face_index][0], [face[1] for face in sorted_faces] def get_largest_face(det_faces, h, w): @@ -172,7 +172,8 @@ def get_face_landmarks_5(self, input_img = cv2.resize(self.input_img, (w, 
h), interpolation=cv2.INTER_LANCZOS4) with torch.no_grad(): - bboxes = self.face_det.detect_faces(input_img, 0.97) * scale + # use 0.5 (old value is 0.97), keep consistent with Insightface, but still cannot ensure consistent quantity of bboxes. + bboxes = self.face_det.detect_faces(input_img, 0.5) * scale for bbox in bboxes: # remove faces with too small eye distance: side faces or too small faces eye_dist = np.linalg.norm([bbox[5] - bbox[7], bbox[6] - bbox[8]]) @@ -420,3 +421,36 @@ def clean_all(self): self.inverse_affine_matrices = [] self.det_faces = [] self.pad_input_imgs = [] + +def draw_on(img, faces): + dimg = img.copy() + for i in range(len(faces)): + face = faces[i] + box = face.bbox.astype(np.int32) + color = (0, 0, 255) + cv2.rectangle(dimg, (box[0], box[1]), (box[2], box[3]), color, 2) + if face.kps is not None: + kps = face.kps.astype(np.int32) + #print(landmark.shape) + for l in range(kps.shape[0]): + color = (0, 0, 255) + if l == 0 or l == 3: + color = (0, 255, 0) + cv2.circle(dimg, (kps[l][0], kps[l][1]), 1, color, + 2) + + cv2.putText(dimg,'index: %d'%i, (box[0]-1, box[1]-4),cv2.FONT_HERSHEY_COMPLEX,0.7,(0,255,0),1) + + # if face.gender is not None and face.age is not None: + # cv2.putText(dimg,'%s,%d'%(face.sex,face.age), (box[0]-1, box[1]-4),cv2.FONT_HERSHEY_COMPLEX,0.7,(0,255,0),1) + + #for key, value in face.items(): + # if key.startswith('landmark_3d'): + # print(key, value.shape) + # print(value[0:10,:]) + # lmk = np.round(value).astype(np.int) + # for l in range(lmk.shape[0]): + # color = (255, 0, 0) + # cv2.circle(dimg, (lmk[l][0], lmk[l][1]), 1, color, + # 2) + return dimg \ No newline at end of file diff --git a/pulidflux.py b/pulidflux.py index b3376f2..afe96eb 100644 --- a/pulidflux.py +++ b/pulidflux.py @@ -1,9 +1,11 @@ import types import zipfile +import cv2 import torch from insightface.utils.download import download_file from insightface.utils.storage import BASE_REPO_URL +from insightface.utils import face_align from torch import 
nn from torchvision import transforms from torchvision.transforms import functional @@ -12,7 +14,7 @@ import folder_paths import comfy from insightface.app import FaceAnalysis -from .face_restoration_helper import FaceRestoreHelper, get_face_by_index +from .face_restoration_helper import FaceRestoreHelper, get_face_by_index, draw_on from comfy import model_management from .eva_clip.constants import OPENAI_DATASET_MEAN, OPENAI_DATASET_STD @@ -280,6 +282,7 @@ def apply_pulid_flux(self, model, pulid_flux, eva_clip, face_analysis, image, we input_face_sort = options.get('input_faces_order', "large-small") input_face_index = options.get('input_faces_index', 0) + input_face_align_mode = options.get('input_faces_align_mode', 1) # Analyse multiple images at multiple sizes and combine largest area embeddings for i in range(image.shape[0]): # get insightface embeddings @@ -289,7 +292,8 @@ def apply_pulid_flux(self, model, pulid_flux, eva_clip, face_analysis, image, we face_analysis.det_model.input_size = size face_info = face_analysis.get(image[i]) if face_info: - face_info, index, bboxes = get_face_by_index(face_info, face_sort_rule=input_face_sort, face_index=input_face_index) + face_info, index, sorted_faces = get_face_by_index(face_info, face_sort_rule=input_face_sort, face_index=input_face_index) + bboxes = [face.bbox for face in sorted_faces] iface_embeds = torch.from_numpy(face_info.embedding).unsqueeze(0).to(device, dtype=dtype) break else: @@ -297,18 +301,26 @@ def apply_pulid_flux(self, model, pulid_flux, eva_clip, face_analysis, image, we logging.warning(f'Warning: No face detected in image {str(i)}') continue - # get eva_clip embeddings - face_helper.clean_all() - face_helper.read_image(image[i]) - face_helper.get_face_landmarks_5(ref_sort_bboxes=bboxes, face_index=input_face_index) - face_helper.align_warp_face() - - if len(face_helper.cropped_faces) == 0: - # No face detected, skip this image - continue - - # Get aligned face image - align_face = 
face_helper.cropped_faces[0] + if input_face_align_mode == 1: + image_size = 512 + M = face_align.estimate_norm(face_info.kps, image_size=image_size) + align_face = cv2.warpAffine(image[i], M, (image_size, image_size), borderMode=cv2.BORDER_CONSTANT, + borderValue=(135, 133, 132)) + # align_face = face_align.norm_crop(image[i], landmark=face_info.kps, image_size=image_size) + del M + else: + # get eva_clip embeddings + face_helper.clean_all() + face_helper.read_image(image[i]) + face_helper.get_face_landmarks_5(ref_sort_bboxes=bboxes, face_index=input_face_index) + face_helper.align_warp_face() + + if len(face_helper.cropped_faces) == 0: + # No face detected, skip this image + continue + + # Get aligned face image + align_face = face_helper.cropped_faces[0] # Convert bgr face image to tensor align_face = image_to_tensor(align_face).unsqueeze(0).permute(0, 3, 1, 2).to(device) parsing_out = face_helper.face_parse(functional.normalize(align_face, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))[0] @@ -341,6 +353,7 @@ def apply_pulid_flux(self, model, pulid_flux, eva_clip, face_analysis, image, we if not cond: # No faces detected, return the original model logging.warning("PuLID warning: No faces detected in any of the given images, returning unmodified model.") + del eva_clip, face_analysis, pulid_flux, face_helper, attn_mask return (model,) # average embeddings @@ -437,6 +450,13 @@ def INPUT_TYPES(s): "default": 0, "min": 0, "max": 1000, "step": 1, "tooltip": "If the value is greater than the size of bboxes, will set value to 0." }), + "input_faces_align_mode": ("INT", + { + "default": 1, "min": 0, "max": 1, "step": 1, + "tooltip": "Align face mode.\n" + "0: align_face and embed_face use different detectors. The results maybe different.\n" + "1: align_face and embed_face use the same detector." 
+ }), } } @@ -444,10 +464,11 @@ def INPUT_TYPES(s): FUNCTION = "execute" CATEGORY = "pulid" - def execute(self,input_faces_order, input_faces_index): + def execute(self,input_faces_order, input_faces_index, input_faces_align_mode=1): options: dict = { "input_faces_order": input_faces_order, "input_faces_index": input_faces_index, + "input_faces_align_mode": input_faces_align_mode, } return (options, ) @@ -463,31 +484,36 @@ def INPUT_TYPES(s): } } - RETURN_TYPES = ("IMAGE", "IMAGE",) - RETURN_NAMES = ("embed_face", "align_face") + RETURN_TYPES = ("IMAGE", "IMAGE", "IMAGE",) + RETURN_NAMES = ("embed_face", "align_face", "face_bbox_image",) FUNCTION = "execute" CATEGORY = "pulid" - OUTPUT_IS_LIST = (True, True,) + OUTPUT_IS_LIST = (True, True, True,) def execute(self, face_analysis, image, options): device = comfy.model_management.get_torch_device() - face_helper = FaceRestoreHelper( - upscale_factor=1, - face_size=512, - crop_ratio=(1, 1), - det_model='retinaface_resnet50', - parsing_model='bisenet', - save_ext='png', - device=device, - model_rootpath=FACEXLIB_DIR - ) input_face_sort = options.get('input_faces_order', "large-small") input_face_index = options.get('input_faces_index', 0) + input_face_align_mode = options.get('input_faces_align_mode', 1) + + if input_face_align_mode == 0: + face_helper = FaceRestoreHelper( + upscale_factor=1, + face_size=512, + crop_ratio=(1, 1), + det_model='retinaface_resnet50', + parsing_model='bisenet', + save_ext='png', + device=device, + model_rootpath=FACEXLIB_DIR + ) + # Analyse multiple images at multiple sizes and combine largest area embeddings embed_faces=[] align_faces=[] + draw_embed_face_bbox=[] image = tensor_to_image(image) for i in range(image.shape[0]): bboxes = [] @@ -495,36 +521,46 @@ def execute(self, face_analysis, image, options): face_analysis.det_model.input_size = size face_info = face_analysis.get(image[i]) if face_info: - face_info, index, bboxes = get_face_by_index(face_info, 
face_sort_rule=input_face_sort, + face_info, index, sorted_faces = get_face_by_index(face_info, face_sort_rule=input_face_sort, face_index=input_face_index) + bboxes = [face.bbox for face in sorted_faces] embed_faces.append(crop_image(image[i], face_info.bbox, margin=10)) + draw_embed_face_bbox.append(image_to_tensor(draw_on(image[i], sorted_faces)).unsqueeze(0)) break else: # No face detected, skip this image logging.warning(f'Warning: No face detected in image {str(i)}') continue - # get eva_clip embeddings - face_helper.clean_all() - face_helper.read_image(image[i]) - face_helper.get_face_landmarks_5(ref_sort_bboxes=bboxes, face_index=input_face_index) - face_helper.align_warp_face() - - if len(face_helper.cropped_faces) == 0: - # No face detected, skip this image - continue - - # Get aligned face image - align_face = face_helper.cropped_faces[0] + if input_face_align_mode == 1: + image_size = 512 + M = face_align.estimate_norm(face_info.kps, image_size=image_size) + align_face = cv2.warpAffine(image[i], M, (image_size, image_size), borderMode=cv2.BORDER_CONSTANT, borderValue=(135, 133, 132)) + # align_face = face_align.norm_crop(image[i], landmark=face_info.kps, image_size=image_size) + del M + else: + # get eva_clip embeddings + face_helper.clean_all() + face_helper.read_image(image[i]) + face_helper.get_face_landmarks_5(ref_sort_bboxes=bboxes, face_index=input_face_index) + face_helper.align_warp_face() + + if len(face_helper.cropped_faces) == 0: + # No face detected, skip this image + continue + + # Get aligned face image + align_face = face_helper.cropped_faces[0] + del face_helper align_faces.append(image_to_tensor(align_face).unsqueeze(0)) del bboxes, align_face - del face_helper, image + del image if len(embed_faces) == 0: # No face detected, skip this image logging.warning(f'Warning: No embed face detected in image') if len(align_faces) == 0: logging.warning(f'Warning: No align face detected in image') - return embed_faces, align_faces, + return 
embed_faces, align_faces, draw_embed_face_bbox, def crop_image(image, bbox, margin=0): diff --git a/pyproject.toml b/pyproject.toml index c9ae543..d974354 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "comfyui_pulid_flux_ll" description = "The implementation for PuLID-Flux, support use with TeaCache and WaveSpeed, no model pollution." -version = "1.1.3" +version = "1.1.4" license = {file = "LICENSE"} dependencies = ['cython', 'facexlib', 'insightface', 'onnxruntime', 'onnxruntime-gpu; sys_platform != "darwin" and (platform_machine == "x86_64" or platform_machine == "AMD64")', 'ftfy', 'timm']