Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions yolov8n/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
yolo_face_env/
1 change: 1 addition & 0 deletions yolov8n/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

64 changes: 64 additions & 0 deletions yolov8n/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
appnope==0.1.4
asttokens==3.0.0
certifi==2025.8.3
charset-normalizer==3.4.3
comm==0.2.2
contourpy==1.3.3
cycler==0.12.1
debugpy==1.8.13
decorator==5.2.1
executing==2.2.0
filelock==3.19.1
fonttools==4.59.2
fsspec==2025.9.0
idna==3.10
ipykernel==6.29.5
ipython==9.1.0
ipython_pygments_lexers==1.1.1
jedi==0.19.2
Jinja2==3.1.6
jupyter_client==8.6.3
jupyter_core==5.7.2
kiwisolver==1.4.9
MarkupSafe==3.0.2
matplotlib==3.10.6
matplotlib-inline==0.1.7
mpmath==1.3.0
nest-asyncio==1.6.0
networkx==3.5
numpy==2.2.5
opencv-python==4.12.0.88
packaging==24.2
pandas==2.2.3
parso==0.8.4
pexpect==4.9.0
pillow==11.3.0
platformdirs==4.3.7
polars==1.33.1
prompt_toolkit==3.0.50
psutil==7.0.0
ptyprocess==0.7.0
pure_eval==0.2.3
Pygments==2.19.1
pyparsing==3.2.3
python-dateutil==2.9.0.post0
pytz==2025.2
PyYAML==6.0.2
pyzmq==26.4.0
requests==2.32.5
scipy==1.16.2
setuptools==80.9.0
six==1.17.0
stack-data==0.6.3
sympy==1.14.0
torch==2.8.0
torchvision==0.23.0
tornado==6.4.2
traitlets==5.14.3
typing_extensions==4.15.0
tzdata==2025.2
ultralytics==8.3.199
ultralytics-thop==2.0.17
urllib3==2.5.0
wcwidth==0.2.13
wheel==0.45.1
200 changes: 200 additions & 0 deletions yolov8n/yolo_mediapipe_face.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
import argparse
import time
from pathlib import Path

import cv2
import numpy as np
import torch
from ultralytics import YOLO
import mediapipe as mp


def _confidence(value):
    """Parse a ``--conf`` value and require it to lie within [0, 1].

    Raises:
        argparse.ArgumentTypeError: if *value* is not a float or falls
            outside the closed interval [0, 1] (NaN is rejected as well).
    """
    try:
        number = float(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid float value: {value!r}") from None
    # The chained comparison is False for NaN, so NaN is rejected too.
    if not 0.0 <= number <= 1.0:
        raise argparse.ArgumentTypeError(
            f"confidence must be within [0, 1], got {value!r}"
        )
    return number


def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the face detection script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, which is what the
            original zero-argument call did.

    Returns:
        Namespace with ``model`` (str), ``device`` (str), ``conf`` (float),
        ``camera`` (int) and ``show_fps`` (bool).
    """
    parser = argparse.ArgumentParser(
        description="YOLO + MediaPipe 얼굴 검출 및 랜드마크",
    )
    # NOTE(review): the change set that adds this script ships a weight file
    # named "yolov8n-face.pt", not "face_yolov8n.pt" - confirm which default
    # is intended. The default is kept as-is for backward compatibility.
    parser.add_argument(
        "--model",
        type=str,
        default="face_yolov8n.pt",
        help="YOLO face 모델 경로(.pt). 기본값: face_yolov8n.pt",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda" if torch.cuda.is_available() else "cpu",
        # "mps" is accepted so Apple-silicon users can opt in explicitly;
        # the automatic default is unchanged.
        choices=["cpu", "cuda", "mps"],
        help="추론 디바이스 선택",
    )
    parser.add_argument(
        "--conf",
        type=_confidence,
        default=0.5,
        help="신뢰도 임계값 (0~1)",
    )
    parser.add_argument(
        "--camera",
        type=int,
        default=0,
        help="웹캠 인덱스 (기본 0)",
    )
    parser.add_argument(
        "--show-fps",
        action="store_true",
        help="화면에 FPS 표기",
    )
    return parser.parse_args(argv)


class YOLOMediaPipeFaceDetector:
    """Face detector pairing YOLO bounding boxes with MediaPipe Face Mesh.

    YOLO supplies face boxes and confidences; MediaPipe Face Mesh supplies
    per-face landmark lists. The two run independently on the same frame,
    so the i-th box and the i-th landmark set are not matched to each other.

    The instance owns a MediaPipe graph. Release it with :meth:`close` or by
    using the detector as a context manager.
    """

    # Face Mesh landmark indices drawn as key points, under the names the
    # script assigns to them.
    KEY_POINTS = {
        'nose_tip': 1,
        'left_eye': 33,
        'right_eye': 362,
        'mouth_center': 13,
        'left_mouth': 61,
        'right_mouth': 291,
    }

    # Marker colours. OpenCV drawing calls take (B, G, R) tuples.
    KEY_POINT_COLORS = {
        'nose_tip': (0, 0, 255),        # red
        'left_eye': (255, 0, 0),        # blue
        'right_eye': (0, 255, 0),       # green
        'mouth_center': (255, 255, 0),  # cyan
        'left_mouth': (255, 0, 255),    # magenta
        'right_mouth': (0, 255, 255),   # yellow
    }

    def __init__(self, yolo_model_path, device=None):
        """Load the YOLO weights and create the Face Mesh graph.

        Args:
            yolo_model_path: Path to the YOLO weights handed to ``YOLO``.
            device: Optional device string forwarded to every ``predict``
                call (for example ``"cpu"`` or ``"cuda"``). ``None`` leaves
                device selection to the YOLO model.
        """
        # Load the YOLO model.
        self.yolo_model = YOLO(yolo_model_path)
        self.device = device

        # Initialise MediaPipe Face Mesh.
        self.mp_face_mesh = mp.solutions.face_mesh
        self.face_mesh = self.mp_face_mesh.FaceMesh(
            static_image_mode=False,
            max_num_faces=5,
            refine_landmarks=True,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
        )
        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_drawing_styles = mp.solutions.drawing_styles

    def close(self):
        """Release the Face Mesh graph; safe to call more than once.

        After closing, :meth:`detect_faces_and_landmarks` can no longer be
        used on this instance.
        """
        face_mesh = getattr(self, "face_mesh", None)
        if face_mesh is not None:
            face_mesh.close()
            # Drop the reference so a second close() is a no-op.
            self.face_mesh = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    @staticmethod
    def _landmark_to_pixel(landmark, width, height):
        """Scale a landmark's ``x``/``y`` by the frame size and truncate to int."""
        return int(landmark.x * width), int(landmark.y * height)

    @staticmethod
    def _label_bottom(y1, text_height):
        """Return the bottom edge of the label strip, kept inside the frame.

        The strip normally sits directly above the box (bottom edge at
        ``y1``). When the box is too close to the top of the image the strip
        is pushed down so the text stays visible.
        """
        return max(y1, text_height + 6)

    def detect_faces_and_landmarks(self, frame, conf_threshold=0.5):
        """Run YOLO and Face Mesh on one frame.

        Args:
            frame: Image array; it is converted with ``COLOR_BGR2RGB`` before
                being handed to MediaPipe, so BGR channel order is expected.
            conf_threshold: Confidence threshold passed to YOLO as ``conf``.

        Returns:
            Tuple ``(boxes, confs, landmarks)``: ``boxes`` is a list of
            ``[x1, y1, x2, y2]`` float lists, ``confs`` the matching list of
            scores, and ``landmarks`` the list of MediaPipe face-landmark
            objects (empty when no face was found).
        """
        # Detect face bounding boxes with YOLO.
        predict_kwargs = {
            "source": frame,
            "conf": conf_threshold,
            "verbose": False,
        }
        # Only override the device when one was requested explicitly.
        if self.device is not None:
            predict_kwargs["device"] = self.device
        yolo_results = self.yolo_model.predict(**predict_kwargs)

        boxes = []
        confs = []
        if yolo_results:
            first = yolo_results[0]
            if first.boxes is not None and len(first.boxes) > 0:
                boxes = first.boxes.xyxy.cpu().numpy().tolist()
                confs = first.boxes.conf.cpu().numpy().tolist()

        # Detect face landmarks with MediaPipe.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_results = self.face_mesh.process(rgb_frame)
        landmarks = list(mp_results.multi_face_landmarks or [])

        return boxes, confs, landmarks

    def draw_results(self, frame, boxes, confs, landmarks):
        """Return a copy of *frame* annotated with boxes and key points.

        Args:
            frame: Source image; it is not modified.
            boxes: Iterable of ``(x1, y1, x2, y2)`` box coordinates.
            confs: Confidence for each box, in the same order as *boxes*.
            landmarks: Iterable of MediaPipe face-landmark objects.

        Returns:
            The annotated copy of *frame*.
        """
        output = frame.copy()
        h, w = frame.shape[:2]
        font = cv2.FONT_HERSHEY_SIMPLEX

        # Draw the YOLO bounding boxes with a filled label strip.
        for (x1, y1, x2, y2), conf in zip(boxes, confs):
            x1, y1, x2, y2 = map(int, (x1, y1, x2, y2))
            cv2.rectangle(output, (x1, y1), (x2, y2), (0, 255, 0), 2)

            label = f"face {conf:.2f}"
            (tw, th), _ = cv2.getTextSize(label, font, 0.6, 2)
            bottom = self._label_bottom(y1, th)
            cv2.rectangle(
                output,
                (x1, bottom - th - 6),
                (x1 + tw + 6, bottom),
                (0, 255, 0),
                -1,
            )
            cv2.putText(output, label, (x1 + 3, bottom - 4), font, 0.6, (0, 0, 0), 2)

        # Draw the selected MediaPipe key points for every face.
        for face_landmarks in landmarks:
            points = face_landmarks.landmark
            for name, idx in self.KEY_POINTS.items():
                if idx >= len(points):
                    continue
                x, y = self._landmark_to_pixel(points[idx], w, h)
                cv2.circle(output, (x, y), 3, self.KEY_POINT_COLORS[name], -1)

                # Show the nose coordinates as text next to the marker.
                if name == 'nose_tip':
                    cv2.putText(output, f"nose({x},{y})", (x + 5, y - 5),
                                font, 0.4, (0, 0, 255), 1)

        return output


def main():
    """Run the webcam loop: detect faces, draw the results, show the window.

    The loop ends when ``q`` is pressed in the window or a frame cannot be
    read.

    Returns:
        Process exit status: ``0`` after a normal quit, ``1`` when the model
        file is missing, the camera cannot be opened, or a frame read fails.
    """
    args = parse_args()

    model_path = Path(args.model)
    if not model_path.exists():
        print("[오류] YOLO 모델(.pt) 파일이 없습니다.")
        print("- 현재 경로:", Path.cwd())
        print("- 찾은 경로:", model_path.resolve())
        return 1

    print("YOLO + MediaPipe 얼굴 검출기 초기화 중...")
    detector = YOLOMediaPipeFaceDetector(str(model_path))
    try:
        # Apply the device chosen on the command line to the YOLO model.
        detector.yolo_model.to(args.device)

        print("웹캠 오픈 중... (종료: q)")
        cap = cv2.VideoCapture(args.camera)
        try:
            if not cap.isOpened():
                print("[오류] 웹캠을 열 수 없습니다.")
                return 1

            # perf_counter is monotonic, so the FPS figure is not disturbed
            # by wall-clock adjustments.
            prev_time = time.perf_counter()
            while True:
                ok, frame = cap.read()
                if not ok:
                    print("[오류] 프레임을 읽을 수 없습니다.")
                    return 1

                # Detect faces and landmarks, then draw them.
                boxes, confs, landmarks = detector.detect_faces_and_landmarks(frame, args.conf)
                drawn = detector.draw_results(frame, boxes, confs, landmarks)

                # FPS overlay.
                if args.show_fps:
                    now = time.perf_counter()
                    fps = 1.0 / max(1e-6, now - prev_time)
                    prev_time = now
                    cv2.putText(drawn, f"FPS: {fps:.1f}", (10, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

                # Face and landmark counters.
                cv2.putText(drawn, f"faces: {len(boxes)}", (10, 65),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
                cv2.putText(drawn, f"landmarks: {len(landmarks)}", (10, 100),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

                cv2.imshow("YOLO + MediaPipe Face Detection", drawn)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    return 0
        finally:
            cap.release()
            cv2.destroyAllWindows()
    finally:
        # Release the MediaPipe graph on every exit path.
        detector.face_mesh.close()


if __name__ == "__main__":
    # Propagate main()'s status so failures are visible to the calling shell.
    raise SystemExit(main())
Binary file added yolov8n/yolov8n-face.pt
Binary file not shown.
Loading