-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvideo_capture_pytorch.py
68 lines (57 loc) · 1.96 KB
/
video_capture_pytorch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import cv2
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as tt
from efficientnet_pytorch import EfficientNet
# Haar cascade used to locate faces in each grayscale frame.
face_classifier = cv2.CascadeClassifier("./haarcascades/haarcascade_face.xml")
if face_classifier.empty():
    # OpenCV does not raise when the cascade file is missing or invalid; fail
    # fast here instead of with an opaque assertion inside detectMultiScale().
    raise RuntimeError(
        "Could not load Haar cascade from ./haarcascades/haarcascade_face.xml"
    )

# map_location="cpu": the model below is never moved off the CPU, so the
# checkpoint has to be loadable on machines without CUDA as well.
model_state = torch.load(
    "./pytorch_model_states/base_efficient_net_state.pth",
    map_location="cpu",
)

# The predicted class index is used directly as an index into this list.
class_labels = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']

# NOTE(review): from_pretrained() is called with its default arguments, while
# the capture loop feeds a grayscale ROI and indexes only 7 labels -- confirm
# the checkpoint's input channels / class count match this constructor call.
model = EfficientNet.from_pretrained('efficientnet-b0')
model.load_state_dict(model_state)
# Inference only: switch BatchNorm/dropout layers out of training behaviour so
# single-image predictions are deterministic and use the learned statistics.
model.eval()

# Device index 0 is the system's default camera.
cap = cv2.VideoCapture(0)
# Main loop: read webcam frames, detect faces, classify each face crop and
# overlay the predicted label until the user presses "q".
# The PIL -> tensor converter is stateless, so build it once outside the loop.
to_tensor = tt.ToTensor()
try:
    while True:
        # Grab a single frame of video
        ret, frame = cap.read()
        if not ret:
            # A failed read yields no frame; stop instead of crashing in
            # cv2.flip() on a None image.
            print("Failed to read a frame from the camera; stopping.")
            break

        # Mirror the image horizontally so the preview behaves like a mirror.
        frame = cv2.flip(frame, 1)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_classifier.detectMultiScale(gray, 1.3, 5)

        for (x, y, w, h) in faces:
            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
            # Crop the detected face and scale it to the 48x48 size that is
            # passed to the classifier.
            roi_gray = gray[y : y + h, x : x + w]
            roi_gray = cv2.resize(roi_gray, (48, 48), interpolation=cv2.INTER_AREA)

            # An all-zero (completely black) crop carries no usable data.
            if np.sum([roi_gray]) != 0:
                roi = tt.functional.to_pil_image(roi_gray)
                roi = tt.functional.to_grayscale(roi)
                # unsqueeze(0) adds the leading batch dimension.
                roi = to_tensor(roi).unsqueeze(0)

                # make a prediction on the ROI
                # no_grad(): gradients are never needed here, so skip building
                # the autograd graph on every frame.
                with torch.no_grad():
                    tensor = model(roi)
                # Index of the highest score along dim 1 selects the label.
                pred = torch.max(tensor, dim=1)[1].tolist()
                label = class_labels[pred[0]]
                label_position = (x, y)
                cv2.putText(
                    frame,
                    label,
                    label_position,
                    cv2.FONT_HERSHEY_COMPLEX,
                    2,
                    (0, 255, 0),
                    3,
                )
            else:
                cv2.putText(
                    frame,
                    "No Face Found",
                    (20, 60),
                    cv2.FONT_HERSHEY_COMPLEX,
                    2,
                    (0, 255, 0),
                    3,
                )

        cv2.imshow("Emotion Detector", frame)
        # waitKey also services the GUI event loop; "q" quits.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the camera and close the preview window, even when the loop
    # exits through an exception.
    cap.release()
    cv2.destroyAllWindows()