import cv2
import numpy as np
import os
import torch
from torchvision import transforms

# Imported so the Emotic class is available when torch.load unpickles the
# saved full-model checkpoints below.
from emotic import Emotic

def process_images(context_norm, body_norm, image_context_path=None, image_context=None, image_body=None, bbox=None):
    ''' Prepare context and body image.
    :param context_norm: List containing mean and std values for context images.
    :param body_norm: List containing mean and std values for body images.
    :param image_context_path: Path of the context image.
    :param image_context: Numpy array of the context image.
    :param image_body: Numpy array of the body image.
    :param bbox: List to specify the bounding box to generate the body image. bbox = [x1, y1, x2, y2].
    :return: Transformed image_context tensor and image_body tensor.
    '''
    if image_context is None and image_context_path is None:
        raise ValueError('both image_context and image_context_path cannot be None. Please specify one of the two.')
    if image_body is None and bbox is None:
        raise ValueError('both image_body and bbox cannot be None. Please specify one of the two.')
    if image_context_path is not None:
        image_context = cv2.cvtColor(cv2.imread(image_context_path), cv2.COLOR_BGR2RGB)
    if bbox is not None:
        # Crop the body region out of the context image; bbox is [x1, y1, x2, y2].
        image_body = image_context[bbox[1]:bbox[3], bbox[0]:bbox[2]].copy()

    image_context = cv2.resize(image_context, (224, 224))
    image_body = cv2.resize(image_body, (128, 128))

    test_transform = transforms.Compose([transforms.ToPILImage(), transforms.ToTensor()])
    context_norm = transforms.Normalize(context_norm[0], context_norm[1])
    body_norm = transforms.Normalize(body_norm[0], body_norm[1])

    image_context = context_norm(test_transform(image_context)).unsqueeze(0)
    image_body = body_norm(test_transform(image_body)).unsqueeze(0)
    return image_context, image_body
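
# A minimal usage sketch for process_images. The normalization statistics and
# the image path below are illustrative stand-ins (ImageNet stats), not values
# shipped with this repository:
#
#   context_norm = [[0.485, 0.456, 0.406], [0.229, 0.224, 0.225]]
#   body_norm = [[0.485, 0.456, 0.406], [0.229, 0.224, 0.225]]
#   ctx_t, body_t = process_images(context_norm, body_norm,
#                                  image_context_path='person.jpg',
#                                  bbox=[10, 20, 120, 260])
#   # ctx_t: shape (1, 3, 224, 224); body_t: shape (1, 3, 128, 128)
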
def infer(context_norm, body_norm, ind2cat, ind2vad, device, thresholds, models, image_context_path=None, image_context=None, image_body=None, bbox=None, to_print=True):
    ''' Perform inference over an image.
    :param context_norm: List containing mean and std values for context images.
    :param body_norm: List containing mean and std values for body images.
    :param ind2cat: Dictionary converting integer index to categorical emotion.
    :param ind2vad: Dictionary converting integer index to continuous emotion dimension (Valence, Arousal and Dominance).
    :param device: Torch device. Used to send tensors to GPU if available.
    :param thresholds: Tensor of per-category threshold values used to binarize the categorical predictions.
    :param models: List containing the context model, the body model and the Emotic fusion model.
    :param image_context_path: Path of the context image.
    :param image_context: Numpy array of the context image.
    :param image_body: Numpy array of the body image.
    :param bbox: List to specify the bounding box to generate the body image. bbox = [x1, y1, x2, y2].
    :param to_print: Variable to display inference results.
    :return: Categorical emotions list and continuous emotion dimensions numpy array.
    '''
    image_context, image_body = process_images(context_norm, body_norm, image_context_path=image_context_path, image_context=image_context, image_body=image_body, bbox=bbox)

    model_context, model_body, emotic_model = models
    with torch.no_grad():
        image_context = image_context.to(device)
        image_body = image_body.to(device)

        pred_context = model_context(image_context)
        pred_body = model_body(image_body)
        pred_cat, pred_cont = emotic_model(pred_context, pred_body)
        pred_cat = pred_cat.squeeze(0)
        pred_cont = pred_cont.squeeze(0).cpu().numpy()

    # Keep only the categories whose score exceeds the per-category threshold.
    bool_cat_pred = torch.gt(pred_cat, thresholds)
    cat_emotions = list()
    for i in range(len(bool_cat_pred)):
        if bool_cat_pred[i]:
            cat_emotions.append(ind2cat[i])

    if to_print:
        print('\n Image predictions')
        print('Continuous Dimensions Predictions')
        for i in range(len(pred_cont)):
            # Scale continuous predictions back to the 1-10 range used by the
            # EMOTIC VAD annotations.
            print('Continuous %10s %.5f' % (ind2vad[i], 10 * pred_cont[i]))
        print('Categorical Emotion Predictions')
        for emotion in cat_emotions:
            print('Categorical %16s' % (emotion))
    return cat_emotions, 10 * pred_cont
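
# A hedged sketch of calling infer directly. It assumes models, thresholds,
# device, ind2cat, ind2vad and the norm lists have already been built the way
# inference_emotic below builds them:
#
#   cat_pred, cont_pred = infer(context_norm, body_norm, ind2cat, ind2vad,
#                               device, thresholds, models,
#                               image_context_path='person.jpg',
#                               bbox=[10, 20, 120, 260])
#   # cat_pred: list of predicted emotion names
#   # cont_pred: numpy array of Valence/Arousal/Dominance scores (1-10 scale)
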
def inference_emotic(images_list, model_path, result_path, context_norm, body_norm, ind2cat, ind2vad, args):
    ''' Infer on a list of images defined in a text file. Save the results in inference_list.txt in the directory specified by result_path.
    :param images_list: Text file specifying the images and their bounding box values to conduct inference. A row in the file is Path_of_image x1 y1 x2 y2.
    :param model_path: Directory path to load the trained models from.
    :param result_path: Directory path containing val_thresholds.npy, and where the results are saved (text file containing the categorical emotion and continuous emotion dimension predictions per image).
    :param context_norm: List containing mean and std values for context images.
    :param body_norm: List containing mean and std values for body images.
    :param ind2cat: Dictionary converting integer index to categorical emotion.
    :param ind2vad: Dictionary converting integer index to continuous emotion dimension (Valence, Arousal and Dominance).
    :param args: Runtime arguments.
    '''
    with open(images_list, 'r') as f:
        lines = f.readlines()

    device = torch.device("cuda:%s" % (str(args.gpu)) if torch.cuda.is_available() else "cpu")
    thresholds = torch.FloatTensor(np.load(os.path.join(result_path, 'val_thresholds.npy'))).to(device)

    # map_location keeps the load working on CPU-only machines even when the
    # checkpoints were saved from a GPU.
    model_context = torch.load(os.path.join(model_path, 'model_context1.pth'), map_location=device).to(device)
    model_body = torch.load(os.path.join(model_path, 'model_body1.pth'), map_location=device).to(device)
    emotic_model = torch.load(os.path.join(model_path, 'model_emotic1.pth'), map_location=device).to(device)
    model_context.eval()
    model_body.eval()
    emotic_model.eval()
    models = [model_context, model_body, emotic_model]

    result_file = os.path.join(result_path, 'inference_list.txt')
    with open(result_file, 'w') as f:
        for line in lines:
            image_context_path, x1, y1, x2, y2 = line.strip().split(' ')
            bbox = [int(x1), int(y1), int(x2), int(y2)]
            pred_cat, pred_cont = infer(context_norm, body_norm, ind2cat, ind2vad, device, thresholds, models, image_context_path=image_context_path, bbox=bbox)

            # One output row per image: image path, predicted categories, then
            # the three continuous dimension values.
            write_line = [image_context_path]
            for emotion in pred_cat:
                write_line.append(emotion)
            for continuous in pred_cont:
                write_line.append('%.4f' % (continuous))
            f.write(' '.join(write_line) + '\n')
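
# A hedged sketch of a driver for inference_emotic. The argparse flag, the
# label lists and the directory layout are assumptions about how this module
# is wired up, not code from this repository:
#
#   import argparse
#
#   parser = argparse.ArgumentParser()
#   parser.add_argument('--gpu', type=int, default=0)
#   args = parser.parse_args()
#
#   cat = [...]                                  # the 26 EMOTIC category names
#   vad = ['Valence', 'Arousal', 'Dominance']
#   ind2cat = {i: c for i, c in enumerate(cat)}
#   ind2vad = {i: v for i, v in enumerate(vad)}
#   context_norm = [context_mean, context_std]   # per-channel mean/std lists
#   body_norm = [body_mean, body_std]
#
#   inference_emotic('images_list.txt', './models', './results',
#                    context_norm, body_norm, ind2cat, ind2vad, args)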