-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathobject_classifier_torch.py
114 lines (92 loc) · 3.26 KB
/
object_classifier_torch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# -*- coding: utf-8 -*-
__author__ = 'Mr.Bemani'
import cv2 as cv
import numpy as np
from PIL import Image
import torch
import torchvision.transforms as transforms
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("device: " + device)

# Size that the transform pipeline below resizes every image to.
input_size = (224, 224)

# Per-channel normalization statistics; these are the values commonly used
# with ImageNet-pretrained torchvision models.
_NORMALIZE_MEAN = [0.485, 0.456, 0.406]
_NORMALIZE_STD = [0.229, 0.224, 0.225]

# Data transformations: resize -> tensor -> per-channel normalization.
transform = transforms.Compose([
    transforms.Resize(input_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORMALIZE_MEAN, std=_NORMALIZE_STD),
])
# Load the model with torch.load (the "rknn" naming is kept for compatibility)
def load_rknn_model(model_path):
    """Load a pickled PyTorch model and make sure it has a ``release()`` method.

    The checkpoint is mapped onto the CPU so that files saved on a GPU
    machine also load on CPU-only hosts. The loaded object is printed, and
    if it has no ``release`` attribute a no-op stand-in is attached so
    callers can always call ``model.release()``.

    Args:
        model_path: Path (or file-like object) accepted by ``torch.load``.

    Returns:
        The deserialized object, exposing a ``release`` attribute.
    """
    # SECURITY: a full-model checkpoint is a pickle, and unpickling can run
    # arbitrary code. Only load files that come from a trusted source.
    try:
        # Recent PyTorch releases default to weights_only=True, which rejects
        # whole pickled modules; this loader needs the complete object.
        model = torch.load(model_path, map_location="cpu", weights_only=False)
    except TypeError:
        # Older PyTorch releases do not accept the weights_only keyword.
        model = torch.load(model_path, map_location="cpu")
    print(model)

    if not hasattr(model, 'release'):
        def release():
            print("fake model.release() for compatibility")

        model.release = release
    return model
# Pad the image to a square
def pad_image(image: np.ndarray) -> np.ndarray:
    """Pad an ``H x W x 3`` image with zeros (black) so that it becomes square.

    The shorter dimension is extended on both sides until it equals the
    longer one. When the difference is odd, the extra pixel goes to the
    trailing (right / bottom) side so the result is exactly square.

    Args:
        image: Array of shape ``(height, width, 3)``.

    Returns:
        The input itself when it is already square, otherwise a new padded
        array of shape ``(side, side, 3)`` with the same dtype.

    Raises:
        ValueError: If ``image`` is None or is not a 3-channel, 3-D array.
    """
    if image is None:
        raise ValueError('The input image is None.')
    if len(image.shape) != 3 or image.shape[2] != 3:
        raise ValueError('The input image must be a 3-channel image.')

    height, width = image.shape[:2]
    if height == width:
        return image

    # Padding both sides with diff // 2 would leave the result one pixel
    # short of square whenever the difference is odd, so split it unevenly.
    diff = abs(height - width)
    before = diff // 2
    after = diff - before
    if height > width:
        # Taller than wide: add columns on the left and right.
        pad_spec = ((0, 0), (before, after), (0, 0))
    else:
        # Wider than tall: add rows on the top and bottom.
        pad_spec = ((before, after), (0, 0), (0, 0))
    return np.pad(image, pad_spec, mode="constant", constant_values=0)
# Preprocess the input image
def preprocess_image(image: np.ndarray, input_size: tuple = (224, 224)) -> torch.Tensor:
    """Convert an image array into the tensor produced by ``transform``.

    The image is validated, padded to a square with ``pad_image`` when its
    height/width ratio falls outside ``[0.9, 1.1]``, resized with
    ``cv.resize`` to ``input_size`` if needed, converted from BGR to RGB,
    wrapped in a PIL image and passed through the module-level ``transform``.

    Note that ``transform`` contains its own ``Resize`` step driven by the
    module-level ``input_size``, independent of this function's argument.

    Args:
        image: Array of shape ``(height, width, 3)``, treated as BGR.
        input_size: ``(width, height)`` handed to ``cv.resize``.

    Returns:
        The result of applying ``transform`` to the prepared image.

    Raises:
        ValueError: If ``image`` is None or is not a 3-channel, 3-D array.
    """
    if image is None:
        raise ValueError('The input image is None.')
    shape = image.shape
    if len(shape) != 3 or shape[2] != 3:
        raise ValueError('The input image must be a 3-channel image.')

    rows, cols = shape[0], shape[1]
    aspect = rows / cols
    # Nearly square images are resized directly; anything else is padded
    # first so that resizing does not distort it too much.
    squared = image if 0.9 <= aspect <= 1.1 else pad_image(image)

    # cv.resize takes (width, height), so compare the shape in that order.
    target_w, target_h = input_size[0], input_size[1]
    if (squared.shape[1], squared.shape[0]) != (target_w, target_h):
        squared = cv.resize(squared, input_size)

    rgb = cv.cvtColor(squared, cv.COLOR_BGR2RGB)
    return transform(Image.fromarray(rgb))
# Postprocess the output results
def postprocess(output):
    """Turn raw model scores into probabilities with a softmax.

    The softmax is taken over the last axis, so every row of a
    ``(batch, classes)`` array sums to 1 on its own. The per-row maximum is
    subtracted before exponentiating; this leaves the result unchanged
    mathematically but keeps ``exp`` from overflowing to ``inf`` (and the
    ratio from becoming NaN) for large scores.

    Args:
        output: Array-like of scores.

    Returns:
        A NumPy array of the same shape holding the softmax probabilities.
    """
    logits = np.asarray(output)
    if logits.size == 0:
        # Nothing to normalize; keep the (empty) shape.
        return np.exp(logits)

    # A 0-d input has no last axis to reduce over.
    axis = -1 if logits.ndim else None
    shifted = logits - np.max(logits, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)
# Run inference
def run_inference(rknn_model, image, input_size=(224, 224)):
    """Run the model on a single image and return the post-processed output.

    Args:
        rknn_model: Callable model exposing ``eval()``; called with a batch
            of one preprocessed image.
        image: Image array accepted by ``preprocess_image``.
        input_size: Forwarded to ``preprocess_image``.

    Returns:
        The result of ``postprocess`` applied to the model output converted
        to a NumPy array.
    """
    torch_image = preprocess_image(image, input_size)
    images = torch_image.unsqueeze(0)  # add the batch dimension

    # Put the batch on the same device as the model's weights; feeding a CPU
    # tensor to a model that lives on the GPU raises a device-mismatch error.
    parameters = getattr(rknn_model, "parameters", None)
    first_param = next(iter(parameters()), None) if callable(parameters) else None
    if first_param is not None:
        images = images.to(first_param.device)

    # Evaluation mode, and no gradient tracking, for inference.
    rknn_model.eval()
    with torch.no_grad():
        outputs = rknn_model(images).cpu().numpy()
    return postprocess(outputs)
if __name__ == '__main__':
    import sys

    # Two positional arguments are required: the model file and the image.
    if len(sys.argv) < 3:
        sys.exit("usage: " + sys.argv[0] + " <model_path> <image_path>")

    # Load the model
    rknn_model = load_rknn_model(sys.argv[1])
    try:
        # Read the image; cv.imread signals an unreadable file by returning
        # None rather than raising, so report that case explicitly.
        image = cv.imread(sys.argv[2])
        if image is None:
            sys.exit("could not read image: " + sys.argv[2])
        # Run inference
        outputs = run_inference(rknn_model, image)
        print(outputs)
    finally:
        # Release the model even when reading the image or inference fails
        rknn_model.release()