# SpatialNetVLAD.py
from tensorflow.keras.layers import Layer, Conv2D
import tensorflow.keras.backend as K
import tensorflow as tf
class SpatialNetVLAD(Layer):
    """Spatial-pyramid NetVLAD pooling layer.

    For every ``p`` in ``pool_list`` the input feature map is divided into a
    ``p x p`` grid.  Each grid cell is encoded by one shared NetVLAD head
    (1x1 convolution -> softmax soft-assignment -> weighted residuals to the
    cluster centres, followed by intra- and global L2 normalisation) and the
    per-cell descriptors are concatenated along axis 1.

    The input is indexed as a 4-D tensor whose axes 1 and 2 are the spatial
    rows/columns and whose axis 3 holds the channels.  The output is 2-D with
    ``sum(p * p for p in pool_list) * D * K`` features per sample, where ``D``
    is the channel count and ``K`` is ``num_clusters``.

    Args:
        pool_list: Iterable of positive integers; each entry ``p`` adds a
            ``p x p`` grid of pooling regions.  Defaults to ``[3]``.
        num_clusters: Number of VLAD clusters (``K``).
        **kwargs: Forwarded unchanged to ``Layer.__init__``.

    Raises:
        ValueError: If ``pool_list`` is empty or contains an entry that is
            not a positive integer.
    """

    def __init__(self, pool_list=None, num_clusters=64, **kwargs):
        # Initialise the base layer before assigning any attributes.
        super(SpatialNetVLAD, self).__init__(**kwargs)

        # ``None`` stands in for the historical default of ``[3]`` so that no
        # list object is shared between instances.
        pool_list = [3] if pool_list is None else list(pool_list)
        if not pool_list or any(int(p) != p or p < 1 for p in pool_list):
            raise ValueError(
                'pool_list must be a non-empty sequence of positive '
                'integers, got %r' % (pool_list,)
            )
        self.pool_list = [int(p) for p in pool_list]
        self.K = num_clusters

    def build(self, input_shape):
        """Create the cluster centres and the 1x1 soft-assignment convolution.

        Args:
            input_shape: Shape of the input; index 3 is read as the channel
                count and must be statically known.

        Raises:
            ValueError: If the channel dimension is ``None``.
        """
        if input_shape[3] is None:
            raise ValueError(
                'The channel dimension (axis 3) of the input must be defined.'
            )
        self.D = int(input_shape[3])  # input_channels

        # NOTE(review): both the centres and the assignment kernel start at
        # zero, so every cluster is initially identical.  Presumably trained
        # weights are loaded after construction -- confirm with the caller.
        self.C = self.add_weight(
            name='cluster_centers',
            shape=(1, 1, 1, self.D, self.K),
            initializer='zeros',
            dtype='float32',
            trainable=True,
        )
        self.conv = Conv2D(
            filters=self.K,
            kernel_size=1,
            strides=(1, 1),
            use_bias=False,
            padding='valid',
            kernel_initializer='zeros',
        )
        self.conv.build(input_shape)
        super(SpatialNetVLAD, self).build(input_shape)

    def call(self, x, mask=None):
        """Encode every pyramid region of ``x`` and concatenate the results.

        Args:
            x: Input feature map, indexed as ``x[batch, row, col, channel]``.
            mask: Unused; accepted for Keras API compatibility.

        Returns:
            The per-region VLAD descriptors concatenated along axis 1, in
            row-major region order for each entry of ``pool_list`` in turn.
        """
        dynamic_shape = tf.shape(x)
        num_rows = tf.cast(dynamic_shape[1], 'float32')
        num_cols = tf.cast(dynamic_shape[2], 'float32')

        outputs = []
        region_index = 0
        for num_pool_regions in self.pool_list:
            # Fractional cell size; the bounds are rounded per cell below so
            # that the grid tiles the spatial extent of the map.
            row_length = num_rows / num_pool_regions
            col_length = num_cols / num_pool_regions

            for jy in range(num_pool_regions):
                # Row bounds do not depend on the column index, so they are
                # computed once per grid row.
                y1 = tf.cast(tf.round(jy * row_length), 'int32')
                y2 = tf.cast(tf.round(jy * row_length + row_length), 'int32')

                for ix in range(num_pool_regions):
                    x1 = tf.cast(tf.round(ix * col_length), 'int32')
                    x2 = tf.cast(
                        tf.round(ix * col_length + col_length), 'int32')

                    # The slice already has shape
                    # (batch, y2 - y1, x2 - x1, channels); no reshape needed.
                    region = x[:, y1:y2, x1:x2, :]
                    outputs.append(self.encode_vlad(region, region_index))
                    region_index += 1

        return tf.concat(outputs, axis=1)

    def encode_vlad(self, inputs, i):
        """Compute the normalised VLAD descriptor of one region.

        Args:
            inputs: Region feature map of shape ``(batch, h, w, D)``.
            i: Running index of the region; currently unused, kept for
                interface compatibility.

        Returns:
            Tensor of shape ``(batch, D * K)``.
        """
        s = self.conv(inputs)                      # (batch, h, w, K)
        a = tf.nn.softmax(s)                       # soft-assignment over K
        a = tf.expand_dims(a, -2)                  # (batch, h, w, 1, K)
        v = tf.expand_dims(inputs, -1) - self.C    # (batch, h, w, D, K)
        v = tf.reduce_sum(a * v, axis=[1, 2])      # (batch, D, K)

        # Intra-normalisation: L2-normalise each cluster's D-dim residual.
        v = tf.transpose(v, perm=[0, 2, 1])        # (batch, K, D)
        v = self.matconvnetNormalize(v, 1e-12)
        v = tf.transpose(v, perm=[0, 2, 1])        # (batch, D, K)

        # Flatten to (batch, D * K), then L2-normalise the whole descriptor.
        v = tf.reshape(v, [-1, self.D * self.K])
        return self.matconvnetNormalize(v, 1e-12)

    def matconvnetNormalize(self, inputs, epsilon):
        """L2-normalise ``inputs`` along the last axis.

        ``epsilon`` is added to the sum of squares before the square root,
        so an all-zero vector maps to zeros instead of NaN.
        """
        squared_norm = tf.reduce_sum(inputs ** 2, axis=-1, keepdims=True)
        return inputs / tf.sqrt(squared_norm + epsilon)

    def get_config(self):
        """Return the layer configuration, including the constructor args."""
        config = super().get_config().copy()
        config.update({
            # Plain list copy so the config never aliases layer state.
            'pool_list': list(self.pool_list),
            'num_clusters': self.K,
        })
        return config