layers.py
import torch
from torch.autograd import Variable
from torch import nn

class Standout(nn.Module):
    """Standout layer: dropout whose keep probabilities are computed from the
    previous layer's activations, reusing that layer's weight matrix."""

    def __init__(self, last_layer, alpha, beta):
        super(Standout, self).__init__()
        # Reuse the preceding linear layer's weights to compute keep probabilities.
        self.pi = last_layer.weight
        self.alpha = alpha
        self.beta = beta
        self.nonlinearity = nn.Sigmoid()

    def forward(self, previous, current, p=0.5, deterministic=False):
        # Keep probabilities as in page 3 of the paper (Variational Dropout):
        # p = sigmoid(alpha * previous @ W^T + beta). The `p` argument is unused.
        self.p = self.nonlinearity(self.alpha * previous.matmul(self.pi.t()) + self.beta)
        # Sample a binary mask from the keep probabilities.
        self.mask = sample_mask(self.p)
        # Deterministic version as in the paper: scale by the keep probabilities
        # instead of applying a sampled mask (e.g. at evaluation time).
        if deterministic or torch.mean(self.p).data.cpu().numpy() == 0:
            return self.p * current
        else:
            return self.mask * current

def sample_mask(p):
    """Given a matrix of keep probabilities, sample a binary mask of the same shape."""
    # Uniform noise with the same shape as p, moved to the GPU when available.
    uniform = Variable(torch.Tensor(p.size()).uniform_(0, 1))
    if torch.cuda.is_available():
        uniform = uniform.cuda()
    # A unit is kept when its noise sample falls below its keep probability;
    # cast the boolean comparison to float so the mask can multiply activations.
    mask = (uniform < p).float()
    return mask
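
# --- Hypothetical usage sketch (not part of the original file) ---------------
# A minimal two-layer MLP where Standout masks the hidden activations using the
# hidden layer's own weights. The class name, layer sizes, and alpha/beta values
# below are illustrative assumptions, not taken from the original repository.
class Net(nn.Module):
    def __init__(self, in_features=784, hidden=256, out_features=10):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(in_features, hidden)
        # Standout reuses fc1's weights; alpha=1.0, beta=0.0 are assumed values.
        self.standout = Standout(self.fc1, alpha=1.0, beta=0.0)
        self.fc2 = nn.Linear(hidden, out_features)

    def forward(self, x, deterministic=False):
        previous = x                               # input fed to fc1
        current = nn.functional.relu(self.fc1(x))  # activations to be masked
        current = self.standout(previous, current, deterministic=deterministic)
        return self.fc2(current)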