diff --git a/models.py b/models.py
index 4837cd8b8..a02b246e6 100644
--- a/models.py
+++ b/models.py
@@ -91,14 +91,12 @@ class DecoderWithAttention(nn.Module):
     Decoder.
     """
 
-    def __init__(self, attention_dim, embed_dim, decoder_dim, vocab_size, decoder_layers=1, encoder_dim=2048,
-                 dropout=0.5):
+    def __init__(self, attention_dim, embed_dim, decoder_dim, vocab_size, encoder_dim=2048, dropout=0.5):
         """
         :param attention_dim: size of attention network
         :param embed_dim: embedding size
         :param decoder_dim: size of decoder's RNN
         :param vocab_size: size of vocabulary
-        :param decoder_layers: number of layers in the decoder
         :param encoder_dim: feature size of encoded images
         :param dropout: dropout
         """
@@ -109,14 +107,13 @@ def __init__(self, attention_dim, embed_dim, decoder_dim, vocab_size, decoder_la
         self.embed_dim = embed_dim
         self.decoder_dim = decoder_dim
         self.vocab_size = vocab_size
-        self.decoder_layers = decoder_layers
         self.dropout = dropout
 
         self.attention = Attention(encoder_dim, decoder_dim, attention_dim)  # attention network
 
         self.embedding = nn.Embedding(vocab_size, embed_dim)  # embedding layer
         self.dropout = nn.Dropout(p=self.dropout)
-        self.decode_step = nn.LSTMCell(embed_dim + encoder_dim, decoder_dim, decoder_layers)  # decoding LSTMCell
+        self.decode_step = nn.LSTMCell(embed_dim + encoder_dim, decoder_dim, bias=True)  # decoding LSTMCell
         self.init_h = nn.Linear(encoder_dim, decoder_dim)  # linear layer to find initial hidden state of LSTMCell
         self.init_c = nn.Linear(encoder_dim, decoder_dim)  # linear layer to find initial cell state of LSTMCell
         self.f_beta = nn.Linear(decoder_dim, encoder_dim)  # linear layer to create a sigmoid-activated gate
diff --git a/train.py b/train.py
index cafaa75c5..e6e2425d9 100644
--- a/train.py
+++ b/train.py
@@ -18,7 +18,6 @@
 emb_dim = 512  # dimension of word embeddings
 attention_dim = 512  # dimension of attention linear layers
 decoder_dim = 512  # dimension of decoder RNN
-decoder_layers = 1  # number of layers in decoder RNN
 dropout = 0.5
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # sets device for model and PyTorch tensors
 cudnn.benchmark = True  # set to true only if inputs to model are fixed size; otherwise lot of computational overhead
@@ -57,7 +56,6 @@ def main():
                                    embed_dim=emb_dim,
                                    decoder_dim=decoder_dim,
                                    vocab_size=len(word_map),
-                                   decoder_layers=decoder_layers,
                                    dropout=dropout)
     decoder_optimizer = torch.optim.Adam(params=filter(lambda p: p.requires_grad, decoder.parameters()),
                                          lr=decoder_lr)
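
Note on the LSTMCell change: torch.nn.LSTMCell takes (input_size, hidden_size, bias) and has no num_layers argument, so the removed call was silently passing decoder_layers into the bias slot rather than stacking layers; stacked recurrence is only provided by nn.LSTM. Below is a minimal sketch of the distinction, assuming the 512/2048 sizes from the config above (the batch size of 4 and the stacked-LSTM variant are illustrative, not part of this change):

    import torch
    import torch.nn as nn

    # Single-step cell, as used in the decoder loop.
    # Signature: LSTMCell(input_size, hidden_size, bias=True) -- no num_layers.
    cell = nn.LSTMCell(512 + 2048, 512, bias=True)

    # If stacked layers were actually wanted, nn.LSTM is the module that accepts num_layers.
    stacked = nn.LSTM(input_size=512 + 2048, hidden_size=512, num_layers=2)

    x = torch.randn(4, 512 + 2048)   # (batch, input_size) for one time step
    h = torch.zeros(4, 512)          # initial hidden state
    c = torch.zeros(4, 512)          # initial cell state
    h, c = cell(x, (h, c))           # one decoding step, matching how decode_step is called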