forked from pelegk11/Composer
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmodels.py
114 lines (91 loc) · 3.47 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
The models used for music generation.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import randn
import params
def vae_sampling(args):
    """Reparameterization-trick sampling for a VAE latent layer.

    :param args: 3-tuple ``(z_mean, z_log_sigma_sq, vae_b1)`` where
        ``z_mean`` and ``z_log_sigma_sq`` are tensors of identical shape
        (latent mean and log-variance) and ``vae_b1`` is a scalar scaling
        the injected Gaussian noise.
    :return: a latent sample ``z_mean + sigma * epsilon`` with
        ``sigma = exp(0.5 * z_log_sigma_sq)`` and
        ``epsilon ~ N(0, vae_b1**2)``, same shape as ``z_mean``.
    """
    z_mean, z_log_sigma_sq, vae_b1 = args
    # randn_like inherits shape/dtype/device from z_mean, replacing the
    # manual size()/dtype/device plumbing of the bare randn call.
    epsilon = torch.randn_like(z_mean) * vae_b1
    return z_mean + torch.exp(z_log_sigma_sq * 0.5) * epsilon
class AutoencoderModel(nn.Module):
    """Autoencoder builder, optionally variational and/or embedding-fed.

    NOTE(review): this class subclasses torch.nn.Module, but the constructor
    body is written against the Keras functional API (Input, Dense, Reshape,
    Lambda, K, Model, ...) — none of those names is imported in this file,
    presumably a leftover from the Keras project this repo was forked from.
    As written it raises NameError on first use; confirm the intended
    framework before relying on this class.
    """

    def __init__(self, input_shape, latent_space_size, dropout_rate, max_windows, batchnorm_momentum, use_vae=False, vae_b1=0.02, use_embedding=False, embedding_input_shape=None, embedding_shape=None):
        super(AutoencoderModel, self).__init__()
        """
        Create a large autoencoder, optionally variational (VAE) and/or fed
        by an embedding lookup instead of raw input windows.

        :param input_shape: per-sample shape; indexed as (windows, h, w) below
            — assumes a 3-axis piano-roll-like input, TODO confirm axis meaning
        :param latent_space_size: width of the bottleneck / latent vector
        :param dropout_rate: decoder dropout probability (layers skipped when 0)
        :param max_windows: number of time windows the decoder reconstructs
        :param batchnorm_momentum: momentum for every BatchNormalization layer
        :param use_vae: if True, sample the latent vector via vae_sampling
        :param vae_b1: noise scale forwarded to vae_sampling
        :param use_embedding: if True, encode an integer id via an Embedding
        :param embedding_input_shape: input shape for the embedding branch
        :param embedding_shape: vocabulary size of the Embedding layer
        :return: NOTE(review) — this returns Model(x_in, x) from __init__;
            returning a non-None value from __init__ raises TypeError at
            runtime.  The build logic likely belongs in a factory function.
        """
        # NOTE(review): the scraped source lost its indentation; the nesting
        # below was reconstructed from the Keras constraint that layer names
        # are unique — both branches tag a layer 'encoder', so they must be
        # mutually exclusive.  Verify against the upstream repository.
        if use_embedding:
            # Embedding branch: map a single integer id straight to the
            # latent space; Flatten is tagged 'encoder' as the encoder output.
            x_in = Input(shape=embedding_input_shape)
            print((None,) + embedding_input_shape)
            x = Embedding(embedding_shape, latent_space_size, input_length=1)(x_in)
            x = Flatten(name='encoder')(x)
        else:
            x_in = Input(shape=input_shape)
            print((None,) + input_shape)
            # Collapse each time window to one flat feature vector.
            x = Reshape((input_shape[0], -1))(x_in)
            print(K.int_shape(x))
            if params.noise_rate > 0:
                # Input corruption (denoising AE): the 1-x wrappers make
                # Dropout zero out *active* entries — assumes x is binary,
                # TODO confirm; this drops 1s rather than 0s.
                x = Lambda(lambda x: 1 - x)(x)
                x = Dropout(params.noise_rate)(x)
                x = Lambda(lambda x: 1 - x)(x)
            print(K.int_shape(x))
            # Per-window encoder: 2000 -> 200 units applied to every window.
            x = TimeDistributed(Dense(2000, activation='relu'))(x)
            print(K.int_shape(x))
            x = TimeDistributed(Dense(200, activation='relu'))(x)
            print(K.int_shape(x))
            x = Flatten()(x)
            print(K.int_shape(x))
            x = Dense(1600, activation='relu')(x)
            print(K.int_shape(x))
            if use_vae:
                # Variational head: predict mean and log-variance, then draw
                # a latent sample with the reparameterization trick.
                z_mean = Dense(latent_space_size)(x)
                z_log_sigma_sq = Dense(latent_space_size)(x)
                x = Lambda(vae_sampling, output_shape=(latent_space_size,), name='encoder')([z_mean, z_log_sigma_sq, vae_b1])
            else:
                # Deterministic bottleneck; BatchNorm tagged as encoder output.
                x = Dense(latent_space_size)(x)
                x = BatchNormalization(momentum=batchnorm_momentum, name='encoder')(x)
            print(K.int_shape(x))
        # LATENT SPACE
        # Decoder: latent -> 1600 -> (max_windows x 200) -> per-window 2000
        # -> per-window reconstruction, mirroring the encoder stack above.
        x = Dense(1600, name='decoder')(x)
        x = BatchNormalization(momentum=batchnorm_momentum)(x)
        x = Activation('relu')(x)
        if dropout_rate > 0:
            x = Dropout(dropout_rate)(x)
        print(K.int_shape(x))
        x = Dense(max_windows * 200)(x)
        print(K.int_shape(x))
        x = Reshape((max_windows, 200))(x)
        x = TimeDistributed(BatchNormalization(momentum=batchnorm_momentum))(x)
        x = Activation('relu')(x)
        if dropout_rate > 0:
            x = Dropout(dropout_rate)(x)
        print(K.int_shape(x))
        x = TimeDistributed(Dense(2000))(x)
        x = TimeDistributed(BatchNormalization(momentum=batchnorm_momentum))(x)
        x = Activation('relu')(x)
        if dropout_rate > 0:
            x = Dropout(dropout_rate)(x)
        print(K.int_shape(x))
        #if params.encode_volume:
        #x = TimeDistributed(Dense(input_shape[1] * input_shape[2]))(x)
        #else:
        # Sigmoid output reconstructs per-cell activations in [0, 1].
        x = TimeDistributed(Dense(input_shape[1] * input_shape[2], activation='sigmoid'))(x)
        print(K.int_shape(x))
        # Restore the original (windows, h, w) sample shape.
        x = Reshape((input_shape[0], input_shape[1], input_shape[2]))(x)
        print(K.int_shape(x))
        return Model(x_in, x)