-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaudio_utils.py
157 lines (109 loc) · 4.57 KB
/
audio_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import tensorflow as tf
import numpy as np
import librosa
#this function takes a file path and computes a short-time Fourier transform of the audio in that file
#it returns the log-magnitude spectrogram (2 dimensional: each row is a frequency bin and each column is a slice in time) together with the sample rate
def read_audio_spectrum(filename, N_FFT):
    """Load an audio file and return its log-magnitude spectrogram.

    Args:
        filename: Path of the audio file; handed to ``librosa.load``.
        N_FFT: FFT size, forwarded to ``librosa.stft`` as ``n_fft``. It
            controls the number of frequency bins of the transform.

    Returns:
        A ``(logMagnitude, sampleRate)`` tuple, where ``logMagnitude`` is
        ``log1p`` of the STFT magnitude and ``sampleRate`` is the rate
        reported by ``librosa.load``.
    """
    wav, sampleRate = librosa.load(filename)
    # Pass n_fft by keyword: newer librosa releases accept only the signal
    # positionally, and the keyword form also works on older releases.
    spectrogram = librosa.stft(wav, n_fft=N_FFT)
    # Only the magnitude is kept; the phase of the STFT is discarded.
    logMagnitude = np.log1p(np.abs(spectrogram))
    return logMagnitude, sampleRate
#this function takes the result of optimization, which is a log-magnitude fourier transformation
#in the frequency domain, undoes the log calculation taken by read_audio_spectrum, and
#extracts the raw audio and writes it to the file named by filename
def writeOutput(result, sampleRate, filename):
    """Invert a log-magnitude spectrogram and write the audio to *filename*.

    Args:
        result: Array holding log1p-scaled magnitudes. Singleton axes are
            squeezed away and the remainder is transposed before inversion.
        sampleRate: Sample rate passed to the WAV writer.
        filename: Destination path of the WAV file.
    """
    # NOTE(review): librosa.output.write_wav was removed in librosa 0.8 --
    # confirm the librosa version this project pins.
    logSpectrum = np.transpose(np.squeeze(result))
    # Undo the log1p applied when the spectrogram was created.
    linearSpectrum = np.exp(logSpectrum) - 1
    audio = librosa.istft(linearSpectrum)
    librosa.output.write_wav(filename, audio, sampleRate)
def writeOutput2(result, sampleRate, filename, shape):
    """Reconstruct audio from log-magnitudes with iterative phase estimation.

    The magnitude is recovered by undoing the log1p scaling, a random phase
    is drawn, and the phase is then refined for 500 rounds of
    ``istft`` -> ``stft`` while the magnitude is held fixed. The final
    waveform is written to *filename*.

    Args:
        result: Array indexed as ``result[0, 0]``; the transpose of that
            slice must be assignable to an array of shape *shape*.
        sampleRate: Sample rate passed to the WAV writer.
        filename: Destination path of the WAV file.
        shape: ``(N_Bins, N_Timesteps)`` of the spectrogram to rebuild.
    """
    # NOTE(review): librosa.output.write_wav was removed in librosa 0.8 --
    # confirm the librosa version this project pins.
    N_Bins, _ = shape
    magnitude = np.zeros(shape)
    magnitude[:N_Bins, :] = np.exp(result[0, 0].T) - 1

    # istft infers its frame size from the bin count as 2 * (bins - 1); use
    # the same size for the forward transform so both directions agree.
    # For the usual 1025 bins this is the previously hard-coded 2048.
    n_fft = 2 * (N_Bins - 1)

    # Start from a uniformly random phase in [-pi, pi).
    phase = 2 * np.pi * np.random.random_sample(magnitude.shape) - np.pi
    for _ in range(500):
        spectrum = magnitude * np.exp(1j * phase)
        audio = librosa.istft(spectrum)
        # n_fft is passed by keyword: newer librosa releases reject it as a
        # positional argument.
        phase = np.angle(librosa.stft(audio, n_fft=n_fft))
    librosa.output.write_wav(filename, audio, sampleRate)
def getSavedOutput(meta_file, checkpoint_directory):
    """Restore the latest checkpoint and evaluate the tensor named "OUT:0".

    Args:
        meta_file: Path of the meta graph file to import.
        checkpoint_directory: Directory searched for the latest checkpoint.

    Returns:
        The value obtained by running the "OUT:0" tensor in a fresh session.
    """
    print("in get saved output")
    with tf.Session() as sess:
        print("in sess")
        restorer = tf.train.import_meta_graph(meta_file)
        print("imported meta graph")
        latest_checkpoint = tf.train.latest_checkpoint(checkpoint_directory)
        restorer.restore(sess, latest_checkpoint)
        print("restored")
        default_graph = tf.get_default_graph()
        print("created graph")
        out_tensor = default_graph.get_tensor_by_name("OUT:0")
        print("got output tensor")
        fetched = sess.run(out_tensor)
        print("got result")
    return fetched
def writeListToFile(costs, file_path):
    """Write each item of *costs* to *file_path*, one ``str(item)`` per line.

    Args:
        costs: Iterable of values to write.
        file_path: Destination path; an existing file is overwritten.
    """
    # The context manager closes the file even if a write raises.
    with open(file_path, "w") as outFile:
        outFile.writelines(str(cost) + "\n" for cost in costs)
def writeParamsToFile(params, file_path):
    """Write a mapping to *file_path* as ``key = value`` lines.

    Args:
        params: Mapping whose items are written in iteration order. Keys and
            values are converted with ``str``, so non-string keys are
            accepted.
        file_path: Destination path; an existing file is overwritten.
    """
    # The context manager closes the file even if a write raises.
    with open(file_path, "w") as outFile:
        for key, value in params.items():
            outFile.write(str(key) + " = " + str(value) + "\n")
def loadParamFromTxt(file_path):
    """Read a single float parameter from a text file.

    The file is expected to hold one numeric value, as written by
    ``writeSingleParamToTxt``. Blank lines are ignored; if several lines
    hold values, the last one is returned.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The parsed value as a ``float``.

    Raises:
        ValueError: If the file holds no value, or a non-blank line cannot
            be parsed as a float.
    """
    value = None
    with open(file_path) as f:
        for line in f:
            text = line.strip()
            if text:  # skip blank lines, e.g. a trailing newline
                value = float(text)
    if value is None:
        # Fail with a clear message instead of leaving `value` unbound.
        raise ValueError("no numeric value found in " + str(file_path))
    return value
def writeSingleParamToTxt(value, file_path):
    """Write ``str(value)`` to *file_path* with no trailing newline.

    Args:
        value: Value to store.
        file_path: Destination path; an existing file is overwritten.
    """
    # The context manager closes the file even if the write raises.
    with open(file_path, "w") as outFile:
        outFile.write(str(value))
def loadList(file_name, convertToFloat = False):
    """Read a text file into a list with one entry per line.

    Args:
        file_name: Path of the file to read.
        convertToFloat: When true, each stripped line is converted with
            ``float``; otherwise the stripped strings are returned.

    Returns:
        A list of stripped strings, or of floats when *convertToFloat* is
        true.
    """
    with open(file_name) as handle:
        entries = [raw.strip() for raw in handle]
    if not convertToFloat:
        return entries
    return [float(entry) for entry in entries]
def loadSavedNumpyArrays(filenames):
    """Load every file in *filenames* with ``np.load``.

    Args:
        filenames: Iterable of paths to load.

    Returns:
        A list of the loaded objects, in the same order as *filenames*.
    """
    return [np.load(path) for path in filenames]
def shuffle(X, Y):
    """Shuffle *X* and *Y* along their first axis with the same permutation.

    Row ``i`` of the shuffled *X* still corresponds to row ``i`` of the
    shuffled *Y*. Only the first axis is indexed, so arrays of any rank are
    accepted.

    Args:
        X: Array whose first axis enumerates the examples.
        Y: Array with the same first-axis length as *X*.

    Returns:
        A ``(shuffledX, shuffledY)`` tuple of new arrays.

    Raises:
        AssertionError: If *X* and *Y* differ in first-axis length.
    """
    assert X.shape[0] == Y.shape[0], "X and Y must have the same number of rows"
    N_rows = X.shape[0]
    permutation = np.random.permutation(np.arange(N_rows))
    # Indexing only the leading axis keeps every trailing axis intact
    # without assuming a particular number of dimensions.
    shuffledX = X[permutation]
    shuffledY = Y[permutation]
    return shuffledX, shuffledY
## Taken and slightly modified from deeplearning.ai 'convolutional model: application' assignment
#Assumes X and Y already shuffled
def random_mini_batches(X, Y, mini_batch_size = 64):
    """Split *X* and *Y* into consecutive mini-batches along the first axis.

    No shuffling happens here: batches are taken in order, so the caller is
    expected to have shuffled *X* and *Y* beforehand. Only the first axis is
    sliced, so arrays of any rank are accepted.

    Args:
        X: Array whose first axis enumerates the training examples.
        Y: Array with matching first axis.
        mini_batch_size: Number of examples per batch.

    Returns:
        A list of ``(mini_batch_X, mini_batch_Y)`` tuples. Every batch has
        ``mini_batch_size`` examples, except possibly the last, which holds
        the remainder.
    """
    m = X.shape[0]  # number of training examples
    mini_batches = []

    # Full-size batches.
    num_complete_minibatches = m // mini_batch_size
    for k in range(num_complete_minibatches):
        start = k * mini_batch_size
        end = start + mini_batch_size
        mini_batches.append((X[start:end], Y[start:end]))

    # Remainder batch (last mini-batch < mini_batch_size).
    if m % mini_batch_size != 0:
        start = num_complete_minibatches * mini_batch_size
        mini_batches.append((X[start:m], Y[start:m]))

    return mini_batches