-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathspectogram.py
213 lines (172 loc) · 8.34 KB
/
spectogram.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
#!/usr/bin/env python
#coding: utf-8
""" This work is licensed under a Creative Commons Attribution 3.0 Unported License.
Frank Zalkow, 2012-2013 """
from PIL import Image
import numpy as np
import scipy.io.wavfile as wav
import cv2
import os
from matplotlib import pyplot as plt
from numpy.lib import stride_tricks
from image_transform import ImageTransform
from scipy.spatial import distance
""" short time fourier transform of audio signal """
def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
    """Compute the short-time Fourier transform of a 1-D signal.

    The signal is padded with ``frameSize // 2`` zeros at the front, so the
    first window is centred on sample 0, and with ``frameSize`` zeros at the
    end, so the last frame is fully covered by samples.

    Args:
        sig: 1-D sequence of samples.
        frameSize: Window length in samples.
        overlapFac: Fraction of ``frameSize`` shared by consecutive frames.
        window: Callable that returns a window of the requested length.

    Returns:
        2-D complex array with one row per frame and ``frameSize // 2 + 1``
        columns (the real FFT of every windowed frame).
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))

    # np.floor / np.ceil return floats, but array sizes and shapes must be
    # integers (NumPy raises TypeError otherwise), hence the explicit casts.
    # Zeros at the beginning: centre of the first window is sample 0.
    samples = np.append(np.zeros(int(np.floor(frameSize / 2.0))), sig)
    # Number of frames needed to cover the padded signal.
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize))) + 1
    # Zeros at the end so the samples can be fully covered by frames.
    samples = np.append(samples, np.zeros(frameSize))

    # Overlapping view into `samples`; copied so the in-place windowing below
    # cannot write through to the shared buffer.
    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0]),
    ).copy()
    frames *= win

    return np.fft.rfft(frames)
""" scale frequency axis logarithmically """
def logscale_spec(spec, sr=44100, factor=20.):
    """Re-bin the frequency axis of a spectrogram on a power-law scale.

    Bin edges are taken from ``linspace(0, 1, freqbins) ** factor`` stretched
    to the original bin range; neighbouring original bins that fall between
    two edges are summed into one output bin.

    Args:
        spec: 2-D array of shape ``(timebins, freqbins)``.
        sr: Sample rate used to compute the frequency of every bin.
        factor: Exponent applied to the linear scale; larger values merge
            more of the high-frequency bins.

    Returns:
        Tuple ``(newspec, freqs)``: ``newspec`` is a complex array of shape
        ``(timebins, n_bins)`` and ``freqs`` is a list holding the mean
        frequency of the original bins merged into each output bin.
    """
    timebins, freqbins = np.shape(spec)

    scale = np.linspace(0, 1, freqbins) ** factor
    scale *= (freqbins - 1) / max(scale)
    # The edges are used as slice indices below, so they must be integers;
    # float indices raise TypeError in NumPy.
    edges = np.unique(np.round(scale)).astype(int)

    # Frequency of every original bin.
    allfreqs = np.abs(np.fft.fftfreq(freqbins * 2, 1. / sr)[:freqbins + 1])

    newspec = np.complex128(np.zeros([timebins, len(edges)]))
    freqs = []
    last = len(edges) - 1
    for i, lo in enumerate(edges):
        # The final bin is open-ended and collects everything that is left.
        hi = None if i == last else edges[i + 1]
        newspec[:, i] = np.sum(spec[:, lo:hi], axis=1)
        freqs.append(np.mean(allfreqs[lo:hi]))

    return newspec, freqs
""" plot spectrogram"""
def plotstft(audiopath, generatefig=True, binsize=2**10, plotpath=None, colormap="jet"):
    """Plot the spectrogram of a WAV file and return the matplotlib figure.

    The file is read with ``scipy.io.wavfile``, transformed with ``stft`` and
    ``logscale_spec`` and drawn in decibels with ``imshow``.

    Args:
        audiopath: Path of the ``.wav`` file to read.
        generatefig: When true (default) the figure is converted with
            ``prepare_fig_to_img`` and written to ``plotpath`` without being
            shown. When false the figure is shown interactively, the
            intermediate preprocessing result is displayed and the final
            image is written to ``test.png`` in the working directory.
        binsize: STFT window length in samples.
        plotpath: Destination of the processed image. Defaults to
            ``audiopath`` with ``.wav`` replaced by ``.png``.
        colormap: Matplotlib colormap name used for the spectrogram.

    Returns:
        The matplotlib figure holding the spectrogram. The figure is NOT
        closed here because the caller receives it; the caller should call
        ``plt.close(fig)`` once done to release its memory.
    """
    samplerate, samples = wav.read(audiopath)
    s = stft(samples, binsize)

    # The earlier code assembled a ".../graphs/..." path from fixed split
    # indices (IndexError for paths containing a single "/") and then always
    # overwrote it with the path below, silently ignoring the `plotpath`
    # argument. Keep that effective default, but honour an explicit value.
    if plotpath is None:
        plotpath = audiopath.replace('.wav', '.png')

    sshow, freq = logscale_spec(s, factor=80.0, sr=samplerate)
    ims = 20. * np.log10(np.abs(sshow) / 10e-6)  # amplitude to decibel

    timebins, freqbins = np.shape(ims)

    fig = plt.figure(figsize=(8, 4.25))
    plt.imshow(np.transpose(ims), origin="lower", aspect="auto", cmap=colormap, interpolation="none")
    plt.colorbar()

    plt.xlabel("Time [s]")
    plt.ylabel("Frequency dB[Hz]")
    plt.xlim([0, timebins - 1])
    plt.ylim([0, freqbins])

    # Ten time ticks, labelled in seconds.
    xlocs = np.float32(np.linspace(0, timebins - 1, 10))
    plt.xticks(xlocs, ["%.02f" % l for l in ((xlocs * len(samples) / timebins) + (0.5 * binsize)) / samplerate])
    # Twenty frequency ticks, labelled with the mean frequency of the bin.
    ylocs = np.int16(np.round(np.linspace(0, freqbins - 1, 20)))
    plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs])

    # Important: renders the figure into its canvas buffer without showing it
    # to the user, so the pixel data can be extracted afterwards.
    fig.canvas.draw()

    if not generatefig:
        plt.show()

        # Temporary preview of what would be fed to the network training;
        # meant to be removed later (original author's note).
        img_data = ImageTransform.fig2data(fig)
        img_data = ImageTransform.transform(img_data)
        plt.imshow(img_data, 'gray')
        plt.figure()
        img_data = ImageTransform.image_bin(img_data)
        img_data = ImageTransform.invert(img_data)
        img_data = ImageTransform.remove_noise(img_data)  # closing: 1. dilate 2. erode
        # Original author's note: source is 350x165, scaled by 1/5 to keep proportions.
        img_data = ImageTransform.resize_graph(img_data, 70, 33)
        cv2.imwrite("test.png", img_data)
        plt.imshow(img_data, 'gray')
        plt.show()
    else:
        # Build the data-set image for this sample and store it.
        img_data = prepare_fig_to_img(fig)
        cv2.imwrite(plotpath, img_data)

    return fig
def create_data_set_graphs():
    """Convert every data-set WAV sample into a preprocessed PNG graph.

    For each of the categories ASC, DESC, FLAT and SOY, every ``.wav`` file
    in ``samples/<CATEGORY>`` is plotted with ``plotstft``, converted with
    ``prepare_fig_to_img`` and written to ``samples/<CATEGORY>/graphs/`` under
    the same name with a ``.png`` extension.

    Raises:
        OSError: If one of the ``samples/<CATEGORY>`` directories is missing.
    """
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print("\nCreating graphs from Data-Set samples...")

    for category in ("ASC", "DESC", "FLAT", "SOY"):
        sample_dir = "samples/" + category
        graphs_dir = sample_dir + "/graphs"

        # List first so that a missing sample directory still raises.
        file_names = os.listdir(sample_dir)

        # cv2.imwrite does not create missing folders, so make sure the
        # destination exists before writing.
        if not os.path.isdir(graphs_dir):
            os.makedirs(graphs_dir)

        for wav_name in file_names:
            if not wav_name.endswith(".wav"):
                continue
            fig = plotstft(sample_dir + "/" + wav_name, generatefig=True)
            img_data = prepare_fig_to_img(fig)  # image for the data set
            cv2.imwrite(graphs_dir + "/" + wav_name.replace('.wav', '.png'), img_data)
            # pyplot keeps every figure alive until it is closed; release it
            # so memory does not grow with the number of samples.
            plt.close(fig)

    print("Graphs successfully created! \n")
def prepare_fig_to_img(graph_fig):
    """Turn a matplotlib figure into an image ready for network training.

    The figure is converted with ``ImageTransform.fig2data`` and then passed,
    in order, through ``transform``, ``image_bin``, ``invert``,
    ``remove_noise`` and ``resize_graph(..., 70, 33)``. According to the
    original author's notes these steps perform cropping, grayscale
    conversion, binarisation, noise removal (closing: dilate then erode) and
    a resize from 350x165 down by a factor of five.

    Args:
        graph_fig: Matplotlib figure object to convert.

    Returns:
        Whatever ``ImageTransform.resize_graph`` returns (per the original
        notes, a numpy matrix).
    """
    pipeline = (
        ImageTransform.transform,
        ImageTransform.image_bin,
        ImageTransform.invert,
        ImageTransform.remove_noise,
    )

    picture = ImageTransform.fig2data(graph_fig)
    for step in pipeline:
        picture = step(picture)

    return ImageTransform.resize_graph(picture, 70, 33)
def load_data_set_graphs():
    """Load the preprocessed PNG graphs of the data set.

    Reads every ``.png`` file from ``samples/<CATEGORY>/graphs/`` for the
    categories ASC, DESC, FLAT and SOY using ``cv2.imread`` with flag ``0``
    (OpenCV's grayscale mode).

    Returns:
        Tuple of four lists ``(asc, desc, flat, soy)``; each list holds the
        ``cv2.imread`` result for every PNG of that category, in the order
        returned by ``os.listdir``.
    """
    per_category = []
    for category in ("ASC", "DESC", "FLAT", "SOY"):
        folder = "samples/" + category + "/graphs/"
        per_category.append([
            cv2.imread(folder + entry, 0)
            for entry in os.listdir(folder)
            if entry.endswith(".png")
        ])

    asc_graphs_array, desc_graphs_array, flat_graphs_array, soy_graphs_array = per_category
    return asc_graphs_array, desc_graphs_array, flat_graphs_array, soy_graphs_array