Skip to content

Commit d87ce7b

Browse files
neurlangYour Name
authored and
Your Name
committed
add Image
1 parent d8178cd commit d87ce7b

File tree

2 files changed

+47
-15
lines changed

2 files changed

+47
-15
lines changed

mel/impl.go

+42-13
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,36 @@ import "github.com/faiface/beep/wav"
99
import "github.com/mewkiz/flac"
1010
import "math"
1111

12+
// dumpbuffer normalizes the two channels of a mel buffer to the 0..255 range
// and packs them into one uint16 per cell: channel 0 occupies the low byte and
// channel 1 the high byte.
//
// buf is read as mels rows of stride = len(buf)/mels cells each, with cell
// (x, y) stored at buf[stride*y+x]; cells past stride*mels are ignored. Each
// channel is scaled independently using its own minimum and maximum over the
// cells that are read. The result holds stride*mels entries in the same
// row-major order as buf.
//
// A nil slice is returned when mels is not positive or when buf is too short
// to hold a single column. A channel whose values are all equal is emitted as
// 0 instead of dividing by a zero range.
func dumpbuffer(buf [][2]float64, mels int) (out []uint16) {
	if mels <= 0 {
		return nil
	}
	stride := len(buf) / mels
	if stride == 0 {
		return nil
	}
	cells := buf[:stride*mels]

	// Seed the extrema from real data rather than magic sentinels so that
	// values of any magnitude are handled.
	lo, hi := cells[0], cells[0]
	for _, cell := range cells[1:] {
		for l, w := range cell {
			if w > hi[l] {
				hi[l] = w
			}
			if w < lo[l] {
				lo[l] = w
			}
		}
	}

	out = make([]uint16, 0, len(cells))
	for _, cell := range cells {
		var packed uint16
		for l, w := range cell {
			span := hi[l] - lo[l]
			if span > 0 {
				// Truncate towards zero, then place channel l in byte l.
				level := uint16(int(255 * ((w - lo[l]) / span)))
				packed |= level << (8 * uint(l))
			}
		}
		out = append(out, packed)
	}
	return out
}
41+
1242
func dumpimage(name string, buf [][2]float64, mels int, reverse bool) error {
1343
f, err := os.Create(name)
1444
if err != nil {
@@ -19,19 +49,20 @@ func dumpimage(name string, buf [][2]float64, mels int, reverse bool) error {
1949

2050
img := image.NewRGBA(image.Rect(0, 0, stride, mels))
2151

22-
var mgc_max, mgc_min = [2]float64{(-99999999.),(-99999999.)}, [2]float64{(9999999.),(9999999.)}
52+
var mgc_max, mgc_min = [2]float64{(-99999999.), (-99999999.)}, [2]float64{(9999999.), (9999999.)}
2353

2454
for x := 0; x < stride; x++ {
2555
for l := 0; l < 2; l++ {
26-
for y := 0; y < mels; y++ {
27-
var w = buf[stride*y+x][l]
28-
if w > mgc_max[l] {
29-
mgc_max[l] = w
30-
}
31-
if w < mgc_min[l] {
32-
mgc_min[l] = w
56+
for y := 0; y < mels; y++ {
57+
var w = buf[stride*y+x][l]
58+
if w > mgc_max[l] {
59+
mgc_max[l] = w
60+
}
61+
if w < mgc_min[l] {
62+
mgc_min[l] = w
63+
}
3364
}
34-
}}
65+
}
3566
}
3667
for x := 0; x < stride; x++ {
3768
for y := 0; y < mels; y++ {
@@ -40,7 +71,7 @@ func dumpimage(name string, buf [][2]float64, mels int, reverse bool) error {
4071
val1 := (buf[stride*y+x][1] - mgc_min[1]) / (mgc_max[1] - mgc_min[1])
4172
col.R = uint8(int(255 * val0))
4273
col.G = uint8(int(255 * val1))
43-
col.B = uint8(int(255 * (val0+val1)*0.5))
74+
col.B = uint8(int(255 * (val0 + val1) * 0.5))
4475
col.A = uint8(255)
4576
if reverse {
4677
img.SetRGBA(x, mels-y-1, col)
@@ -150,7 +181,7 @@ func domel(filtersize, mels int, spectrum [][2]float64, mel_fmin, mel_fmax float
150181
inlo, modlo, inhi = 0, 0, 0
151182
}
152183
var tot [2]float64
153-
for l := 0; l< 2; l++ {
184+
for l := 0; l < 2; l++ {
154185

155186
var total float64
156187

@@ -210,5 +241,3 @@ func pad(buf []float64, filter int) []float64 {
210241
}
211242
return buf
212243
}
213-
214-

mel/mel.go

+5-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ type Mel struct {
1111
MelFmax float64
1212
TuneMul float64
1313
TuneAdd float64
14-
Window int
14+
Window int
1515
Resolut int
1616
YReverse bool
1717
}
@@ -34,7 +34,6 @@ var ErrFileNotLoaded = errors.New("wavNotLoaded")
3434
// ToMel generates a mel spectrogram from a wave buffer and returns the mel buffer.
3535
func (m *Mel) ToMel(buf []float64) ([][2]float64, error) {
3636

37-
3837
buf = pad(buf, m.Window)
3938

4039
stft := stft.New(m.Window, m.Resolut)
@@ -78,6 +77,10 @@ func LoadWav(inputFile string) []float64 {
7877
return loadwav(inputFile)
7978
}
8079

80+
// Image returns the result of passing the mel buffer buf and m.NumMels to
// dumpbuffer: a slice of uint16 values, one per buffer cell that dumpbuffer
// reads, each packing both channels of that cell.
func (m *Mel) Image(buf [][2]float64) []uint16 {
81+
return dumpbuffer(buf, m.NumMels)
82+
}
83+
8184
// ToMelFlac generates a mel spectrogram from an input FLAC audio file and saves it as a PNG image.
8285
func (m *Mel) ToMelFlac(inputFile, outputFile string) error {
8386

0 commit comments

Comments
 (0)