createTextFromAudioFile.ts
import * as wav from "wav";
import { Readable } from "stream";
import { createReadStream } from "fs";
import vosk, { RecognitionResults } from "vosk";
import { Ora } from "ora";
import { loadModel } from "./loadModel";
import { stat } from "fs/promises";
import { join } from "path";
import { workingDir } from "./util";

// Shape of the "format" event emitted by wav.Reader once the WAV header is parsed.
interface AudioFormat {
  audioFormat: number;
  channels: number;
  sampleRate: number;
}

export const createTextFromAudioFile = (
  spinner: Ora,
  fileName: string,
  modelName: string
): Promise<RecognitionResults[]> =>
  new Promise(async (resolve, reject) => {
    try {
      if (!fileName) {
        throw new Error("Source audio file name is not provided.");
      }

      const model = await loadModel(join(workingDir, "models", modelName));
      spinner.text = "Listening...";

      // Track how much of the file has been fed to the recognizer so the
      // spinner can show a percentage and a rolling preview of the transcript.
      const fileStats = await stat(fileName);
      const totalSize = fileStats.size;
      let bytesRead = 0;
      let lastLoggedPercentage = 0;
      const results: RecognitionResults[] = [];

      const updateProgressBar = (currentSize: number) => {
        const percentage = Math.round((currentSize / totalSize) * 100);
        const totalResults = results.length;
        const preview = results.map((result) => result.text).join(" ");
        if (percentage !== lastLoggedPercentage) {
          spinner.text = `Listening... (${percentage}%).\nHeard so far:\n\n\t${
            totalResults > 1 ? `[...] ${preview.slice(-360)} [...]` : preview
          }`;
          lastLoggedPercentage = percentage;
        }
      };

      const wfReader = new wav.Reader();
      const wfReadable = new Readable().wrap(wfReader);

      wfReader.on("format", async (format: AudioFormat) => {
        try {
          if (format.audioFormat !== 1 || format.channels !== 1) {
            throw new Error("Audio file must be WAV format mono PCM.");
          }

          spinner.text = "Creating recognizer...";
          const recognizer = new vosk.Recognizer({
            model,
            sampleRate: format.sampleRate,
          });
          // Include per-word details in each recognized segment.
          recognizer.setWords(true);

          spinner.text = "Listening. Heard so far: ";
          // Feed decoded PCM chunks to the recognizer; each detected end of
          // speech yields one completed result segment.
          for await (const data of wfReadable) {
            bytesRead += data.length;
            updateProgressBar(bytesRead);
            const endOfSpeech = recognizer.acceptWaveform(data);
            if (endOfSpeech) {
              results.push(recognizer.result());
            }
          }
          // Flush whatever is still buffered once the stream has ended.
          results.push(recognizer.finalResult());

          spinner.clear();
          recognizer.free();
          model.free();
          resolve(results);
        } catch (error) {
          reject(error);
        }
      });

      createReadStream(fileName, { highWaterMark: 4096 })
        .pipe(wfReader)
        .on("error", reject);
    } catch (error) {
      reject(error);
    }
  });
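
// Usage sketch (illustrative only, not part of the original module): one way
// this helper might be wired up from a CLI entry point. The file name, model
// directory name, and spinner text below are assumptions for the example; the
// model folder is expected under `<workingDir>/models/` as constructed above.
//
//   import ora from "ora";
//   import { createTextFromAudioFile } from "./createTextFromAudioFile";
//
//   const main = async () => {
//     const spinner = ora("Preparing transcription...").start();
//     const results = await createTextFromAudioFile(
//       spinner,
//       "recording.wav",               // mono PCM WAV input
//       "vosk-model-small-en-us-0.15"  // folder under <workingDir>/models
//     );
//     spinner.succeed(`Transcribed ${results.length} segments.`);
//     console.log(results.map((r) => r.text).join(" "));
//   };
//
//   main().catch(console.error);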