-
Notifications
You must be signed in to change notification settings - Fork 0
/
prepare-piper-vtt.js
42 lines (33 loc) · 1.26 KB
/
prepare-piper-vtt.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import { execFileSync, execSync } from 'child_process';
import { mkdirSync, readFileSync, writeFileSync } from 'fs';
import pkg from 'webvtt-parser';
const { WebVTTParser } = pkg;
// Base names of the inputs: each entry is expected to exist as
// `videos/<name>.mp4` with its subtitles in `videos/<name>.en.vtt`.
const videos = ['file1', 'file2'];

// Number of the next clip to write; advanced by processVideo and shared
// across all videos.
let counter = 0;

// Metadata rows collected by processVideo, one per exported clip.
let output = '';

videos.forEach((name) => {
  processVideo(`videos/${name}.mp4`, `videos/${name}.en.vtt`, 'dataset2');
});

// Every video has been processed at this point; write the collected rows.
writeFileSync(`dataset2/metadata.csv`, output);
/**
 * Extracts the audio of one video and cuts it into one WAV clip per WebVTT
 * cue, appending a `wavs/<n>.wav|<text>` row to the module-level `output`
 * buffer for every clip written.
 *
 * Reads and mutates the module-level `counter` (number of the next clip) and
 * `output` (accumulated metadata rows). Errors from reading the subtitle
 * file, creating the directories, or the ffmpeg helpers are not caught here
 * and propagate to the caller.
 *
 * @param {string} videoPath - Path of the source video.
 * @param {string} vttPath - Path of the UTF-8 WebVTT file belonging to the video.
 * @param {string} outputPath - Dataset directory. The full-length WAV is
 *   written directly inside it, the clips inside its `wavs/` subdirectory.
 */
function processVideo(videoPath, vttPath, outputPath) {
  // ffmpeg does not create missing output directories, so make sure both
  // `outputPath` and `outputPath/wavs` exist before anything is written.
  mkdirSync(`${outputPath}/wavs`, { recursive: true });

  // `counter` currently holds the number of this video's first clip, which
  // makes the intermediate file name unique per video.
  const audioPath = `${outputPath}/extracted_audio-${counter}.wav`;
  videoToWav(videoPath, audioPath);

  const vttData = readFileSync(vttPath, 'utf-8');
  const parser = new WebVTTParser();
  const tree = parser.parse(vttData, 'metadata');

  for (const cue of tree.cues) {
    // A metadata row must be a single line: collapse every whitespace run
    // (line breaks, tabs, repeated spaces) into one space and trim the ends.
    const txt = cue.text.replace(/\s+/g, ' ').trim();

    // A cue without any text would yield a clip with an empty transcript;
    // skip it before spending an ffmpeg call on it.
    if (txt === '') {
      continue;
    }

    sliceAudio(audioPath, cue.startTime, cue.endTime, `${outputPath}/wavs/${counter}.wav`);
    output += `wavs/${counter}.wav|${txt}\n`;
    counter++;
  }
}
/**
 * Converts the audio of a video file into a WAV file by running ffmpeg
 * synchronously. ffmpeg is asked for the `pcm_s16le` codec, a 22050 Hz
 * sample rate and a single channel.
 *
 * ffmpeg is started directly with an argument array instead of through a
 * shell command string, so paths containing quotes, `$`, backticks or other
 * shell metacharacters are passed through verbatim.
 *
 * @param {string} videoPath - Path of the input video.
 * @param {string} audioPath - Path of the WAV file to create.
 * @throws {Error} If ffmpeg cannot be started or exits with a non-zero status.
 */
function videoToWav(videoPath, audioPath) {
  const args = [
    '-i', videoPath,
    '-acodec', 'pcm_s16le',
    '-ar', '22050',
    '-ac', '1',
    audioPath,
  ];
  execFileSync('ffmpeg', args);
}
/**
 * Copies the section between `start` and `end` of an audio file into a new
 * file by running ffmpeg synchronously with `-ss`/`-to` and stream copy
 * (`-c copy`, i.e. without re-encoding).
 *
 * ffmpeg is started directly with an argument array instead of through a
 * shell command string, so paths containing quotes, `$`, backticks or other
 * shell metacharacters are passed through verbatim.
 *
 * @param {string} audioPath - Path of the audio file to cut from.
 * @param {number|string} start - Start position, passed to ffmpeg's `-ss`.
 * @param {number|string} end - End position, passed to ffmpeg's `-to`.
 * @param {string} segmentPath - Path of the clip to create.
 * @throws {Error} If ffmpeg cannot be started or exits with a non-zero status.
 */
function sliceAudio(audioPath, start, end, segmentPath) {
  const args = [
    '-i', audioPath,
    // Process arguments must be strings; this matches how the values were
    // previously interpolated into the command line.
    '-ss', String(start),
    '-to', String(end),
    '-c', 'copy',
    segmentPath,
  ];
  execFileSync('ffmpeg', args);
}