-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvideo-to-text.js
122 lines (99 loc) · 4.5 KB
/
video-to-text.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
const fs = require('fs');
const path = require('path');
const { pipeline } = require('stream/promises');
const OpenAI = require('openai'); // Make sure to initialize OpenAI client with your API key
const ytdl = require('@distube/ytdl-core');
const sanitizeFilename = require('sanitize-filename');
require('dotenv').config();
// Shared OpenAI client used for the transcription request. The API key is read
// from the OPENAI_API_KEY environment variable; require('dotenv').config() is
// called just before this, presumably so the key can be supplied via a .env file.
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
/**
 * Create `dirPath`, including any missing parent directories, when nothing
 * exists at that path yet. If something already exists there, do nothing.
 *
 * @param {string} dirPath - Directory path to create if absent.
 * @returns {void}
 */
function ensureDirectoryExistence(dirPath) {
  const alreadyPresent = fs.existsSync(dirPath);
  if (alreadyPresent) {
    return;
  }
  fs.mkdirSync(dirPath, { recursive: true });
}
/**
 * Build a filesystem-safe file name from a (video) title.
 *
 * The title is passed through `sanitizeFilename`, then every run of whitespace
 * is replaced by a single underscore, and the extension is appended.
 *
 * @param {string} title - Raw title to derive the file name from.
 * @param {string} extension - File extension without the leading dot.
 * @returns {string} `<sanitized title>.<extension>`, or `untitled.<extension>`
 *   when nothing of the title survives sanitization.
 */
function generateValidFilename(title, extension) {
  const sanitizedTitle = sanitizeFilename(title).replace(/\s+/g, '_');
  // The sanitizer may strip the whole title (e.g. ".." or only illegal
  // characters). Without a fallback the result would be a bare ".<extension>".
  const baseName = sanitizedTitle === '' ? 'untitled' : sanitizedTitle;
  return `${baseName}.${extension}`;
}
/**
 * Download the highest-quality audio-only stream of a YouTube video and write
 * it to `<downloadPath>/<sanitized title>.mp3`.
 *
 * NOTE: despite the function name and the `.mp3` extension, no transcoding is
 * performed here; the bytes delivered by ytdl are written to disk unchanged.
 *
 * @param {string} videoUrl - URL of the YouTube video.
 * @param {string} downloadPath - Directory for the audio file; created if missing.
 * @returns {Promise<string>} Path of the written audio file.
 * @throws {Error} When the metadata has no video details, or when fetching the
 *   metadata, downloading, or writing the file fails. Every failure is logged
 *   and then rethrown.
 */
async function downloadAndConvertAudio(videoUrl, downloadPath) {
  // Log prefix. Only the canonical watch-URL prefix is stripped; any other URL
  // form is shown in full.
  const videoIdentifier = `[${videoUrl.replace('https://www.youtube.com/watch?v=', '')}] `;
  try {
    console.log(`${videoIdentifier}Searching video information...`);
    const info = await ytdl.getInfo(videoUrl);
    if (!info.videoDetails) {
      throw new Error(`${videoUrl} no video details found.`);
    }

    const filename = generateValidFilename(info.videoDetails.title, 'mp3');
    const finalFilePath = path.join(downloadPath, filename);

    // The target directory must exist before the write stream is opened.
    ensureDirectoryExistence(downloadPath);

    console.log(`${videoIdentifier}Downloading video audio...`);
    // Reuse the metadata fetched above instead of having ytdl request it again.
    const audioStream = ytdl.downloadFromInfo(info, {
      quality: 'highestaudio',
      filter: 'audioonly',
    });

    console.info(`${videoIdentifier}Writing audio file as '${finalFilePath}'..`);
    // pipeline() rejects when either stream fails and destroys both of them.
    // A bare pipe() does not forward source errors, so a failed download would
    // crash the process or leave the caller waiting forever.
    await pipeline(audioStream, fs.createWriteStream(finalFilePath));

    console.log(`${videoIdentifier}Download and conversion to MP3 finished!`);
    return finalFilePath;
  } catch (error) {
    console.error('Error during audio download:', error);
    throw error;
  }
}
/**
 * Send an audio file to the OpenAI transcription endpoint (model `whisper-1`)
 * and store the returned text as `<outputFolder>/<audio base name>.txt`.
 *
 * @param {string} audioFilePath - Path of the audio file to transcribe.
 * @param {string} outputFolder - Directory for the text file; created if missing.
 * @returns {Promise<string>} Path of the written transcription file.
 * @throws Any error raised by the API call or the file write; it is logged and
 *   then rethrown.
 */
async function transcribeAudio(audioFilePath, outputFolder) {
  try {
    console.log("Transcribing audio file:", audioFilePath);

    const request = {
      file: fs.createReadStream(audioFilePath),
      model: "whisper-1",
    };
    const result = await openai.audio.transcriptions.create(request);

    // The output folder is created only once the transcription has come back.
    ensureDirectoryExistence(outputFolder);

    // Name the text file after the audio file, minus its extension.
    const { name: baseName } = path.parse(audioFilePath);
    const transcriptionFilePath = path.join(outputFolder, `${baseName}.txt`);
    fs.writeFileSync(transcriptionFilePath, result.text);

    console.log("Transcription saved at:", transcriptionFilePath);
    return transcriptionFilePath;
  } catch (failure) {
    console.error("Error during transcription:", failure);
    throw failure;
  }
}
/**
 * CLI entry point: download the audio of the YouTube video whose URL is given
 * as the first command-line argument, then transcribe it.
 *
 * Audio is written to `output_audios/` and the transcription to
 * `output_texts/`, both next to this script. The process exits with status 1
 * when no URL is supplied, and finishes with a non-zero exit code when the
 * download or the transcription fails.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const videoUrl = process.argv[2]; // First CLI argument: the YouTube video URL.
  if (!videoUrl) {
    console.error("Please provide a YouTube video URL.");
    process.exit(1);
  }

  const downloadPath = path.join(__dirname, 'output_audios');
  const transcriptionPath = path.join(__dirname, 'output_texts');

  try {
    // Step 1: download the video's audio track.
    const audioFilePath = await downloadAndConvertAudio(videoUrl, downloadPath);

    // Step 2: transcribe the audio file and save the result as a text file.
    const transcriptionFilePath = await transcribeAudio(audioFilePath, transcriptionPath);

    // To delete the audio file after transcription, call
    // fs.unlinkSync(audioFilePath) here.

    console.log("Process completed successfully!");
    console.log(`Transcription saved at: ${transcriptionFilePath}`);
  } catch (error) {
    console.error("Error during the process:", error);
    // Signal failure to the calling shell. exitCode is used rather than
    // process.exit() so that pending output is still flushed.
    process.exitCode = 1;
  }
}
// Start the CLI when the script is executed. main() wraps its download and
// transcription steps in a try/catch; the promise it returns is not awaited here.
main();