diff --git a/functions/src/api/setupFastify.ts b/functions/src/api/setupFastify.ts index 48266b3..2f66882 100644 --- a/functions/src/api/setupFastify.ts +++ b/functions/src/api/setupFastify.ts @@ -43,6 +43,10 @@ export const setupFastify = () => { fastify.register(cors, { origin: '*', }) + fastify.addHook('onSend', (_, reply, _2, done: () => void) => { + reply.header('Cache-Control', 'must-revalidate,no-cache,no-store') + done() + }) registerSwagger(fastify) fastify.register(sponsorsRoutes) diff --git a/scripts/.gitignore b/scripts/.gitignore index 06927e8..82ab10b 100644 --- a/scripts/.gitignore +++ b/scripts/.gitignore @@ -1,3 +1,5 @@ client_secret.json openplanner.json miniature/ +out_srt/ +out_keywords diff --git a/scripts/package-lock.json b/scripts/package-lock.json index 9730c95..5719710 100644 --- a/scripts/package-lock.json +++ b/scripts/package-lock.json @@ -9,9 +9,19 @@ "version": "1.0.0", "license": "ISC", "dependencies": { + "@supercharge/promise-pool": "^3.2.0", + "axios": "^1.7.2", "googleapis": "^140.0.1" } }, + "node_modules/@supercharge/promise-pool": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/@supercharge/promise-pool/-/promise-pool-3.2.0.tgz", + "integrity": "sha512-pj0cAALblTZBPtMltWOlZTQSLT07jIaFNeM8TWoJD1cQMgDB9mcMlVMoetiB35OzNJpqQ2b+QEtwiR9f20mADg==", + "engines": { + "node": ">=8" + } + }, "node_modules/agent-base": { "version": "7.1.1", "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.1.tgz", @@ -23,6 +33,21 @@ "node": ">= 14" } }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" + }, + "node_modules/axios": { + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.7.2.tgz", + "integrity": "sha512-2A8QhOMrbomlDuiLeK9XibIBzuHeRcqqNOHp0Cyp5EoJ1IFDh+XZH3A6BkXtv0K4gFGCI0Y4BM7B1wOEi0Rmgw==", + 
"dependencies": { + "follow-redirects": "^1.15.6", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" + } + }, "node_modules/base64-js": { "version": "1.5.1", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", @@ -73,6 +98,17 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, "node_modules/debug": { "version": "4.3.5", "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.5.tgz", @@ -105,6 +141,14 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/ecdsa-sig-formatter": { "version": "1.0.11", "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", @@ -137,6 +181,38 @@ "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==" }, + "node_modules/follow-redirects": { + "version": "1.15.6", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", + "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + 
"node_modules/form-data": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", + "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/function-bind": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", @@ -363,6 +439,25 @@ "safe-buffer": "^5.0.1" } }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, "node_modules/ms": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", @@ -398,6 +493,11 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" + }, "node_modules/qs": { "version": "6.12.3", "resolved": "https://registry.npmjs.org/qs/-/qs-6.12.3.tgz", @@ -502,6 +602,11 @@ } }, "dependencies": { + "@supercharge/promise-pool": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/@supercharge/promise-pool/-/promise-pool-3.2.0.tgz", + "integrity": 
"sha512-pj0cAALblTZBPtMltWOlZTQSLT07jIaFNeM8TWoJD1cQMgDB9mcMlVMoetiB35OzNJpqQ2b+QEtwiR9f20mADg==" + }, "agent-base": { "version": "7.1.1", "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.1.tgz", @@ -510,6 +615,21 @@ "debug": "^4.3.4" } }, + "asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" + }, + "axios": { + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.7.2.tgz", + "integrity": "sha512-2A8QhOMrbomlDuiLeK9XibIBzuHeRcqqNOHp0Cyp5EoJ1IFDh+XZH3A6BkXtv0K4gFGCI0Y4BM7B1wOEi0Rmgw==", + "requires": { + "follow-redirects": "^1.15.6", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" + } + }, "base64-js": { "version": "1.5.1", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", @@ -537,6 +657,14 @@ "set-function-length": "^1.2.1" } }, + "combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "requires": { + "delayed-stream": "~1.0.0" + } + }, "debug": { "version": "4.3.5", "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.5.tgz", @@ -555,6 +683,11 @@ "gopd": "^1.0.1" } }, + "delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==" + }, "ecdsa-sig-formatter": { "version": "1.0.11", "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", @@ -581,6 +714,21 @@ "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", "integrity": 
"sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==" }, + "follow-redirects": { + "version": "1.15.6", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz", + "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==" + }, + "form-data": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", + "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", + "requires": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "mime-types": "^2.1.12" + } + }, "function-bind": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", @@ -745,6 +893,19 @@ "safe-buffer": "^5.0.1" } }, + "mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==" + }, + "mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "requires": { + "mime-db": "1.52.0" + } + }, "ms": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", @@ -763,6 +924,11 @@ "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.2.tgz", "integrity": "sha512-IRZSRuzJiynemAXPYtPe5BoI/RESNYR7TYm50MC5Mqbd3Jmw5y790sErYw3V6SryFJD64b74qQQs9wn5Bg/k3g==" }, + "proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" + }, "qs": { "version": "6.12.3", "resolved": 
"https://registry.npmjs.org/qs/-/qs-6.12.3.tgz", diff --git a/scripts/package.json b/scripts/package.json index 278aab8..2a3a6c0 100644 --- a/scripts/package.json +++ b/scripts/package.json @@ -10,6 +10,8 @@ "author": "", "license": "ISC", "dependencies": { - "googleapis": "^140.0.1" + "googleapis": "^140.0.1", + "@supercharge/promise-pool": "^3.2.0", + "axios": "^1.7.2" } } diff --git a/scripts/youtubeSubtitleEdit.js b/scripts/youtubeSubtitleEdit.js new file mode 100644 index 0000000..37a8169 --- /dev/null +++ b/scripts/youtubeSubtitleEdit.js @@ -0,0 +1,262 @@ +import { getVideosLast72Hours, initYoutube, updateVideo, updateVideoThumbnail } from './youtubeAPI.js' +import fs from 'fs' +import axios from 'axios' +import path from 'path' +import { PromisePool } from '@supercharge/promise-pool' + +const GLADIA_KEY_PATH = path.resolve(process.env.HOME, '.credentials', 'gladia_api.key') +const OPENAI_KEY_PATH = path.resolve(process.env.HOME, '.credentials', 'openai_api.key') +const POLLING_INTERVAL = 5000 // 5 seconds +const CONCURRENT_JOBS = 10 +const PLAYLIST_ID = 'replace-me' + +// This whole script is here to generate subtitles for youtube videos +// using Gladia & ChatGPT. It won't upload subtitles to youtube +// but only export SRT files into the `out_srt` directory. +// +// Configuration: +// - Create a file containing the gladia API key in ~/.credentials/gladia_api.key +// - Create a file containing the chatgpt API key in ~/.credentials/openai_api.key +// - Ensure you have youtube credentials for the API in ~/.credentials/youtube.credentials.json +// - Ensure you have a client_secret.json file to bypass OAuth2 from youtube +// - Update the const variable PLAYLIST_ID in this script with the ID of a playlist containing all videos +// +// Notes: +// - Videos & the playlist MUST NOT be private; they must be at least unlisted ("non-visible") +// - You can change the concurrency (CONCURRENT_JOBS) to better follow what's happening +// - If any SRT or keywords are already generated, they won't be recreated.
/**
 * Reads an API key from a local file.
 *
 * Exits the process with code 1 (after printing instructions) when the file
 * does not exist, because nothing else in this script can work without it.
 *
 * @param {string} filePath Absolute path of the file holding the key.
 * @returns {string} The file content with surrounding whitespace removed.
 */
const getApiKey = (filePath) => {
    if (!fs.existsSync(filePath)) {
        console.error(`❌ Error: API key file not found at: ${filePath}`)
        console.log('🔄 Please create a file at the above location with your API key.')
        process.exit(1)
    }
    return fs.readFileSync(filePath, 'utf-8').trim()
}

const GLADIA_API_KEY = getApiKey(GLADIA_KEY_PATH)
const OPENAI_API_KEY = getApiKey(OPENAI_KEY_PATH)
const GLADIA_TRANSCRIPTION_ENDPOINT = 'https://api.gladia.io/v2/transcription'

/**
 * Associates each YouTube playlist item with the OpenPlanner session whose
 * title contains, or is contained in, the video title.
 *
 * @param {Array<object>} youtubeVideos Playlist items; `snippet.title`,
 *   `contentDetails.videoId` and `contentDetails.videoPublishedAt` are read.
 * @param {{sessions: Array<object>}} openPlannerData Parsed OpenPlanner export.
 * @returns {Array<{videoId: string, publishedAt: string, title: string, session: object}>}
 *   Only the videos for which a session was found.
 */
const joinYoutubeAndOpenPlannerData = (youtubeVideos, openPlannerData) => {
    const videosWithOpenPlannerData = youtubeVideos.map((video) => {
        const videoTitle = video.snippet.title

        const session = openPlannerData.sessions.find(
            (candidate) =>
                // An empty title is a substring of every string and would match any
                // video; a missing title would throw on `.includes`.
                typeof candidate.title === 'string' &&
                candidate.title.length > 0 &&
                (videoTitle.includes(candidate.title) || candidate.title.includes(videoTitle))
        )

        return {
            videoId: video.contentDetails.videoId,
            publishedAt: video.contentDetails.videoPublishedAt,
            // Kept so unmatched videos can be reported by title below; the raw
            // `snippet` object is not carried over to the mapped entries.
            title: videoTitle,
            session,
        }
    })

    const videosWithValidSession = videosWithOpenPlannerData.filter((video) => video.session)
    const unmatchedTitles = videosWithOpenPlannerData.filter((video) => !video.session).map((video) => video.title)

    console.log(`ℹ️ Matching videos: ${videosWithValidSession.length}`)
    console.log(`ℹ️ Non matching video title or no speakers: ${unmatchedTitles.join(', ')}`)

    return videosWithValidSession
}

/**
 * Asks Gladia to start transcribing the given audio/video URL.
 *
 * @param {string} audioUrl URL Gladia should fetch the media from.
 * @param {string[]} customVocabulary Words sent as `custom_vocabulary`.
 * @returns {Promise<string>} The transcription ID, or `''` when the request
 *   failed (the failure is logged, never thrown).
 */
async function getTranscriptionIdFromGladia(audioUrl, customVocabulary) {
    const headers = {
        'Content-Type': 'application/json',
        'x-gladia-key': GLADIA_API_KEY,
    }

    const payload = {
        audio_url: audioUrl,
        subtitles: true,
        subtitles_config: {
            formats: ['srt'],
        },
        custom_vocabulary: customVocabulary,
    }

    try {
        const response = await axios.post(GLADIA_TRANSCRIPTION_ENDPOINT, payload, { headers })

        if (response.status !== 201) {
            throw new Error(`Unexpected HTTP status ${response.status}: ${JSON.stringify(response.data)}`)
        }

        const transcriptionId = response.data.id
        if (!transcriptionId) {
            throw new Error('Transcription ID not found in response')
        }

        return transcriptionId
    } catch (error) {
        // Only the message and response body are logged. NOTE(review): the full
        // axios error object appears to embed the request config, including the
        // `x-gladia-key` header - confirm before logging it wholesale again.
        console.error(
            `❌ Failed to initiate transcription for URL: ${audioUrl}: `,
            payload,
            error.message,
            error.response?.data ?? ''
        )
        // Explicit empty ID so callers can reliably detect the failure.
        return ''
    }
}

/**
 * Writes SRT subtitle text to disk.
 *
 * @param {string} subtitles SRT content.
 * @param {string} filename Destination path.
 */
function saveSubtitlesToSrt(subtitles, filename) {
    fs.writeFileSync(filename, subtitles)
}

/**
 * Polls Gladia every POLLING_INTERVAL milliseconds until the transcription is
 * finished, then returns the first generated subtitle track.
 *
 * @param {string} transcriptionId ID returned when the transcription was created.
 * @returns {Promise<string>} Subtitle content.
 * @throws {Error} When the HTTP status is not 200, when Gladia reports the job
 *   as failed, or when the finished result holds no subtitles.
 */
async function getFullTranscriptionFromGladia(transcriptionId) {
    const headers = {
        'x-gladia-key': GLADIA_API_KEY,
    }

    for (;;) {
        const response = await axios.get(`${GLADIA_TRANSCRIPTION_ENDPOINT}/${transcriptionId}`, { headers })

        if (response.status !== 200) {
            throw new Error('Failed to fetch full transcription')
        }

        const { status, result } = response.data

        if (status === 'done') {
            const subtitles = result?.transcription?.subtitles?.[0]?.subtitles
            if (!subtitles) {
                throw new Error('Subtitles not found in response')
            }
            return subtitles
        }

        // Without this check a failed job would be polled forever.
        // NOTE(review): 'error' / `error_code` follow the Gladia v2 result schema - confirm.
        if (status === 'error') {
            throw new Error(
                `Transcription ${transcriptionId} failed (error code: ${response.data.error_code ?? 'unknown'})`
            )
        }

        await new Promise((resolve) => setTimeout(resolve, POLLING_INTERVAL))
    }
}

/**
 * Asks the OpenAI chat completions API for technology keywords describing a
 * session abstract.
 *
 * @param {{title: string, abstract?: string}} session OpenPlanner session.
 * @returns {Promise<string[]>} The keywords, or an empty array when there is
 *   no abstract or when the request/parsing failed (failures are logged).
 */
async function generateKeywords(session) {
    if (!session.abstract) {
        console.log(`ℹ️ No abstract for session: ${session.title}, skipping keywords generation.`)
        return []
    }

    const prompt = `Extract 10 technology-related keywords from the following abstract. Keywords shouldn't be french words, but rather technology names or methods. Give me a json list raw:\n\n${session.abstract}`

    try {
        const response = await axios.post(
            'https://api.openai.com/v1/chat/completions',
            {
                model: 'gpt-3.5-turbo',
                messages: [
                    { role: 'system', content: 'You are a helpful assistant.' },
                    { role: 'user', content: prompt },
                ],
                max_tokens: 500,
                temperature: 0,
            },
            {
                headers: {
                    Authorization: `Bearer ${OPENAI_API_KEY}`,
                    'Content-Type': 'application/json',
                },
            }
        )

        const rawKeywords = response.data?.choices?.[0]?.message?.content
        if (!rawKeywords) {
            throw new Error('No keywords found in response')
        }

        // The model may wrap its answer in a Markdown code fence; strip only the
        // fence so a keyword containing "json" or a backtick is left untouched.
        const jsonText = rawKeywords
            .trim()
            .replace(/^```(?:json)?\s*/i, '')
            .replace(/\s*```$/, '')

        const parsed = JSON.parse(jsonText)
        if (!Array.isArray(parsed)) {
            throw new Error('Keywords response is not a JSON array')
        }

        return parsed.filter((keyword) => typeof keyword === 'string' && keyword.length > 0)
    } catch (error) {
        console.error(`❌ Error generating keywords for session: ${session.title}`, error.message)
        // Always an array, so callers never persist or forward a raw string.
        return []
    }
}

/**
 * Saves keywords as `<videoId>.json` inside the keywords directory, creating
 * the directory when needed.
 *
 * @param {string[]} keywords Keywords to persist.
 * @param {string} videoId YouTube video ID, used as the file name.
 * @param {string} [outKeywordsDir='./out_keywords'] Destination directory.
 */
function saveKeywordsToJson(keywords, videoId, outKeywordsDir = './out_keywords') {
    fs.mkdirSync(outKeywordsDir, { recursive: true })

    const jsonFilename = path.join(outKeywordsDir, `${videoId}.json`)
    fs.writeFileSync(jsonFilename, JSON.stringify(keywords, null, 2))
}

/**
 * Generates (or reuses) keywords for one video, then transcribes it with
 * Gladia and writes the SRT file. Existing keyword and SRT files are reused
 * rather than regenerated. Errors are logged and never rethrown, so a single
 * failing video does not interrupt the others.
 *
 * @param {{videoId: string, session: object}} video Video joined with its session.
 * @param {string} outSrtDir Directory receiving `<videoId>.srt`.
 * @param {string} outKeywordsDir Directory receiving `<videoId>.json`.
 * @returns {Promise<void>}
 */
const processVideo = async (video, outSrtDir, outKeywordsDir) => {
    const srtFilename = path.join(outSrtDir, `${video.videoId}.srt`)
    const jsonFilename = path.join(outKeywordsDir, `${video.videoId}.json`)

    try {
        let customVocabulary = []

        if (fs.existsSync(jsonFilename)) {
            customVocabulary = JSON.parse(fs.readFileSync(jsonFilename, 'utf-8'))
            console.log(
                `ℹ️ Keywords JSON file already exists for video ID: ${video.videoId}, using existing keywords.`
            )
        } else {
            const keywords = await generateKeywords(video.session)
            if (keywords.length > 0) {
                saveKeywordsToJson(keywords, video.videoId, outKeywordsDir)
                customVocabulary = keywords
                console.log(
                    `✅ Generated and saved keywords for session title: ${video.session.title} (ID: ${video.videoId})`
                )
            }
        }

        if (fs.existsSync(srtFilename)) {
            console.log(`ℹ️ SRT file already exists for video ID: ${video.videoId}, skipping transcription...`)
            return
        }

        const audioUrl = `https://www.youtube.com/watch?v=${video.videoId}`
        console.log(`🚀 Initiating transcription for ${video.session.title} (ID: ${video.videoId})`)

        const transcriptionId = await getTranscriptionIdFromGladia(audioUrl, customVocabulary)

        // Falsy covers both '' and undefined: never poll without a real ID.
        if (!transcriptionId) {
            return
        }

        console.log(`🚀 Awaiting transcription results for ${video.session.title} (ID: ${video.videoId})`)
        const subtitles = await getFullTranscriptionFromGladia(transcriptionId)

        saveSubtitlesToSrt(subtitles, srtFilename)
        console.log(`✅ Processed and saved SRT for ${video.session.title} (ID: ${video.videoId})`)
    } catch (error) {
        console.error(`❌ Failed to process video ID: ${video.videoId}`, error.message)
    }
}

/**
 * Entry point: fetches the playlist videos, matches them with the sessions of
 * `openplanner.json`, then processes them with at most CONCURRENT_JOBS videos
 * in flight at once.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
    const { auth, channelId } = await initYoutube()
    const openPlannerFileName = 'openplanner.json'
    const openPlannerContent = JSON.parse(fs.readFileSync(openPlannerFileName, 'utf-8'))

    const videos = await getVideosLast72Hours(auth, channelId, PLAYLIST_ID)
    console.log('ℹ️ Retrieved videos: ' + videos.length)

    const videosWithValidSession = joinYoutubeAndOpenPlannerData(videos, openPlannerContent)

    const outSrtDir = './out_srt'
    const outKeywordsDir = './out_keywords'
    fs.mkdirSync(outSrtDir, { recursive: true })
    fs.mkdirSync(outKeywordsDir, { recursive: true })

    const { errors } = await PromisePool.withConcurrency(CONCURRENT_JOBS)
        .for(videosWithValidSession)
        .process(async (video) => {
            await processVideo(video, outSrtDir, outKeywordsDir)
        })

    // The pool collects task failures instead of throwing; surface them.
    if (errors.length > 0) {
        console.error(`❌ ${errors.length} video(s) failed unexpectedly:`, errors.map((error) => error.message))
    }

    console.log('🏁 Completed all video processing.')
}

main().catch((error) => {
    console.error('❌ Fatal error:', error?.message ?? error)
    process.exitCode = 1
})