#!/usr/bin/env python3
#
# postprocess-livestream.py - script to streamline the post-processing of youtube
#                  livestream recordings of worship services at PCC.
#
# author: rjones30@gmail.com
# version: october 9, 2023
#
# Added to the repository in commit c772d436b51e49c4621db533698c65c6335f6bd8
# (rjones30, Sun, 21 Jan 2024 17:08:04 -0500): "added manual video
# postprocessing script by r.t.jones [rtj]".

import os
import shutil
import subprocess
import sys

import pandas

try:
    from pydub import AudioSegment
except ImportError:
    # Keep the helper functions importable without pydub; require_pydub()
    # reports the missing package before any audio work is attempted.
    AudioSegment = None

# External tools and working locations, relative to the current directory.
YT_DLP = "/usr/local/bin/yt-dlp"
SERMON_CSV = "pcc-sermons.csv"
SERMON_DIR = "sermons"
WORK_DIR = "process"


def require_pydub(caller):
    """
    Exit with an error message attributed to caller if the pydub
    package could not be imported.
    """
    if AudioSegment is None:
        print(f"{caller} error - the pydub package is required but is not installed")
        sys.exit(1)


def download_livestream(sermon):
    """
    Download the recording from a livestream event, using the url
    found in argument sermon['livestream']. The result is a new
    video file saved in the sermons directory under the current
    working directory. The relative pathname is the return value.
    If a file of the expected name is already present in the sermons
    directory it is reused and nothing is downloaded. The script exits
    with status 1 if yt-dlp cannot report a filename for the url, and
    with status 2 if the download itself fails.
    """
    url = sermon['livestream']
    # First ask yt-dlp only for the name of the file it would write.
    resp = subprocess.run(
        [YT_DLP, "--print", "filename", url],
        stdout=subprocess.PIPE)
    filename = resp.stdout.decode().rstrip()
    if resp.returncode != 0 or not filename:
        print("download_livestream error - cannot download livestream from",
              url)
        print("Invalid url in livestream column of csv file?")
        sys.exit(1)
    saved = f"{SERMON_DIR}/{filename}"
    if os.path.exists(saved):
        print(f"download_livestream info - {filename} already downloaded, using existing copy")
        return saved

    print(f"download_livestream info - {filename} not found in sermons/, downloading from youtube")
    resp = subprocess.run([YT_DLP, url])
    if resp.returncode != 0:
        print("download_livestream error - cannot download livestream from",
              url)
        print("Invalid url in livestream column of csv file?")
        sys.exit(2)
    if not os.path.exists(filename):
        print("download_livestream error - yt-dlp did not produce the expected file",
              filename)
        sys.exit(2)
    # The sermons directory may not exist yet on a fresh checkout.
    os.makedirs(SERMON_DIR, exist_ok=True)
    shutil.move(filename, saved)
    return saved


def clip_bounds(sermon):
    """
    Return the (start, end) times of the sermon as strings, taken from
    the "sermon starts" and "sermon ends" entries of the sermon argument.
    Raises ValueError if either entry is absent or empty, so the problem
    is reported before ffmpeg is handed a non-string argument.
    """
    bounds = []
    for column in ("sermon starts", "sermon ends"):
        value = sermon.get(column)
        if value is None or pandas.isna(value):
            raise ValueError(f"no value in '{column}' column of csv file")
        bounds.append(str(value).strip())
    return tuple(bounds)


def process_video(mkvfile, sermon):
    """
    Truncate the video in the livestream image to the time between
    the sermon start and end times recorded in the "sermon starts"
    and "sermon ends" columns of the sermon argument, and return
    the path to the output mp4 file. The streams are copied without
    re-encoding. The script exits with status 2 on any failure.
    """
    filename = "sermon_part.mp4"
    try:
        starts, ends = clip_bounds(sermon)
    except ValueError as err:
        print("process_video error -", err)
        sys.exit(2)
    resp = subprocess.run(
        [
            "ffmpeg", "-y", "-i", mkvfile,
            "-c", "copy",
            "-ss", starts,
            "-to", ends,
            filename,
        ])
    if resp.returncode != 0:
        print("process_video error - cannot truncate livestream image",
              mkvfile)
        sys.exit(2)
    return filename


def process_audio(mp4file, sermon):
    """
    Apply a denoise filter to the sound track contained in the mp4file
    input file, encode it in a new audio (mp3) file, and return the path
    to that mp3 file. The sermon argument is accepted for symmetry with
    process_video but is not used. Intermediate wav files are written
    to the process directory and removed afterwards. The script exits
    with status 2 if ffmpeg or the noisereducer tool fails.
    """
    require_pydub("process_audio")
    filename = "sermon_part.mp3"
    reduce_wav = f"{WORK_DIR}/reduce.wav"
    denoised_wav = f"{WORK_DIR}/denoised.wav"
    resp = subprocess.run(
        [
            "ffmpeg", "-y", "-i", mp4file,
            filename,
        ])
    if resp.returncode != 0:
        print("process_audio error - cannot extract audio from mp4 video",
              mp4file)
        sys.exit(2)
    # The denoiser is fed a single-channel wav file.
    os.makedirs(WORK_DIR, exist_ok=True)
    sound = AudioSegment.from_mp3(filename)
    sound = sound.set_channels(1)
    sound.export(reduce_wav, format="wav")
    resp = subprocess.run(
        [
            "data/noisereducer",
            "-i", reduce_wav,
            "-o", denoised_wav,
            "-p", "data/noise.wav",
            "--noiseGain", "12",
            "--sensitivity", "6",
            "--smoothing", "3",
        ])
    if resp.returncode != 0:
        print("process_audio error - cannot run noisereducer on wav audio",
              reduce_wav)
        sys.exit(2)
    # Replace the raw mp3 with one encoded from the denoised audio.
    os.remove(filename)
    new_sound = AudioSegment.from_wav(denoised_wav)
    new_sound.export(filename, codec="libmp3lame")
    os.remove(denoised_wav)
    os.remove(reduce_wav)
    return filename


def sermon_rows(sermon_data):
    """
    Convert the sermon table (a pandas DataFrame) into a list with one
    dictionary per row, keyed by column heading.
    """
    return sermon_data.to_dict("records")


def needs_processing(sermon):
    """
    Return True if the sermon has a title and a preacher, is missing
    its video or its audio entry, and has a livestream url to work from.
    """
    if pandas.isna(sermon['title']) or pandas.isna(sermon['preacher']):
        return False
    if not (pandas.isna(sermon['video']) or pandas.isna(sermon['audio'])):
        return False
    return not pandas.isna(sermon['livestream'])


def sermon_name(sermon):
    """
    Build the base name (no directory, no extension) of the output files
    for a sermon, in the form "YYYY.MM.DDA title - preacher", from the
    month/day/year value in sermon['date']. Any "/" in the title or
    preacher is replaced by "-" so the name stays a single path component.
    Raises ValueError if the date cannot be parsed.
    """
    date = sermon['date']
    try:
        mon, day, year = (int(part) for part in str(date).split('/')[:3])
    except ValueError:
        raise ValueError(f"cannot parse date {date!r}, expected month/day/year") from None
    sername = f"{year}.{mon:02d}.{day:02d}A {sermon['title']} - {sermon['preacher']}"
    return sername.replace("/", "-")


def main(csvfile=SERMON_CSV):
    """
    Read the sermon table from csvfile and, for every sermon that still
    needs its video or audio, download the livestream, cut out the sermon
    and write sermons/<name>.mp4 and sermons/<name>.mp3.
    """
    # Fail before any long download if the audio step cannot run.
    require_pydub("postprocess-livestream")
    sermons = sermon_rows(pandas.read_csv(csvfile))
    print(f"quitting after {len(sermons)} rows read")

    for sermon in sermons:
        if not needs_processing(sermon):
            continue
        # Validate the row up front so a bad cell is reported before the
        # download instead of crashing after it.
        try:
            sername = sermon_name(sermon)
            clip_bounds(sermon)
        except ValueError as err:
            print(f"skipping sermon {sermon['title']} - {err}")
            continue
        print("working on sermon", sername)
        mkvfile = download_livestream(sermon)
        print("download_livestream returns", mkvfile)
        mp4file = process_video(mkvfile, sermon)
        print("process_video returns", mp4file)
        mp3file = process_audio(mp4file, sermon)
        print("process_audio returns", mp3file)
        os.makedirs(SERMON_DIR, exist_ok=True)
        shutil.move(mp4file, f"{SERMON_DIR}/{sername}.mp4")
        shutil.move(mp3file, f"{SERMON_DIR}/{sername}.mp3")
        print(f"now you can upload sermon video from sermons/{sername}.mp4")
        print(f"now you can upload sermon audio from sermons/{sername}.mp3")


if __name__ == "__main__":
    main(sys.argv[1] if len(sys.argv) > 1 else SERMON_CSV)