-
Notifications
You must be signed in to change notification settings - Fork 0
/
postprocess-livestream.py
executable file
·151 lines (145 loc) · 5.4 KB
/
postprocess-livestream.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env python3
#
# postprocess-livestream.py - script to streamline the post-processing of youtube
# livestream recordings of worship services at PCC.
#
# author: rjones30@gmail.com
# version: october 9, 2023
import os
import sys
import shutil
import pandas
import subprocess
from pydub import AudioSegment
def download_livestream(sermon):
    """
    Download the recording from a livestream event, using the url
    found in argument sermon['livestream']. The result is a new
    recording file (the caller treats it as an mkv) saved in the
    sermons directory under the current working directory. The
    relative pathname is the return value.

    yt-dlp is first run with "--print filename" to learn the output
    file name it would use for the url. If sermons/<filename> already
    exists that copy is reused and nothing is downloaded; otherwise
    yt-dlp is run again to fetch the recording into the current working
    directory, and the result is moved into sermons/.

    Exits the script with status 1 if the file name query fails or
    yields no name, and with status 2 if the download itself fails.
    """
    url = sermon['livestream']
    resp = subprocess.run(
        [
            "/usr/local/bin/yt-dlp",
            "--print", "filename",
            url,
        ],
        stdout=subprocess.PIPE)
    filename = resp.stdout.decode().rstrip() if resp.returncode == 0 else ""
    # An empty name must be treated as a failure: otherwise the existence
    # check below would match the sermons/ directory itself and return it
    # as though it were a downloaded recording.
    if not filename:
        print("download_livestream error - cannot download livestream from",
              url)
        print("Invalid url in livestream column of csv file?")
        sys.exit(1)

    target = f"sermons/{filename}"
    if os.path.exists(target):
        print(f"download_livestream info - {filename} already downloaded, using existing copy")
        return target

    print(f"download_livestream info - {filename} not found in sermons/, downloading from youtube")
    resp = subprocess.run(
        [
            "/usr/local/bin/yt-dlp",
            url,
        ])
    if resp.returncode != 0:
        print("download_livestream error - cannot download livestream from",
              url)
        print("Invalid url in livestream column of csv file?")
        sys.exit(2)
    # Make sure the destination directory exists so the move cannot fail
    # on a fresh working directory.
    os.makedirs("sermons", exist_ok=True)
    shutil.move(filename, target)
    return target
def process_video(mkvfile, sermon):
    """
    Truncate the video in the livestream image to the time between
    the sermon start and end times recorded in the "sermon starts"
    and "sermon ends" columns of the sermon argument, and return
    the path to the output mp4 file.

    The output is always written to sermon_part.mp4 in the current
    working directory (overwriting any previous copy, because of the
    ffmpeg -y flag). Streams are copied (-c copy) rather than re-encoded.

    Exits the script with status 2 if either time is missing from the
    sermon record, or if ffmpeg returns a nonzero status.
    """
    filename = "sermon_part.mp4"
    start = sermon['sermon starts']
    end = sermon['sermon ends']
    # A blank csv cell arrives here as NaN; handing that (or any other
    # non-string value) to subprocess would raise a TypeError instead of
    # producing a useful diagnostic.
    if pandas.isna(start) or pandas.isna(end):
        print("process_video error - missing sermon start or end time for",
              mkvfile)
        sys.exit(2)
    resp = subprocess.run(
        [
            "ffmpeg", "-y", "-i", mkvfile,
            "-c", "copy",
            # str() so that numeric cells (e.g. plain seconds) are accepted
            "-ss", str(start),
            "-to", str(end),
            filename,
        ])
    if resp.returncode != 0:
        print("process_video error - cannot truncate livestream image",
              mkvfile)
        sys.exit(2)
    return filename
def process_audio(mp4file, sermon):
    """
    Apply a denoise filter to the sound track contained in the mp4file
    input file, encode it in a new audio (mp3) file, and return the path
    to that mp3 file.

    Steps:
      1. ffmpeg extracts the audio of mp4file into sermon_part.mp3.
      2. The audio is reduced to a single channel and written to
         process/reduce.wav.
      3. data/noisereducer is run on that wav file, with data/noise.wav
         passed through its -p option (presumably the noise profile -
         confirm against the noisereducer documentation), producing
         process/denoised.wav.
      4. The denoised audio is re-encoded to sermon_part.mp3 and the
         intermediate wav files are removed.

    The sermon argument is currently unused; it is kept so the signature
    parallels process_video.

    Exits the script with status 2 if ffmpeg or noisereducer fails.
    """
    filename = "sermon_part.mp3"
    resp = subprocess.run(
        [
            "ffmpeg", "-y", "-i", mp4file,
            filename,
        ])
    if resp.returncode != 0:
        print("process_audio error - cannot extract audio from mp4 video",
              mp4file)
        sys.exit(2)

    # The scratch directory may not exist on a fresh checkout; create it
    # so the wav export below cannot fail for that reason.
    os.makedirs("process", exist_ok=True)
    sound = AudioSegment.from_mp3(filename)
    sound = sound.set_channels(1)
    sound.export("process/reduce.wav", format="wav")
    resp = subprocess.run(
        [
            "data/noisereducer",
            "-i", "process/reduce.wav",
            "-o", "process/denoised.wav",
            "-p", "data/noise.wav",
            "--noiseGain", "12",
            "--sensitivity", "6",
            "--smoothing", "3",
        ])
    if resp.returncode != 0:
        print("process_audio error - cannot run noisereducer on wav audio",
              "process/reduce.wav")
        sys.exit(2)

    # Replace the raw extraction with the denoised encoding.
    os.remove(filename)
    new_sound = AudioSegment.from_wav("process/denoised.wav")
    new_sound.export(filename, codec="libmp3lame")
    os.remove("process/denoised.wav")
    os.remove("process/reduce.wav")
    return filename
def main():
    """
    Read pcc-sermons.csv and post-process every sermon that has a title,
    a preacher and a livestream url, but is still missing its video or
    audio entry.

    For each such row the livestream is downloaded, cut down to the
    sermon, denoised, and the resulting mp4/mp3 files are moved to
    sermons/<year>.<mm>.<dd>A <title> - <preacher>.{mp4,mp3}.
    """
    sermon_data = pandas.read_csv("pcc-sermons.csv")
    # One dict per csv row, keyed by column heading. This replaces an
    # open-ended index loop that relied on a bare except to detect the
    # end of the table (and so also hid genuine errors and Ctrl-C).
    sermons = sermon_data.to_dict("records")
    print(f"quitting after {len(sermons)} rows read")

    for sermon in sermons:
        # Skip rows that are not fully described yet.
        if pandas.isna(sermon['title']) or pandas.isna(sermon['preacher']):
            continue
        # Skip rows whose video and audio have both been produced already.
        if not (pandas.isna(sermon['video']) or pandas.isna(sermon['audio'])):
            continue
        # Nothing to download without a livestream url.
        if pandas.isna(sermon['livestream']):
            continue

        # The date column is split on '/' as month/day/year.
        mon, day, year = (int(part) for part in sermon['date'].split('/')[:3])
        sername = f"{year}.{mon:02d}.{day:02d}A {sermon['title']} - {sermon['preacher']}"
        print("working on sermon", sername)
        mkvfile = download_livestream(sermon)
        print("download_livestream returns", mkvfile)
        mp4file = process_video(mkvfile, sermon)
        print("process_video returns", mp4file)
        mp3file = process_audio(mp4file, sermon)
        print("process_audio returns", mp3file)
        shutil.move(mp4file, f"sermons/{sername}.mp4")
        shutil.move(mp3file, f"sermons/{sername}.mp3")
        print(f"now you can upload sermon video from sermons/{sername}.mp4")
        print(f"now you can upload sermon audio from sermons/{sername}.mp3")


if __name__ == "__main__":
    main()