forked from cta-wave/mezzanine
-
Notifications
You must be signed in to change notification settings - Fork 0
/
add_second_audio_track.py
119 lines (96 loc) · 3.85 KB
/
add_second_audio_track.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env python
import argparse
import os
import subprocess
import sys
from pathlib import Path
from pydub import AudioSegment, effects
import pyttsx3
def set_amplitude(audio, target_dBFS):
    """Return *audio* with gain applied so its average level equals *target_dBFS*.

    The gain is the difference between the requested level and the level
    reported by ``audio.dBFS``, and is applied with ``audio.apply_gain``.

    A segment that is digital silence reports a level of negative infinity;
    no finite gain can bring it to the target, so it is returned unchanged
    rather than being scaled by an infinite factor.
    """
    current_dBFS = audio.dBFS
    if current_dBFS == float("-inf"):
        return audio
    return audio.apply_gain(target_dBFS - current_dBFS)
def second_audio_path(mezzanine):
    """Return the output path for the copy of *mezzanine* with the extra audio track.

    The output sits in the same directory as the source and keeps its
    extension, with ``_2ndAudio[English]`` appended to the file stem. The path
    is built with pathlib so it is valid on any platform.
    """
    return mezzanine.with_name(mezzanine.stem + '_2ndAudio[English]' + mezzanine.suffix)


def parse_ffprobe_duration(output):
    """Return the duration in seconds found in ffprobe's CSV output.

    *output* is what ``ffprobe -show_entries format=duration -of csv`` wrote
    to stdout (bytes or str): a single ``format,<seconds>`` row. Surrounding
    whitespace, including either style of line ending, is ignored.
    """
    text = output.decode() if isinstance(output, bytes) else output
    return float(text.strip().split(',')[1])


def run_ffmpeg(arguments):
    """Run ffmpeg with *arguments*; raise CalledProcessError if it exits non-zero."""
    subprocess.check_call(['ffmpeg'] + list(arguments))


def main():
    """Add a second audio track, marked by a spoken "English", to a mezzanine file.

    A synthesised voice saying "English" is padded with silence to a 15 second
    cycle, looped and mixed over the source audio, levelled to -23 dBFS,
    encoded as AAC and muxed into a copy of the source file. Intermediate
    files are written to the current working directory and are removed when
    the function finishes, whether or not every step succeeded.
    """
    # Basic argument handling
    parser = argparse.ArgumentParser(description="WAVE Mezzanine 2nd Audio Generator.")
    parser.add_argument('input', help="Source file.")
    args = parser.parse_args()

    # Check that the source file is present
    if not os.path.isfile(args.input):
        sys.exit("Source file \""+args.input+"\" does not exist.")

    mezzanine = Path(args.input)
    mezzanine_out = second_audio_path(mezzanine)

    temp_files = [
        'temp_voice.wav',
        'temp_nvoice.wav',
        'temp_silence.wav',
        'temp_voiceandsilence.wav',
        'temp_audiotrack2.wav',
        'temp_naudiotrack2.wav',
        'temp_audiotrack2.aac',
    ]

    try:
        # Generate audio to mix with mezzanine audio, creating an additional different audio track
        engine = pyttsx3.init()
        engine.setProperty('rate', 100)
        engine.setProperty('volume', 1.0)
        engine.save_to_file("English", 'temp_voice.wav')
        engine.runAndWait()

        # Normalise voice audio
        voice = AudioSegment.from_file('temp_voice.wav', 'wav')
        effects.normalize(voice).export("temp_nvoice.wav", format="wav")

        # Get voice audio length, then pad with silence so that voice + silence lasts 15 seconds
        probe_output = subprocess.check_output(['ffprobe', '-i', 'temp_nvoice.wav', '-show_entries', 'format=duration', '-v', 'quiet', '-of', 'csv'])
        voice_duration = parse_ffprobe_duration(probe_output)
        silence_duration = 15 - voice_duration

        # Generate silence (mono, 48 kHz)
        run_ffmpeg(['-t', str(silence_duration),
                    '-f', 'lavfi',
                    '-i', 'anullsrc=channel_layout=mono:sample_rate=48000',
                    '-y',
                    'temp_silence.wav'])

        # Concatenate voice and silence
        run_ffmpeg(['-i', 'temp_nvoice.wav',
                    '-i', 'temp_silence.wav',
                    '-filter_complex', '[0] [1] concat=n=2:v=0:a=1 [a]',
                    '-map', '[a]',
                    '-y',
                    'temp_voiceandsilence.wav'])

        # Mix mezzanine audio with voice+silence; the voice clip is looped
        # indefinitely and the mix ends with the mezzanine audio (duration=first)
        run_ffmpeg(['-i', str(mezzanine),
                    '-stream_loop', '-1',
                    '-i', 'temp_voiceandsilence.wav',
                    '-filter_complex', '[0:a][1] amix=inputs=2:duration=first:dropout_transition=2:weights=1 1 [a]',
                    '-map', '[a]',
                    '-y',
                    'temp_audiotrack2.wav'])

        # Bring the mixed audio to an average level of -23 dBFS
        mixed = AudioSegment.from_file('temp_audiotrack2.wav', 'wav')
        set_amplitude(mixed, -23.0).export("temp_naudiotrack2.wav", format="wav")

        # Encode audio track
        run_ffmpeg(['-i', 'temp_naudiotrack2.wav',
                    '-c:a', 'aac',
                    '-b:a', '320k', '-ac', '2',
                    '-y',
                    'temp_audiotrack2.aac'])

        # Mux new audio track into mezzanine, copying all existing streams as they are
        run_ffmpeg(['-i', str(mezzanine),
                    '-i', 'temp_audiotrack2.aac',
                    '-map', '0',
                    '-map', '1',
                    '-vcodec', 'copy',
                    '-acodec', 'copy',
                    '-y',
                    str(mezzanine_out)])
    except subprocess.CalledProcessError as err:
        # Stop at the first failing tool instead of carrying on with missing or stale files
        sys.exit("\""+str(err.cmd[0])+"\" failed with exit code "+str(err.returncode)+".")
    finally:
        # Remove the temporary audio files, including after a failure part-way through
        print("Removing temporary files...", end='', flush=True)
        for temp_file in temp_files:
            try:
                os.remove(temp_file)
            except FileNotFoundError:
                # The step that creates this file was never reached
                pass
        print("Done")


if __name__ == "__main__":
    main()