-
Notifications
You must be signed in to change notification settings - Fork 1
/
push_vtt_file.py
138 lines (116 loc) · 4.8 KB
/
push_vtt_file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import socket
import datetime
import re
import time
#https://stackoverflow.com/questions/54371492/changing-the-format-of-timestamp-in-python-3-7
def to_timedelta(t):
    """Convert an ``HH:MM:SS.mmm`` timestamp string into a ``datetime.timedelta``.

    The separator in front of the milliseconds may be ``.`` or ``,``; the cue
    pattern in ``read_caption_file`` lets both through, so both must parse
    here.

    Args:
        t: timestamp text made of four integer fields (hours, minutes,
            seconds, milliseconds).

    Returns:
        The equivalent ``datetime.timedelta``.

    Raises:
        ValueError: if ``t`` does not split into exactly four integer fields.
    """
    # Accept ',' as well as '.' so comma-style timestamps no longer fail.
    hours, minutes, seconds, millis = (int(part) for part in re.split(r'[:.,]+', t))
    return datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds, milliseconds=millis)
def select(cues,currenttime,prevtime):
    """Return the cues that start after ``prevtime`` and no later than ``currenttime``.

    NOTE: another ``select`` is defined further down this file; that later
    definition rebinds the name when the module is loaded.
    """
    chosen = []
    for cue in cues:
        # Skip cues at or before the previous tick, and cues still in the future.
        if not cue['start'] > prevtime:
            continue
        if not cue['start'] <= currenttime:
            continue
        chosen.append(cue)
    return chosen
def unescape(s):
    """Decode HTML character references in caption text.

    The previous chain of ``str.replace`` calls swapped ``<``, ``>``, ``"``
    and ``&`` for themselves, so ``&lt;``, ``&gt;``, ``&quot;`` and ``&amp;``
    were never decoded, and ``&apos;`` was turned into ``';``. The standard
    library decoder handles all of these (plus numeric references) in a
    single pass, so ``&amp;lt;`` correctly becomes ``&lt;`` rather than ``<``.

    Args:
        s: text that may contain character references.

    Returns:
        ``s`` with character references replaced by the characters they name.
    """
    # Local import: the file's import block does not bring in ``html``.
    import html

    return html.unescape(s)
def read_caption_file(vttfile):
    """Read a caption file and return its cues as a list of dicts.

    The file is opened as UTF-8 text and scanned with a verbose regular
    expression that matches a timing line (start, ``-->``, end, optional
    trailing settings) followed by the cue text.

    Each returned dict has the keys:
        'start' / 'end': ``datetime.timedelta`` values from ``to_timedelta``
        'extra': whatever followed the end timestamp on the timing line
        'text': the cue text after passing through ``unescape``
    """
    #https://stackoverflow.com/questions/48640490/python-2-7-matching-a-subtitle-events-in-vtt-subtitles-using-a-regular-expressi
    cue_pattern = re.compile(r"""(^[0-9]{2}[:][0-9]{2}[:][0-9]{2}[.,][0-9]{3}) # match TC-IN in group1
[ ]-->[ ] # VTT/SRT style TC-IN--TC-OUT separator
([0-9]{2}[:][0-9]{2}[:][0-9]{2}[.,][0-9]{3}) # match TC-OUT n group2
(.*)\r?\n([\s\S]*?)\s*(?:(?:\r?\n){2}|\Z) # additional VTT info (like) alignment
# subtitle_content """, re.MULTILINE|re.VERBOSE)

    with open(vttfile, 'r', encoding = 'utf-8') as caption_file:
        raw_content = caption_file.read()

    # One dict per regex match, in file order.
    return [
        {
            'start': to_timedelta(found.group(1).strip()),
            'end': to_timedelta(found.group(2).strip()),
            'extra': found.group(3),
            'text': unescape(found.group(4)),
        }
        for found in cue_pattern.finditer(raw_content)
    ]
def select(cues, elapsed, cutoff):
    """Return the cues whose start lies in the window ``(cutoff, elapsed]``.

    A cue is kept when it has already started at ``elapsed`` but had not yet
    started at ``cutoff``; the input order is preserved.
    """
    def became_due(cue):
        # Started by now, and strictly after the previous cutoff.
        return elapsed >= cue['start'] and cutoff < cue['start']

    return list(filter(became_due, cues))
def cue_start_end(cues):
    """Return ``(start of the first cue, end of the last cue)``.

    Args:
        cues: list of cue dicts with 'start' and 'end' entries.

    Returns:
        A 2-tuple. For an empty list both elements are a zero
        ``datetime.timedelta``, so callers can always unpack two values
        (the earlier single-value return broke ``a, b = cue_start_end([])``).
    """
    if not cues:
        zero = datetime.timedelta()
        return zero, zero
    return cues[0]['start'], cues[-1]['end']
def connect_encoder(host, port,channel):
    """Open a TCP connection to the caption encoder and send the start-up bytes.

    Args:
        host: encoder host name or address; falsy means dry run.
        port: encoder TCP port (anything ``int()`` accepts); falsy means dry run.
        channel: ``1`` or ``2``, given as an int or a string.

    Returns:
        The connected socket, or ``None`` (after printing a notice) when host
        or port is missing or the channel is not 1 or 2.

    Raises:
        OSError: if connecting or sending fails.
        ValueError: if ``port`` cannot be converted to an int.
    """
    # NOTE(review): the byte sequences below are copied unchanged; they are
    # presumably encoder insert-mode / caption control codes -- confirm
    # against the encoder's documentation.
    field_insert_modes = {'1': b"\x01\x33\x0D" * 2, '2': b"\x01\x34\x0D" * 2}

    # Normalise first: ``1 in '12'`` raises TypeError for an int, and the
    # substring test also accepted '' and '12', which then failed the lookup.
    channel_key = str(channel)
    if not (host and port and channel_key in field_insert_modes):
        print("Dry run; host, port and channel must be specified")
        return None

    fieldinsertmode = field_insert_modes[channel_key]
    rollupcode = b"\x14\x2D\x14\x70"
    text = bytes("Captions by CT-Cast\n", "iso-8859-1")

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.connect((host, int(port)))  # connect to link encoder using the specified credentials
        s.sendall(fieldinsertmode + rollupcode)
        # The original referenced an undefined name (``rollupcode4``) here.
        s.sendall(fieldinsertmode + rollupcode + text)
    except Exception:
        # Do not leak the socket when the encoder cannot be reached.
        s.close()
        raise
    return s
def send_cues(cues, connection):
    """Word-wrap the text of ``cues`` to 32 columns and send it to the encoder.

    Each cue's text is split into lines and words; words longer than the
    line width are hyphenated into chunks, then words are greedily packed
    into lines no longer than the width. The result is sent as ISO-8859-1
    bytes in a single ``sendall`` call.

    Args:
        cues: list of cue dicts with 'start' and 'text' entries.
        connection: object with a ``sendall(bytes)`` method, or a falsy value
            for a dry run (text is processed and logged but nothing is sent).
    """
    # Defined up front so the wrap pass never depends on the loop having run.
    MAX_LEN = 32

    # Pass 1: flatten the cues into words, with '\n' marking each line end.
    shortwords = []
    for c in cues:
        for line in c['text'].split('\n'):
            words = line.split(' ')
            print(c['start'], words)
            for word in words:
                # Cheap split: emit MAX_LEN-1 characters plus a hyphen and
                # continue from that same offset. The original resumed at
                # MAX_LEN and so dropped one character per chunk.
                while len(word) > MAX_LEN:
                    shortwords.append(word[:MAX_LEN - 1] + '-')
                    word = word[MAX_LEN - 1:]
                if word:
                    shortwords.append(word)
            shortwords.append('\n')
    print()

    # Pass 2: greedy packing of words into lines of at most MAX_LEN characters.
    lines = ['']
    for word in shortwords:
        if word == '\n':
            lines.append('')
            continue
        candidate = f"{lines[-1]} {word}" if lines[-1] else word
        if len(candidate) <= MAX_LEN:
            lines[-1] = candidate
        else:
            lines.append(word)

    one_line = '\n'.join(lines).replace('\n\n', '\n')
    if connection:
        # Characters outside ISO-8859-1 become '?' instead of raising
        # UnicodeEncodeError and aborting the whole caption run.
        connection.sendall(one_line.encode("iso-8859-1", errors="replace"))
def main(vttfilename='ex.vtt', speed_factor=2, channel='1', host=None, port=None):
    """Play a caption file back in (scaled) real time, pushing cues to the encoder.

    Called with no arguments this behaves as before: it reads ``ex.vtt`` at
    double speed with no host/port, so ``connect_encoder`` reports a dry run.

    Args:
        vttfilename: path of the caption file to read.
        speed_factor: multiplier applied to elapsed time (2 = twice as fast).
        channel: encoder channel, '1' or '2'.
        host: encoder host; ``None`` for a dry run.
        port: encoder port; ``None`` for a dry run.
    """
    all_cues = read_caption_file(vttfilename)
    if not all_cues:
        # Nothing to play; also avoids unpacking the empty-case result.
        print("0 cues read. Nothing to send.")
        return

    first_cue_at, cues_finish_at = cue_start_end(all_cues)
    print(f"{len(all_cues)} cues read. First cue at {first_cue_at}, ending at {cues_finish_at}")

    connection = connect_encoder(host, port, channel)
    try:
        previous_elapsed = datetime.timedelta(seconds=-1)
        # Monotonic clock: playback is unaffected by system clock changes.
        start_clock_time = time.monotonic()
        while True:
            elapsed = datetime.timedelta(seconds=time.monotonic() - start_clock_time)
            elapsed *= speed_factor

            # Select and send BEFORE the exit check, so cues that start
            # during the final tick are not skipped.
            cues = select(all_cues, elapsed, previous_elapsed)
            previous_elapsed = elapsed
            if cues:
                send_cues(cues, connection)

            if elapsed >= cues_finish_at:
                break
            time.sleep(1)
    finally:
        # Close the encoder socket on normal exit, error or Ctrl-C.
        if connection is not None:
            connection.close()
    print("Finished")
# Start playback only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()