forked from lowerquality/gentle
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathalign.py
60 lines (50 loc) · 1.97 KB
/
align.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import argparse
import logging
import multiprocessing
import os
import sys
import gentle
# Command-line interface: two positional paths (audio, transcript) plus
# optional tuning flags. The alignment result is emitted as JSON.
parser = argparse.ArgumentParser(
    description='Align a transcript to audio by generating a new language model. Outputs JSON')

# One (flags, keyword-options) entry per argument. Registration order is
# kept because it determines the order shown in --help.
_ARGUMENT_SPECS = (
    (('--nthreads',), {
        'default': multiprocessing.cpu_count(),
        'type': int,
        'help': 'number of alignment threads',
    }),
    (('-o', '--output'), {
        'metavar': 'output',
        'type': str,
        'help': 'output filename',
    }),
    (('--conservative',), {
        'dest': 'conservative',
        'action': 'store_true',
        'default': False,
        'help': 'conservative alignment',
    }),
    (('--disfluency',), {
        'dest': 'disfluency',
        'action': 'store_true',
        'default': False,
        'help': 'include disfluencies (uh, um) in alignment',
    }),
    (('--log',), {
        'default': "INFO",
        'help': 'the log level (DEBUG, INFO, WARNING, ERROR, or CRITICAL)',
    }),
    (('audiofile',), {
        'type': str,
        'help': 'audio file',
    }),
    (('txtfile',), {
        'type': str,
        'help': 'transcript text file',
    }),
)

for _flags, _options in _ARGUMENT_SPECS:
    parser.add_argument(*_flags, **_options)

args = parser.parse_args()
# Apply the requested verbosity to the root logger. The name is upper-cased
# first so that lower-case input such as "--log debug" is accepted.
log_level = args.log.upper()
root_logger = logging.getLogger()
root_logger.setLevel(log_level)

# Filler words handed to the aligner as its `disfluencies` argument.
disfluencies = {'uh', 'um'}
def on_progress(p):
    """Log every key/value pair of a progress update at DEBUG level.

    Used as the ``progress_cb`` callback for ``aligner.transcribe``.

    Args:
        p: Mapping of progress field names to their current values.
    """
    for key, value in p.items():
        # Pass the values as logging arguments so the message is only
        # formatted when DEBUG output is actually enabled.
        logging.debug("%s: %s", key, value)
# Read the whole transcript up front; it is handed to the aligner as a
# single string.
with open(args.txtfile, encoding="utf-8") as transcript_file:
    transcript = transcript_file.read()

resources = gentle.Resources()

logging.info("converting audio to 8K sampled wav")
# NOTE(review): presumably `gentle.resampled` yields the path of a temporary
# resampled wav that only exists inside this `with` block - confirm in gentle.
with gentle.resampled(args.audiofile) as wavfile:
    logging.info("starting alignment")
    aligner = gentle.ForcedAligner(
        resources,
        transcript,
        nthreads=args.nthreads,
        disfluency=args.disfluency,
        conservative=args.conservative,
        disfluencies=disfluencies,
    )
    # The `logging` module itself is passed through as the `logging` argument.
    result = aligner.transcribe(
        wavfile,
        progress_cb=on_progress,
        logging=logging,
    )
# Emit the alignment as indented JSON: to the file named by --output when
# one was given, otherwise to stdout.
# Serialise before opening the file so that a failure in to_json() does not
# leave an empty or truncated output file behind.
output_json = result.to_json(indent=2)
if args.output:
    # The context manager flushes and closes the file as soon as the JSON
    # has been written, instead of leaving the handle open until exit.
    with open(args.output, 'w', encoding="utf-8") as fh:
        fh.write(output_json)
    logging.info("output written to %s", args.output)
else:
    sys.stdout.write(output_json)