Skip to content

Commit

Permalink
add kaldi-format support
Browse files Browse the repository at this point in the history
  • Loading branch information
selaselah committed Jun 5, 2017
1 parent 6b4ae4e commit d29fd0a
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 10 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,10 @@ optional arguments:
-i, --print-instances
Print all individual sentences and their errors.
-r, --print-errors Print all individual sentences that contain errors.
-id, --has-ids Hypothesis and reference files have ids in the last
token?
--head-ids Hypothesis and reference files have ids in the first
token? (Kaldi format)
--tail-ids Hypothesis and reference files have ids in the last
token? (Sphinx format)
-c, --confusions Print tables of which words were confused.
-p, --print-wer-vs-length
Print table of average WER grouped by reference
Expand Down
6 changes: 4 additions & 2 deletions asr_evaluation/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ def get_parser():
help='Print all individual sentences and their errors.')
print_args.add_argument('-r', '--print-errors', action='store_true',
help='Print all individual sentences that contain errors.')
parser.add_argument('-id', '--has-ids', action='store_true',
help='Hypothesis and reference files have ids in the last token?')
parser.add_argument('--head-ids', action='store_true',
help='Hypothesis and reference files have ids in the first token? (Kaldi format)')
parser.add_argument('--tail-ids', action='store_true',
help='Hypothesis and reference files have ids in the last token? (Sphinx format)')
parser.add_argument('-c', '--confusions', action='store_true', help='Print tables of which words were confused.')
parser.add_argument('-p', '--print-wer-vs-length', action='store_true',
help='Print table of average WER grouped by reference sentence length.')
Expand Down
32 changes: 26 additions & 6 deletions asr_evaluation/asr_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
# Some defaults
print_instances_p = False
print_errors_p = False
files_have_ids = False
files_head_ids = False
files_tail_ids = False
confusions = False
min_count = 0
wer_vs_length_p = True
Expand Down Expand Up @@ -101,9 +102,12 @@ def process_line_pair(ref_line, hyp_line, case_insensitive=False, remove_empty_r
id_ = None

# If the files have IDs, then split the ID off from the text
if files_have_ids:
if files_head_ids:
id_ = ref[0]
ref, hyp = remove_head_id(ref, hyp)
elif files_tail_ids:
id_ = ref[-1]
ref, hyp = remove_sentence_ids(ref, hyp)
ref, hyp = remove_tail_id(ref, hyp)

if case_insensitive:
ref = list(map(str.lower, ref))
Expand Down Expand Up @@ -149,19 +153,35 @@ def set_global_variables(args):
"""Copy argparse args into global variables."""
global print_instances_p
global print_errors_p
global files_have_ids
global files_head_ids
global files_tail_ids
global confusions
global min_count
global wer_vs_length_p
# Put the command line options into global variables.
print_instances_p = args.print_instances
print_errors_p = args.print_errors
files_have_ids = args.has_ids
files_head_ids = args.head_ids
files_tail_ids = args.tail_ids
confusions = args.confusions
min_count = args.min_word_count
wer_vs_length_p = args.print_wer_vs_length

def remove_sentence_ids(ref, hyp):
def remove_head_id(ref, hyp):
    """Strip a leading utterance ID from parallel ref/hyp token lists.

    Assumes the ID is the first token of each line, which is the Kaldi
    convention (Sphinx puts the ID in the last token instead).

    Args:
        ref: reference sentence as a list of tokens, ID first.
        hyp: hypothesis sentence as a list of tokens, ID first.

    Returns:
        (ref, hyp) with the leading ID token removed from each.

    Exits the program (via SystemExit(-1)) if the two IDs disagree,
    since that means the files are misaligned and every subsequent
    line pair would be compared incorrectly.
    """
    ref_id = ref[0]
    hyp_id = hyp[0]
    if ref_id != hyp_id:
        # Mismatched IDs mean line N of the hyp file does not correspond
        # to line N of the ref file; any further comparison is meaningless.
        print('Reference and hypothesis IDs do not match! '
              'ref="{}" hyp="{}"\n'
              'File lines in hyp file should match those in the ref file.'.format(ref_id, hyp_id))
        # raise SystemExit directly instead of calling the site-module
        # builtin exit(), which is meant for interactive sessions and is
        # unavailable when Python runs with -S; behavior is identical.
        raise SystemExit(-1)
    ref = ref[1:]
    hyp = hyp[1:]
    return ref, hyp

def remove_tail_id(ref, hyp):
"""Assumes that the ID is the final token of the string which is common
in Sphinx but not in Kaldi."""
ref_id = ref[-1]
Expand Down

0 comments on commit d29fd0a

Please sign in to comment.