-
Notifications
You must be signed in to change notification settings - Fork 0
/
asr.py
131 lines (110 loc) · 5.4 KB
/
asr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import jiwer
from typing import List, Mapping, Tuple, Union
import Levenshtein
import jiwer.transforms as tr
""""Базовый препроцессинг"""
# Default preprocessing pipeline, used as the default transform by the metric
# functions in this module. Steps are applied in the order listed.
_default_transform = tr.Compose([
    tr.RemoveMultipleSpaces(),    # collapse runs of spaces
    tr.Strip(),                   # trim leading/trailing whitespace
    tr.SentencesToListOfWords(),  # split sentences into a list of words
    tr.RemoveEmptyStrings(),      # drop empty entries
])
"""
Получение показателей :
H - количество правильно распознанных слов;
S - количество операций ручной замены;
D - число удалений слов;
I - число вставки слов;
через расстояние Левенштейна
"""
def _get_operation_counts(
    source_string: str, destination_string: str
) -> Tuple[int, int, int, int]:
    """Count hits, substitutions, deletions and insertions between two strings.

    The edit operations come from ``Levenshtein.editops``; every element of
    ``source_string`` that is neither replaced nor deleted counts as a hit.

    Args:
        source_string: Sequence being edited.
        destination_string: Sequence the edit operations lead to.

    Returns:
        A ``(hits, substitutions, deletions, insertions)`` tuple.
    """
    # Tally all three operation kinds in a single pass over the edit script.
    tally = {"replace": 0, "delete": 0, "insert": 0}
    for operation in Levenshtein.editops(source_string, destination_string):
        kind = operation[0]
        if kind in tally:
            tally[kind] += 1

    replaced = tally["replace"]
    removed = tally["delete"]
    added = tally["insert"]
    # Anything in the source that was not substituted or deleted is a hit.
    matched = len(source_string) - (replaced + removed)
    return matched, replaced, removed, added
"""Подсчет метрик"""
""""Метрики, реализованные ранее в библиотеке"""
#Word Error Rate (WER)
def asr_wer(
    truth: Union[str, List[str]],
    hypothesis: Union[str, List[str]],
    truth_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    hypothesis_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    **kwargs
) -> float:
    """Return the Word Error Rate (WER) by delegating to ``jiwer.wer``.

    Args:
        truth: Reference text, a string or a list of strings.
        hypothesis: Recognised text, a string or a list of strings.
        truth_transform: Transform handed to jiwer for ``truth``.
        hypothesis_transform: Transform handed to jiwer for ``hypothesis``.
        **kwargs: Accepted for call compatibility; not forwarded to jiwer.

    Returns:
        The value produced by ``jiwer.wer``.
    """
    error_rate = jiwer.wer(truth, hypothesis, truth_transform, hypothesis_transform)
    return error_rate
# Match Error Rate (MER)
def asr_mer(
    truth: Union[str, List[str]],
    hypothesis: Union[str, List[str]],
    truth_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    hypothesis_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    **kwargs
) -> float:
    """Return the Match Error Rate (MER) by delegating to ``jiwer.mer``.

    Args:
        truth: Reference text, a string or a list of strings.
        hypothesis: Recognised text, a string or a list of strings.
        truth_transform: Transform handed to jiwer for ``truth``.
        hypothesis_transform: Transform handed to jiwer for ``hypothesis``.
        **kwargs: Accepted for call compatibility; not forwarded to jiwer.

    Returns:
        The value produced by ``jiwer.mer``.
    """
    match_error_rate = jiwer.mer(
        truth, hypothesis, truth_transform, hypothesis_transform
    )
    return match_error_rate
# Word Information Preserved (WIP)
def asr_wip(
    truth: Union[str, List[str]],
    hypothesis: Union[str, List[str]],
    truth_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    hypothesis_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    **kwargs
) -> float:
    """Return the Word Information Preserved (WIP) score via ``jiwer.wip``.

    Args:
        truth: Reference text, a string or a list of strings.
        hypothesis: Recognised text, a string or a list of strings.
        truth_transform: Transform handed to jiwer for ``truth``.
        hypothesis_transform: Transform handed to jiwer for ``hypothesis``.
        **kwargs: Accepted for call compatibility; not forwarded to jiwer.

    Returns:
        The value produced by ``jiwer.wip``.
    """
    # Must call jiwer.wip here: calling jiwer.mer would silently report the
    # Match Error Rate under the WIP name.
    return jiwer.wip(truth, hypothesis, truth_transform, hypothesis_transform)
# Word Information Lost (WIL)
def asr_wil(
    truth: Union[str, List[str]],
    hypothesis: Union[str, List[str]],
    truth_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    hypothesis_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    **kwargs
) -> float:
    """Return the Word Information Lost (WIL) score by delegating to ``jiwer.wil``.

    Args:
        truth: Reference text, a string or a list of strings.
        hypothesis: Recognised text, a string or a list of strings.
        truth_transform: Transform handed to jiwer for ``truth``.
        hypothesis_transform: Transform handed to jiwer for ``hypothesis``.
        **kwargs: Accepted for call compatibility; not forwarded to jiwer.

    Returns:
        The value produced by ``jiwer.wil``.
    """
    information_lost = jiwer.wil(
        truth, hypothesis, truth_transform, hypothesis_transform
    )
    return information_lost
""""Метрики, реализованные ранее в библиотеке"""
# Word Recognition Rate (WRR)
def asr_wrr(
    truth: Union[str, List[str]],
    hypothesis: Union[str, List[str]],
    truth_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    hypothesis_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    **kwargs
) -> float:
    """Return the Word Recognition Rate (WRR), computed as ``1 - WER``.

    Args:
        truth: Reference text, a string or a list of strings.
        hypothesis: Recognised text, a string or a list of strings.
        truth_transform: Transform handed to jiwer for ``truth``.
        hypothesis_transform: Transform handed to jiwer for ``hypothesis``.
        **kwargs: Accepted for call compatibility; not forwarded to jiwer.

    Returns:
        One minus the value produced by ``jiwer.wer``.
    """
    word_error_rate = jiwer.wer(
        truth, hypothesis, truth_transform, hypothesis_transform
    )
    return 1 - word_error_rate
# Word Correctly Recognized (WCR)
def asr_wсr(
    truth: Union[str, List[str]],
    hypothesis: Union[str, List[str]],
    truth_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    hypothesis_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    **kwargs
) -> float:
    """Return the Word Correctly Recognized (WCR) rate, ``H / (H + S + D)``.

    Both inputs are first passed through their transforms so the alignment is
    done on words. Without that step the Levenshtein alignment would run on
    the raw characters and the supplied transforms would have no effect.

    NOTE(review): the function name appears to contain a Cyrillic "с" rather
    than a Latin "c" - confirm; it is kept unchanged so existing callers work.

    Args:
        truth: Reference text, a string or a list of strings.
        hypothesis: Recognised text, a string or a list of strings.
        truth_transform: Callable transform applied to ``truth``; expected to
            reduce it to a flat sequence of words, as ``_default_transform``
            does.
        hypothesis_transform: Callable transform applied to ``hypothesis``,
            with the same expectation.
        **kwargs: Accepted for call compatibility; unused.

    Returns:
        The fraction of reference words that were recognised correctly.

    Raises:
        ValueError: If the transformed reference contains no words, because
            the rate is undefined for an empty reference.
    """
    truth_words = truth_transform(truth)
    hypothesis_words = hypothesis_transform(hypothesis)

    if len(truth_words) == 0:
        raise ValueError("the ground truth cannot be empty")

    # Encode every distinct word as one character so that the character-level
    # edit operations returned by Levenshtein correspond to whole words.
    word_to_char = {}
    for words in (truth_words, hypothesis_words):
        for word in words:
            if word not in word_to_char:
                word_to_char[word] = chr(len(word_to_char))

    truth_encoded = "".join(word_to_char[word] for word in truth_words)
    hypothesis_encoded = "".join(word_to_char[word] for word in hypothesis_words)

    H, S, D, _ = _get_operation_counts(truth_encoded, hypothesis_encoded)
    # H + S + D equals the number of reference words; insertions only affect
    # the hypothesis length and must not enter the numerator.
    return float(H) / float(H + S + D)
""""Создание массива с метриками"""
def all(
    truth: Union[str, List[str]],
    hypothesis: Union[str, List[str]],
    truth_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    hypothesis_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    **kwargs
):
    """Compute every metric of this module and return the values as a list.

    Args:
        truth: Reference text, a string or a list of strings.
        hypothesis: Recognised text, a string or a list of strings.
        truth_transform: Transform passed to each metric for ``truth``.
        hypothesis_transform: Transform passed to each metric for
            ``hypothesis``.
        **kwargs: Accepted for call compatibility; not forwarded.

    Returns:
        A list ordered as WER, WRR, MER, WIP, WIL, WCR.
    """
    # The tuple order defines the order of the returned values.
    metric_functions = (asr_wer, asr_wrr, asr_mer, asr_wip, asr_wil, asr_wсr)
    return [
        metric(truth, hypothesis, truth_transform, hypothesis_transform)
        for metric in metric_functions
    ]
def all_metrics_map(
    truth: Union[str, List[str]],
    hypothesis: Union[str, List[str]],
    truth_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    hypothesis_transform: Union[tr.Compose, tr.AbstractTransform] = _default_transform,
    **kwargs
) -> Mapping[str, float]:
    """Compute every metric of this module and return them keyed by name.

    Args:
        truth: Reference text, a string or a list of strings.
        hypothesis: Recognised text, a string or a list of strings.
        truth_transform: Transform passed to each metric for ``truth``.
        hypothesis_transform: Transform passed to each metric for
            ``hypothesis``.
        **kwargs: Accepted for call compatibility; not forwarded.

    Returns:
        A dict with the keys ``"wer"``, ``"wrr"``, ``"mer"``, ``"wcr"``,
        ``"wil"`` and ``"wip"``.
    """
    shared_args = (truth, hypothesis, truth_transform, hypothesis_transform)
    # Dict entries are evaluated top to bottom, so the metrics are computed
    # in the order they are listed.
    return {
        "wer": asr_wer(*shared_args),
        "wrr": asr_wrr(*shared_args),
        "mer": asr_mer(*shared_args),
        "wcr": asr_wсr(*shared_args),
        "wil": asr_wil(*shared_args),
        "wip": asr_wip(*shared_args),
    }