revdotcom · nishchalb · Apr 18, 2024 · Apr 17, 2024 · Apr 17, 2024 · Apr 17, 2024
diff --git a/src/Nlp.cpp b/src/Nlp.cpp
@@ -28,26 +28,25 @@ NlpFstLoader::NlpFstLoader(std::vector<RawNlpRecord> &records, Json::Value norma
  std::string last_label;
  bool firstTk = true;
 
+ auto logger = logger::GetOrCreateLogger("NlpFstLoader");
  // fuse multiple rows that have the same id/label into one entry only
  for (auto &row : records) {
- mNlpRows.push_back(row);
  auto curr_tk = row.token;
  auto curr_label = row.best_label;
  auto curr_label_id = row.best_label_id;
  auto punctuation = row.punctuation;
  auto curr_row_tags = row.wer_tags;
 
  // Update wer tags in records to real string labels
- vector<string> real_wer_tags;
  for (auto &tag : curr_row_tags) {
- auto real_tag = tag;
  if (mWerSidecar != Json::nullValue) {
- real_tag = "###" + real_tag + "_" + mWerSidecar[real_tag]["entity_type"].asString() + "###";
+ tag.entity_type = mWerSidecar[tag.tag_id]["entity_type"].asString();
+ logger->info(tag.entity_type);
  }
- real_wer_tags.push_back(real_tag);
  }
- row.wer_tags = real_wer_tags;
+ row.wer_tags = curr_row_tags;
  std::string speaker = row.speakerId;
+ mNlpRows.push_back(row);
 
  if (processLabels && curr_label != "") {
  if (firstTk || curr_label != last_label) {
@@ -411,17 +410,18 @@ std::string NlpReader::GetBestLabel(std::string &labels) {
  return labels;
 }
 
-std::vector<std::string> NlpReader::GetWerTags(std::string &wer_tags_str) {
- std::vector<std::string> wer_tags;
+std::vector<WerTagEntry> NlpReader::GetWerTags(std::string &wer_tags_str) {
+ std::vector<WerTagEntry> wer_tags;
  if (wer_tags_str == "[]") {
  return wer_tags;
  }
  // wer_tags_str looks like: ['89', '90', '100']
  int current_pos = 2;
  auto pos = wer_tags_str.find("'", current_pos);
  while (pos != -1) {
- std::string wer_tag = wer_tags_str.substr(current_pos, pos - current_pos);
- wer_tags.push_back(wer_tag);
+ WerTagEntry entry;
+ entry.tag_id = wer_tags_str.substr(current_pos, pos - current_pos);
+ wer_tags.push_back(entry);
  current_pos = wer_tags_str.find("'", pos + 1) + 1;
  if (current_pos == 0) {
  break;

diff --git a/src/Nlp.h b/src/Nlp.h
@@ -16,6 +16,11 @@
 using namespace std;
 using namespace fst;
 
+struct WerTagEntry {  // One WER tag attached to an NLP row: its id plus the entity type resolved for that id.
+ string tag_id;  // Tag id as parsed from the row's wer_tags list, e.g. '89' in ['89', '90', '100'].
+ string entity_type;  // Filled from the WER sidecar's "entity_type" for tag_id when a sidecar is loaded; otherwise left empty.
+};
+
 struct RawNlpRecord {
  string token;
  string speakerId;
@@ -27,7 +32,7 @@ struct RawNlpRecord {
  string labels;
  string best_label;
  string best_label_id;
- vector<string> wer_tags;
+ vector<WerTagEntry> wer_tags;
  string confidence;
 };
 
@@ -37,7 +42,7 @@ class NlpReader {
  virtual ~NlpReader();
  vector<RawNlpRecord> read_from_disk(const std::string &filename);
  string GetBestLabel(std::string &labels);
- vector<string> GetWerTags(std::string &wer_tags_str);
+ vector<WerTagEntry> GetWerTags(std::string &wer_tags_str);
  string GetLabelId(std::string &label);
 };
 

diff --git a/src/fstalign.cpp b/src/fstalign.cpp
@@ -619,7 +619,7 @@ void write_stitches_to_nlp(vector<Stitching>& stitches, ofstream &output_nlp_fil
  << "[";
  /* for (auto wer_tag : nlpRow.wer_tags) { */
  for (auto it = stitch.nlpRow.wer_tags.begin(); it != stitch.nlpRow.wer_tags.end(); ++it) {
- output_nlp_file << "'" << *it << "'";
+ output_nlp_file << "'" << it->tag_id << "'";
  if (std::next(it) != stitch.nlpRow.wer_tags.end()) {
  output_nlp_file << ", ";
  }

diff --git a/src/wer.cpp b/src/wer.cpp
@@ -350,19 +350,16 @@ void RecordTagWer(const vector<Stitching>& stitches) {
  for (const auto &stitch : stitches) {
  if (!stitch.nlpRow.wer_tags.empty()) {
  for (auto wer_tag : stitch.nlpRow.wer_tags) {
- int tag_start = wer_tag.find_first_not_of('#');
- int tag_end = wer_tag.find('_');
- string wer_tag_id = wer_tag.substr(tag_start, tag_end - tag_start);
- wer_results.insert(std::pair<std::string, WerResult>(wer_tag_id, {0, 0, 0, 0, 0}));
+ wer_results.insert(std::pair<std::string, WerResult>(wer_tag.tag_id, {0, 0, 0, 0, 0}));
  // rfind(prefix, 0) == 0 is a starts-with test; needed because the stitch comment may hold other text after the ins/del/sub marker
  bool del = stitch.comment.rfind("del", 0) == 0;
  bool ins = stitch.comment.rfind("ins", 0) == 0;
  bool sub = stitch.comment.rfind("sub", 0) == 0;
- wer_results[wer_tag_id].insertions += ins;
- wer_results[wer_tag_id].deletions += del;
- wer_results[wer_tag_id].substitutions += sub;
+ wer_results[wer_tag.tag_id].insertions += ins;
+ wer_results[wer_tag.tag_id].deletions += del;
+ wer_results[wer_tag.tag_id].substitutions += sub;
  if (!ins) {
- wer_results[wer_tag_id].numWordsInReference += 1;
+ wer_results[wer_tag.tag_id].numWordsInReference += 1;
  }
  }
  }
@@ -555,7 +552,7 @@ void WriteSbs(wer_alignment &topAlignment, const vector<Stitching>& stitches, st
  string tk_wer_tags = "";
  auto wer_tags = p_stitch.nlpRow.wer_tags;
  for (auto wer_tag: wer_tags) {
- tk_wer_tags = tk_wer_tags + wer_tag + "|";
+ tk_wer_tags = tk_wer_tags + "###" + wer_tag.tag_id + "_" + wer_tag.entity_type + "###|";
  }
  string ref_tk = p_stitch.reftk;
  string hyp_tk = p_stitch.hyptk;

diff --git a/test/data/short.aligned.case.nlp b/test/data/short.aligned.case.nlp
@@ -23,7 +23,7 @@ sure|1|0.0000|0.0000|.||LC|[]|[]||||
 When|1|0.0000|0.0000|||UC|[]|[]||||
 I|1|0.0000|0.0000|||CA|[]|[]||||
 hear|1|0.0000|0.0000|||LC|[]|[]||||
-Foobar|1|0.0000|0.0000|,||UC|[]|[]||||
+Foobar|1|0.0000|0.0000|,||UC|[]|['1', '2']||||
 I|1|0.0000|0.0000|||CA|[]|[]||||
 think|1|0.0000|0.0000|||LC|[]|[]||||
 about|1|0.0000|0.0000|||LC|[]|[]||||

diff --git a/test/data/short.aligned.punc.nlp b/test/data/short.aligned.punc.nlp
@@ -31,7 +31,7 @@ sure|1|0.0000|0.0000|.||LC|[]|[]||||
 When|1|0.0000|0.0000|||UC|[]|[]||||
 I|1|0.0000|0.0000|||CA|[]|[]||||
 hear|1|0.0000|0.0000|||LC|[]|[]||||
-Foobar|1|0.0000|0.0000|,||UC|[]|[]||||
+Foobar|1|0.0000|0.0000|,||UC|[]|['1', '2']||||
 ,|1|0.0000|0.0000|||||[]||||
 I|1|0.0000|0.0000|||CA|[]|[]||||
 think|1|0.0000|0.0000|||LC|[]|[]||||

diff --git a/test/data/short.aligned.punc_case.nlp b/test/data/short.aligned.punc_case.nlp
@@ -31,7 +31,7 @@ sure|1|0.0000|0.0000|.||LC|[]|[]||||
 When|1|0.0000|0.0000|||UC|[]|[]||||
 I|1|0.0000|0.0000|||CA|[]|[]||||
 hear|1|0.0000|0.0000|||LC|[]|[]||||
-Foobar|1|0.0000|0.0000|,||UC|[]|[]||||
+Foobar|1|0.0000|0.0000|,||UC|[]|['1', '2']||||
 ,|1|0.0000|0.0000|||||[]||||
 I|1|0.0000|0.0000|||CA|[]|[]||||
 think|1|0.0000|0.0000|||LC|[]|[]||||

diff --git a/test/data/short.sbs.txt b/test/data/short.sbs.txt
@@ -0,0 +1,131 @@
+ ref_token hyp_token IsErr Class Wer_Tag_Entities
+ <crosstalk> <crosstalk> 
+ Yeah Yeah 
+ , , 
+ yeah <del> ERR 
+ , <del> ERR 
+ right right 
+ . <del> ERR 
+ Yeah <del> ERR 
+ , <del> ERR 
+ all <del> ERR 
+ right <del> ERR 
+ , I'll ERR 
+ probably do ERR 
+ just just 
+ that that 
+ . ? ERR 
+ Are Are 
+ there there 
+ any any 
+ visuals visuals 
+ that that 
+ come come 
+ to to 
+ mind mind 
+ or or ___100002_SYN_1-1___ 
+ <ins> ? ERR 
+ Yeah Yeah 
+ , , 
+ sure sure 
+ . . 
+ When When 
+ I I 
+ hear hear 
+ Foobar Foobar ###1_PROPER_NOUN###|###2_SPACY>ORG###|
+ , , 
+ I I 
+ think think 
+ about about 
+ just just 
+ that that 
+ : : 
+ <ins> Foobar ERR 
+ foo , ERR 
+ a a 
+------------------------------------------------------------
+ Line Group 
+ 5 yeah , <-> ***
+ 8 . Yeah , all right , probably <-> I'll do
+ 17 . <-> ?
+ 27 *** <-> ?
+ 43 foo <-> Foobar ,
+------------------------------------------------------------
+ Unigram Prec. Recall 
+ ? 0/2 (0.0 %) 0/0 (0.0 %)
+ I'll 0/1 (0.0 %) 0/0 (0.0 %)
+ all 0/0 (0.0 %) 0/1 (0.0 %)
+ do 0/1 (0.0 %) 0/0 (0.0 %)
+ foo 0/0 (0.0 %) 0/1 (0.0 %)
+ probably 0/0 (0.0 %) 0/1 (0.0 %)
+ yeah 0/0 (0.0 %) 0/1 (0.0 %)
+ Foobar 1/2 (50.0 %) 1/1 (100.0 %)
+ , 3/4 (75.0 %) 3/6 (50.0 %)
+ . 1/1 (100.0 %) 1/3 (33.3 %)
+ right 1/1 (100.0 %) 1/2 (50.0 %)
+ Yeah 2/2 (100.0 %) 2/3 (66.7 %)
+ : 1/1 (100.0 %) 1/1 (100.0 %)
+ <crosstalk> 1/1 (100.0 %) 1/1 (100.0 %)
+ Are 1/1 (100.0 %) 1/1 (100.0 %)
+ I 2/2 (100.0 %) 2/2 (100.0 %)
+ When 1/1 (100.0 %) 1/1 (100.0 %)
+ a 1/1 (100.0 %) 1/1 (100.0 %)
+ about 1/1 (100.0 %) 1/1 (100.0 %)
+ any 1/1 (100.0 %) 1/1 (100.0 %)
+ come 1/1 (100.0 %) 1/1 (100.0 %)
+ hear 1/1 (100.0 %) 1/1 (100.0 %)
+ just 2/2 (100.0 %) 2/2 (100.0 %)
+ mind 1/1 (100.0 %) 1/1 (100.0 %)
+ or 1/1 (100.0 %) 1/1 (100.0 %)
+ sure 1/1 (100.0 %) 1/1 (100.0 %)
+ that 3/3 (100.0 %) 3/3 (100.0 %)
+ there 1/1 (100.0 %) 1/1 (100.0 %)
+ think 1/1 (100.0 %) 1/1 (100.0 %)
+ to 1/1 (100.0 %) 1/1 (100.0 %)
+ visuals 1/1 (100.0 %) 1/1 (100.0 %)
+------------------------------------------------------------
+ Bigram Precision Recall 
+ , a 0/1 (0.0 %) 0/0 (0.0 %)
+ , all 0/0 (0.0 %) 0/1 (0.0 %)
+ , probably 0/0 (0.0 %) 0/1 (0.0 %)
+ , right 0/0 (0.0 %) 0/1 (0.0 %)
+ , yeah 0/0 (0.0 %) 0/1 (0.0 %)
+ . Are 0/0 (0.0 %) 0/1 (0.0 %)
+ . Yeah 0/0 (0.0 %) 0/1 (0.0 %)
+ : Foobar 0/1 (0.0 %) 0/0 (0.0 %)
+ ? Are 0/1 (0.0 %) 0/0 (0.0 %)
+ ? Yeah 0/1 (0.0 %) 0/0 (0.0 %)
+ I'll do 0/1 (0.0 %) 0/0 (0.0 %)
+ all right 0/0 (0.0 %) 0/1 (0.0 %)
+ do just 0/1 (0.0 %) 0/0 (0.0 %)
+ foo a 0/0 (0.0 %) 0/1 (0.0 %)
+ or ? 0/1 (0.0 %) 0/0 (0.0 %)
+ probably just 0/0 (0.0 %) 0/1 (0.0 %)
+ right , 0/0 (0.0 %) 0/1 (0.0 %)
+ right . 0/0 (0.0 %) 0/1 (0.0 %)
+ that . 0/0 (0.0 %) 0/1 (0.0 %)
+ that ? 0/1 (0.0 %) 0/0 (0.0 %)
+ yeah , 0/0 (0.0 %) 0/1 (0.0 %)
+ Foobar , 1/2 (50.0 %) 1/1 (100.0 %)
+ Yeah , 2/2 (100.0 %) 2/3 (66.7 %)
+ , I 1/1 (100.0 %) 1/1 (100.0 %)
+ , sure 1/1 (100.0 %) 1/1 (100.0 %)
+ . When 1/1 (100.0 %) 1/1 (100.0 %)
+ <crosstalk> Yeah 1/1 (100.0 %) 1/1 (100.0 %)
+ Are there 1/1 (100.0 %) 1/1 (100.0 %)
+ I hear 1/1 (100.0 %) 1/1 (100.0 %)
+ I think 1/1 (100.0 %) 1/1 (100.0 %)
+ When I 1/1 (100.0 %) 1/1 (100.0 %)
+ about just 1/1 (100.0 %) 1/1 (100.0 %)
+ any visuals 1/1 (100.0 %) 1/1 (100.0 %)
+ come to 1/1 (100.0 %) 1/1 (100.0 %)
+ hear Foobar 1/1 (100.0 %) 1/1 (100.0 %)
+ just that 2/2 (100.0 %) 2/2 (100.0 %)
+ mind or 1/1 (100.0 %) 1/1 (100.0 %)
+ sure . 1/1 (100.0 %) 1/1 (100.0 %)
+ that : 1/1 (100.0 %) 1/1 (100.0 %)
+ that come 1/1 (100.0 %) 1/1 (100.0 %)
+ there any 1/1 (100.0 %) 1/1 (100.0 %)
+ think about 1/1 (100.0 %) 1/1 (100.0 %)
+ to mind 1/1 (100.0 %) 1/1 (100.0 %)
+ visuals that 1/1 (100.0 %) 1/1 (100.0 %)
diff --git a/test/data/short_punc.ref.nlp b/test/data/short_punc.ref.nlp
@@ -1,33 +1,33 @@
-token|speaker|ts|endTs|punctuation|case|tags
-<crosstalk>|2||||LC|[]
-Yeah|1|||,|UC|[]
-yeah|1|||,|LC|[]
-right|1|||.|LC|[]
-Yeah|1|||,|UC|[]
-all|1||||LC|[]
-right|1|||,|LC|[]
-probably|1||||LC|[]
-just|1||||LC|[]
-that|1|||.|LC|[]
-Are|3||||UC|[]
-there|3||||LC|[]
-any|3||||LC|[]
-visuals|3||||LC|[]
-that|3||||LC|[]
-come|3||||LC|[]
-to|3||||LC|[]
-mind|3||||LC|[]
-or-|3||||LC|[]
-Yeah|1|||,|UC|[]
-sure|1|||.|LC|[]
-When|1||||UC|[]
-I|1||||CA|[]
-hear|1||||LC|[]
-Foobar|1|||,|UC|[]
-I|1||||CA|[]
-think|1||||LC|[]
-about|1||||LC|[]
-just|1||||LC|[]
-that|1|||:|LC|[]
-foo|1||||LC|[]
-a|1||||LC|[]
+token|speaker|ts|endTs|punctuation|case|tags|wer_tags
+<crosstalk>|2||||LC|[]|[]
+Yeah|1|||,|UC|[]|[]
+yeah|1|||,|LC|[]|[]
+right|1|||.|LC|[]|[]
+Yeah|1|||,|UC|[]|[]
+all|1||||LC|[]|[]
+right|1|||,|LC|[]|[]
+probably|1||||LC|[]|[]
+just|1||||LC|[]|[]
+that|1|||.|LC|[]|[]
+Are|3||||UC|[]|[]
+there|3||||LC|[]|[]
+any|3||||LC|[]|[]
+visuals|3||||LC|[]|[]
+that|3||||LC|[]|[]
+come|3||||LC|[]|[]
+to|3||||LC|[]|[]
+mind|3||||LC|[]|[]
+or-|3||||LC|[]|[]
+Yeah|1|||,|UC|[]|[]
+sure|1|||.|LC|[]|[]
+When|1||||UC|[]|[]
+I|1||||CA|[]|[]
+hear|1||||LC|[]|[]
+Foobar|1|||,|UC|[]|['1', '2']
+I|1||||CA|[]|[]
+think|1||||LC|[]|[]
+about|1||||LC|[]|[]
+just|1||||LC|[]|[]
+that|1|||:|LC|[]|[]
+foo|1||||LC|[]|[]
+a|1||||LC|[]|[]
diff --git a/test/data/short_punc.wer_tag.json b/test/data/short_punc.wer_tag.json
@@ -0,0 +1,8 @@
+{
+ "1": {
+ "entity_type": "PROPER_NOUN"
+ },
+ "2": {
+ "entity_type": "SPACY>ORG"
+ }
+}
diff --git a/test/fstalign_Test.cc b/test/fstalign_Test.cc
@@ -680,11 +680,14 @@ TEST_CASE_METHOD(UniqueTestsFixture, "main-adapted-composition()") {
  }
 
  SECTION("NLP Hypothesis: wer with case and punctuation(nlp output)") {
+ const auto wer_sidecar_path = TEST_DATA + "short_punc.wer_tag.json";
  const auto result =
- exec(command("wer", approach, "short_punc.ref.nlp", "short_punc.hyp.nlp", sbs_output, nlp_output, TEST_SYNONYMS)+" --use-punctuation --use-case");
+ exec(command("wer", approach, "short_punc.ref.nlp", "short_punc.hyp.nlp", sbs_output, nlp_output, TEST_SYNONYMS)+" --use-punctuation --use-case --wer-sidecar " + wer_sidecar_path);
  const auto testFile = std::string{TEST_DATA} + "short.aligned.punc_case.nlp";
+ const auto testSbsFile = std::string{TEST_DATA} + "short.sbs.txt";
 
  REQUIRE(compareFiles(nlp_output.c_str(), testFile.c_str()));
+ REQUIRE(compareFiles(sbs_output.c_str(), testSbsFile.c_str()));
  REQUIRE_THAT(result, Contains("WER: 13/42 = 0.3095"));
  REQUIRE_THAT(result, Contains("WER: INS:2 DEL:7 SUB:4"));
  }