Skip to content

Commit

Permalink
fix test
Browse files Browse the repository at this point in the history
  • Loading branch information
vince62s committed Oct 5, 2024
1 parent ece2c35 commit 5cf97d6
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions eole/transforms/tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,12 @@ def _tokenize(self, tokens, side="src", is_train=False):
# This method embeds custom logic to correctly handle certain placeholders
# in case the tokenizer doesn't preserve them.
sentence = " ".join(tokens)
delim_list = [mapped_toks[0] for mapped_toks in self.mapped_tokens] + [
self.eos_token
]
if self.mapped_tokens is not None:
delim_list = [mapped_toks[0] for mapped_toks in self.mapped_tokens] + [
self.eos_token
]
else:
delim_list = [self.eos_token]
pattern = f"({'|'.join(map(re.escape, delim_list))})"
# Split sentence on EOS and Added-Tokens
sent_list = re.split(pattern, sentence)
Expand Down

0 comments on commit 5cf97d6

Please sign in to comment.