Version 1.5.1: Minor bugfixes and improvements. (#48)
* Improve regex efficiency.

* Fix an edge case for SRT subtitles.

* Add LLM-based translation quality evaluator.

* Fix an issue that caused format checking for context generation to always fail.

* Update noise-suppression test suites.

* Bump up to version 1.5.1
zh-plus authored Jul 1, 2024
1 parent b4cc646 commit d9dde29
Showing 14 changed files with 224 additions and 36 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,13 @@
## 1.5.1

Minor bugfixes and improvements.

### Other Changes:

- Fix the default check_format always returning False.
- Fix edge cases in SRT file generation.
- Prepare translation evaluator for benchmarking.

## 1.5.0

This update adds Gemini model support for translation.
1 change: 1 addition & 0 deletions README.md
@@ -252,6 +252,7 @@ To maintain context between translation segments, the process is sequential for
languages.
- [x] [Feature] Add custom OpenAI & Anthropic endpoint support.
- [ ] [Feature] Add local translation model support (e.g. [SakuraLLM](https://github.com/SakuraLLM/Sakura-13B-Galgame)).
- [ ] [Quality] Construct translation quality benchmark test for each patch.
- [ ] [Others] Add transcribed examples.
- [ ] Song
- [ ] Podcast
36 changes: 35 additions & 1 deletion openlrc/agents.py
@@ -1,14 +1,17 @@
# Copyright (C) 2024. Hao Zheng
# All rights reserved.
import abc
import json
import re
from typing import Optional, Tuple, List, Type, Union

from json_repair import repair_json

from openlrc.chatbot import route_chatbot, GPTBot, ClaudeBot
from openlrc.context import TranslationContext, TranslateInfo
from openlrc.logger import logger
from openlrc.prompter import ChunkedTranslatePrompter, ContextReviewPrompter, ProofreaderPrompter, PROOFREAD_PREFIX, \
ContextReviewerValidatePrompter
ContextReviewerValidatePrompter, TranslationEvaluatorPrompter
from openlrc.validators import POTENTIAL_PREFIX_COMBOS


@@ -211,3 +214,34 @@ def proofread(self, texts: List[str], translations, context: TranslationContext)
        resp = self.chatbot.message(messages_list, output_checker=self.prompter.check_format)[0]
        revised = self._parse_responses(resp)
        return revised


class TranslationEvaluatorAgent(Agent):
    TEMPERATURE = 0.95

    def __init__(self, chatbot_model: str = 'gpt-3.5-turbo', fee_limit: float = 0.3, proxy: str = None,
                 base_url_config: Optional[dict] = None):
        super().__init__()
        self.chatbot = self._initialize_chatbot(chatbot_model, fee_limit, proxy, base_url_config)
        self.prompter = TranslationEvaluatorPrompter()

    def evaluate(self, src_texts, target_texts) -> dict:
        messages_list = [
            {'role': 'system', 'content': self.prompter.system()},
            {'role': 'user', 'content': self.prompter.user(src_texts, target_texts)},
        ]
        resp = self.chatbot.message(messages_list, stop_sequences=[self.prompter.stop_sequence])[0]
        content = self.chatbot.get_content(resp)

        # Repair potentially broken JSON before parsing.
        content = repair_json(content)

        # The response is expected to be a JSON object keyed by criterion:
        # accuracy, fluency, completeness, cultural adaptation, consistency.
        json_resp = json.loads(content)

        return json_resp
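For reference, a minimal usage sketch of the new agent (not part of the commit); the sample texts are illustrative, and a valid API key for the default chatbot model is assumed:

```python
# Hypothetical usage sketch of TranslationEvaluatorAgent; the sample texts are
# illustrative and a valid API key for the default chatbot model is assumed.
from openlrc.agents import TranslationEvaluatorAgent

agent = TranslationEvaluatorAgent(fee_limit=0.3)

src_texts = ['Those who resist change may find themselves left behind.']
target_texts = ['那些抗拒变化的人可能会发现自己被抛在后面。']

# evaluate() returns the repaired, parsed JSON dict keyed by criterion,
# e.g. {'accuracy': {'score': ..., 'justification': ...}, ...}.
scores = agent.evaluate(src_texts, target_texts)
print(scores['accuracy']['score'], scores['fluency']['justification'])
```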
2 changes: 1 addition & 1 deletion openlrc/context.py
@@ -19,7 +19,7 @@ def update(self, **args):

@property
def non_glossary_guideline(self) -> str:
cleaned_text = re.sub(r'### Glossary(.*?\n)*?### Characters', '### Characters', self.guideline, flags=re.DOTALL)
cleaned_text = re.sub(r'### Glossary.*?### Characters', '### Characters', self.guideline, flags=re.DOTALL)
return cleaned_text
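A quick illustration of what the simplified pattern does (the guideline text below is made up for demonstration); the old `(.*?\n)*?` form matched the same spans but invited heavy backtracking on long guidelines:

```python
# Illustrative only: the guideline text is made up. The simplified pattern
# strips everything between '### Glossary' and '### Characters' with a single
# non-greedy wildcard instead of a nested quantifier.
import re

guideline = (
    '### Glossary\n'
    'LRC: a lyric/subtitle file format\n'
    '### Characters\n'
    'Narrator: neutral tone\n'
)

cleaned = re.sub(r'### Glossary.*?### Characters', '### Characters', guideline, flags=re.DOTALL)
print(cleaned)
# ### Characters
# Narrator: neutral tone
```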


2 changes: 1 addition & 1 deletion openlrc/defaults.py
@@ -2,7 +2,7 @@
# All rights reserved.
from lingua import Language

# Check https://github.com/SYSTRAN/faster-whisper/blob/master/faster_whisper/transcribe.py#L184 for details
# Check https://github.com/SYSTRAN/faster-whisper/blob/master/faster_whisper/transcribe.py for details
default_asr_options = {
"beam_size": 3,
"best_of": 5,
54 changes: 54 additions & 0 deletions openlrc/evaluate.py
@@ -0,0 +1,54 @@
# Copyright (C) 2024. Hao Zheng
# All rights reserved.
import abc

from openlrc.agents import TranslationEvaluatorAgent
from openlrc.logger import logger


class TranslationEvaluator(abc.ABC):
    """
    Base class for all evaluators.
    """

    @abc.abstractmethod
    def evaluate(self, src_texts, target_texts, src_lang, target_lang):
        """
        Evaluate the translated texts.
        :return: The evaluation result.
        """
        raise NotImplementedError()


class LLMTranslationEvaluator(TranslationEvaluator):
    """
    Evaluate the translated texts using large language models.
    """

    def __init__(self, chatbot_model: str = 'gpt-3.5-turbo'):
        self.agent = TranslationEvaluatorAgent(chatbot_model=chatbot_model)
        self.recommended_models = {
            'gpt-4',
            'claude-3-sonnet',
            'claude-3-opus',
            'gemini-1.5-pro'
        }

        # The agent is already constructed above; only warn if the chosen model
        # is not in the recommended list for evaluating translations.
        if not any(chatbot_model.startswith(m) for m in self.recommended_models):
            logger.warning(f'Chatbot model {chatbot_model} is not in the recommended list for evaluating translations.')

    def evaluate(self, src_texts, target_texts, src_lang=None, target_lang=None):
        return self.agent.evaluate(src_texts, target_texts)


class EmbeddingTranslationEvaluator(TranslationEvaluator):
    """
    Evaluate the translated texts using embeddings.
    """

    def evaluate(self, src_texts, target_texts, src_lang, target_lang):
        pass
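`EmbeddingTranslationEvaluator` is left as a stub in this commit; one hypothetical way to fill it in later is cosine similarity over multilingual sentence embeddings. The sentence-transformers dependency and model name below are assumptions for illustration, not part of openlrc:

```python
# Hypothetical sketch only: sentence-transformers is not a dependency of this
# commit, and the model name is an assumption chosen for illustration.
import numpy as np
from sentence_transformers import SentenceTransformer


def embedding_similarity(src_texts, target_texts,
                         model_name='paraphrase-multilingual-MiniLM-L12-v2'):
    model = SentenceTransformer(model_name)
    src_emb = model.encode(src_texts, normalize_embeddings=True)
    tgt_emb = model.encode(target_texts, normalize_embeddings=True)
    # With normalized embeddings, pairwise cosine similarity reduces to a dot product.
    return float(np.mean(np.sum(src_emb * tgt_emb, axis=1)))
```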
6 changes: 3 additions & 3 deletions openlrc/preprocess.py
@@ -59,7 +59,7 @@ def noise_suppression(self, audio_paths: Union[str, Path, List[str], List[Path]]
atten_lim_db = self.options['atten_lim_db']

model, df_state, _ = init_df()
chunk_size = 300 # 5 min
chunk_size = 180 # 3 min

ns_audio_paths = []
for audio_path, output_path in zip(audio_paths, self.output_paths):
@@ -69,7 +69,7 @@ def noise_suppression(self, audio_paths: Union[str, Path, List[str], List[Path]]
if not ns_path.exists():
audio, info = load_audio(audio_path, sr=df_state.sr())

# Split audio into 10 min chunks
# Split audio into 3 min chunks
audio_chunks = [audio[:, i:i + chunk_size * info.sample_rate]
for i in range(0, audio.shape[1], chunk_size * info.sample_rate)]

@@ -79,7 +79,7 @@ def noise_suppression(self, audio_paths: Union[str, Path, List[str], List[Path]]

enhanced = torch.cat(enhanced_chunks, dim=1)

assert enhanced.shape == audio.shape, 'Enhanced audio shape does not match original audio shape.'
assert enhanced.shape == audio.shape, f'Enhanced audio shape does not match original audio shape: {enhanced.shape} != {audio.shape}'

save_audio(ns_path, enhanced, sr=df_state.sr())

80 changes: 76 additions & 4 deletions openlrc/prompter.py
@@ -9,7 +9,7 @@

from openlrc.context import TranslateInfo
from openlrc.validators import ChunkedTranslateValidator, AtomicTranslateValidator, ProofreaderValidator, \
ContextReviewerValidateValidator
ContextReviewerValidateValidator, TranslationEvaluatorValidator

ORIGINAL_PREFIX = 'Original>'
TRANSLATION_PREFIX = 'Translation>'
@@ -91,7 +91,7 @@
The translation should be in a lovely colloquial style and suitable for high-quality subtitles.
I’m going to tip \$1000 for a better translation!
I’m going to tip $1000 for a better translation!
### retry_instructions
There was an issue with the previous translation.
@@ -108,7 +108,7 @@ def check_format(self, user_input: str, generated_content: str) -> bool:
if hasattr(self, 'validator') and self.validator:
return self.validator.validate(user_input, generated_content)
else:
return False
return True


class TranslatePrompter(Prompter, ABC):
@@ -303,7 +303,7 @@ def system(self):

class ContextReviewerValidatePrompter(Prompter):
def __init__(self):
self.validator = ContextReviewerValidateValidator('en')
self.validator = ContextReviewerValidateValidator()

def system(self):
return f'''Ignore all previous instructions.
@@ -370,5 +370,77 @@ def system(self):
Output:
False'''

    def user(self, context):
        return f'''Input:\n{context}\nOutput:'''


class TranslationEvaluatorPrompter(Prompter):
    def __init__(self):
        self.validator = TranslationEvaluatorValidator()
        self.stop_sequence = '<--END-OF-JSON-->'

    def system(self):
        return f'''Ignore all previous instructions.
### Context:
You are an expert in evaluating subtitle translations. Your task is to assess the quality of a translated subtitle text based on several key factors. The original text and its translation are provided for your review.
### Objective:
The goal is to provide a comprehensive evaluation of the translated subtitle text by scoring it on five specific criteria: Accuracy, Fluency, Completeness, Cultural Adaptation, and Consistency. Each criterion should be rated on a scale from 1 to 10, with 1 being the lowest quality and 10 being the highest.
### Style:
The evaluation should be detailed, objective, and professional. Use clear and concise language to convey your assessment.
### Tone:
Maintain a constructive and neutral tone throughout your evaluation. Focus on providing actionable feedback that can help improve the quality of the translation.
### Audience:
Your evaluation will be read by subtitle translators, quality assurance teams, and project managers who are looking to understand the strengths and weaknesses of the translation.
### Response Format:
Please provide your evaluation in the following JSON format:
{{
"accuracy": {{"score": [1-10], "justification": "[Justification]"}},
"fluency": {{"score": [1-10], "justification": "[Justification]"}},
"completeness": {{"score": [1-10], "justification": "[Justification]"}},
"cultural adaptation": {{"score": [1-10], "justification": "[Justification]"}},
"consistency": {{"score": [1-10], "justification": "[Justification]"}}
}}
{self.stop_sequence}
### Example1:
Input:
Original Texts:
Those who resist change may find themselves left behind.
On the other hand, those who embrace change can thrive in the new environment.
Translated Texts:
那些抗拒变化的人可能会发现自己被抛在后面。
另一方面,那些接受变化的人可以在新环境中发展。
Output:
result = {{
"accuracy": {{"score": <example integer score>, "justification": "<example-string>"}},
"fluency": {{"score": <example integer score>, "justification": "<example-string>"}},
"completeness": {{"score": <example integer score>, "justification": "<example-string>"}},
"cultural adaptation": {{"score": <example integer score>, "justification": "<example-string>"}},
"consistency"': {{"score": <example integer score>, "justification": "<example-string>"}}
}}
{self.stop_sequence}
Note that the results are processed by an automated system, so it is imperative that you adhere to the required output format.
'''

    def user(self, original: List[str], translation: List[str]):
        original_str = '\n'.join(original)
        translation_str = '\n'.join(translation)
        return f'''Input:
Original Texts:
{original_str}
Translated Texts:
{translation_str}
Output:
'''
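Downstream, the reply is expected to be a JSON object terminated by the stop sequence; a small sketch of the defensive parsing this enables (the raw reply below is made up and deliberately truncated to show why `repair_json` sits in the pipeline):

```python
# Illustrative post-processing of an evaluator reply; the reply string is made
# up and deliberately truncated to show why repair_json is involved.
import json
from json_repair import repair_json

STOP_SEQUENCE = '<--END-OF-JSON-->'
raw_reply = '{"accuracy": {"score": 9, "justification": "Faithful"}, "fluency": {"score": 8'

# The stop sequence is passed to the chat API, so it normally never reaches the
# client, but stripping it defensively is cheap.
raw_reply = raw_reply.split(STOP_SEQUENCE)[0]

scores = json.loads(repair_json(raw_reply))
print(scores['accuracy']['score'])  # 9
```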
12 changes: 2 additions & 10 deletions openlrc/subtitle.py
@@ -172,7 +172,7 @@ def from_lrc(cls, filename):
return cls(language=lang, segments=segments, filename=filename)

@classmethod
def from_srt(cls, filename):
def from_srt(cls, filename: Union[str, Path]):
"""
Processes an SRT (SubRip Subtitle) file according to the SRT specifications outlined
at http://www.textfiles.com/uploads/kds-srt.txt.
@@ -184,14 +184,6 @@ def from_srt(cls, filename):
- A blank line indicating the end of a subtitle entry.
This function is designed to read or manipulate an SRT file based on the provided filename.
Args:
filename (str): The path to the SRT file to be processed.
Returns:
The return value is not specified in the provided docstring. Depending on the implementation,
this function could return a data structure representing the parsed SRT file, a success status,
or possibly nothing.
"""
filename = Path(filename)
with open(filename, encoding='utf-8') as f:
@@ -212,7 +204,7 @@ def from_srt(cls, filename):

# Multi-line subtitle
text = []
while lines[i].strip():
while i < len(lines) and lines[i].strip():
text.append(lines[i].strip())
i += 1
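The added `i < len(lines)` guard fixes an IndexError for SRT files that end without a trailing blank line; a minimal sketch of the pattern (not the library's full parser) with a made-up input:

```python
# Minimal sketch of the bounds check, not the library's full SRT parser:
# without `i < len(lines)`, a file whose last entry has no trailing blank
# line would raise IndexError.
def collect_text(lines, i):
    text = []
    while i < len(lines) and lines[i].strip():
        text.append(lines[i].strip())
        i += 1
    return text, i

lines = ['1', '00:00:01,000 --> 00:00:03,000', 'Hello', 'world']  # no trailing blank line
print(collect_text(lines, 2))  # (['Hello', 'world'], 4)
```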

5 changes: 1 addition & 4 deletions openlrc/transcribe.py
@@ -81,9 +81,6 @@ def seg_from_words(seg: Segment, seg_id, words, tokens):
seg.temperature, seg.avg_logprob, seg.compression_ratio, seg.no_speech_prob, words)

def mid_split(seg_entry):
"""
Todo: Split into multiple segments (>2)
"""
text = seg_entry.text
doc = nlp(text)

@@ -118,7 +115,7 @@ def is_punct(char):
for k in range(len(seg_entry.words) - 1):
gaps.append(seg_entry.words[k + 1].start - seg_entry.words[k].end)
max_gap = max(gaps)
split_idx = gaps.index(max_gap) # TODO: Multiple largest or Multiple long gap
split_idx = gaps.index(max_gap)

if max_gap >= 2: # Split using the max gap
former_words = seg_entry.words[:split_idx]
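For context, the split heuristic finds the largest silence between consecutive words and splits there if it is at least 2 seconds; a small standalone illustration with made-up word timings:

```python
# Standalone illustration of the max-gap split heuristic; the (start, end)
# word timings below are made up.
words = [(0.0, 0.4), (0.5, 0.9), (1.0, 1.3), (3.8, 4.2), (4.3, 4.7)]

gaps = [words[k + 1][0] - words[k][1] for k in range(len(words) - 1)]
max_gap = max(gaps)
split_idx = gaps.index(max_gap)

if max_gap >= 2:  # same 2-second threshold as above
    former, latter = words[:split_idx + 1], words[split_idx + 1:]
    print(former)  # [(0.0, 0.4), (0.5, 0.9), (1.0, 1.3)]
    print(latter)  # [(3.8, 4.2), (4.3, 4.7)]
```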
