Skip to content

Commit

Permalink
Update tests.
Browse files: browse the repository at this point in the history
  • Loading branch information
zh-plus committed Jun 27, 2023
1 parent 37997d1 commit 9970db6
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 13 deletions.
6 changes: 3 additions & 3 deletions openlrc/opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def merge_same(self):

def merge_short(self, threshold=1.5):
"""
Merge the short text.
Merge the short duration subtitle.
"""
new_elements = []

Expand Down Expand Up @@ -71,7 +71,7 @@ def merge_short(self, threshold=1.5):
merged_element.text += ' ' + element.text
merged_element.end = element.end

logger.debug(f'Merge short text: {len(self.subtitle.segments)} -> {len(new_elements)}')
logger.debug(f'Merge the short duration subtitle: {len(self.subtitle.segments)} -> {len(new_elements)}')

self.subtitle.segments = new_elements

Expand Down Expand Up @@ -118,7 +118,7 @@ def remove_unk(self):
new_elements = self.subtitle.segments

for i, element in enumerate(new_elements):
new_elements[i].text = element.text.replace('<unk>', ' ')
new_elements[i].text = element.text.replace('<unk>', '')

logger.debug('Remove <unk> done.')

Expand Down
46 changes: 46 additions & 0 deletions tests/data/test_subtitle.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"language": "en",
"generator": "test",
"segments": [
{
"start": 0.0,
"end": 3.0,
"text": "你好"
},
{
"start": 3.0,
"end": 5.0,
"text": "你好"
},
{
"start": 6.0,
"end": 9.0,
"text": "好好好好好好好好好好好好好好好好好好好好好好好好"
},
{
"start": 10.0,
"end": 10.1,
"text": "好好"
},
{
"start": 12,
"end": 15,
"text": "这太长打发螺丝扣搭街坊拉克斯酱豆腐垃圾啊阿里山扩大飞机拉克斯基的flak涉及到了反馈啊螺丝扣搭街坊拉啊手动阀手动阀阿斯顿发射点发射点发生发射点发射点发萨看见对方"
},
{
"start": 16,
"end": 19,
"text": "繁體的字"
},
{
"start": 20,
"end": 22,
"text": "<unk>unk<unk>"
},
{
"start": 23,
"end": 25,
"text": ""
}
]
}
73 changes: 73 additions & 0 deletions tests/test_opt.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,75 @@
# Copyright (C) 2023. Hao Zheng
# All rights reserved.

import json
import os

import pytest

from openlrc.opt import SubtitleOptimizer
from openlrc.subtitle import Subtitle


@pytest.fixture
def subtitle():
    """
    Provide a Subtitle loaded from the JSON test data shipped with the tests.

    The path is resolved relative to this test module instead of the current
    working directory, so the suite finds ``data/test_subtitle.json`` no
    matter which directory pytest is launched from (e.g. the repository root).
    """
    tests_dir = os.path.dirname(os.path.abspath(__file__))
    return Subtitle(os.path.join(tests_dir, 'data', 'test_subtitle.json'))


def test_merge_same(subtitle):
    """Running merge_same() must reduce the fixture's segment count by exactly one."""
    # Capture the expected size before the optimizer gets a chance to touch the data.
    expected_count = len(subtitle) - 1

    opt = SubtitleOptimizer(subtitle)
    opt.merge_same()

    remaining = opt.subtitle.segments
    assert len(remaining) == expected_count


def test_merge_short(subtitle):
    """Running merge_short() with its default threshold must drop the segment count by one."""
    # Capture the expected size before the optimizer gets a chance to touch the data.
    expected_count = len(subtitle) - 1

    opt = SubtitleOptimizer(subtitle)
    opt.merge_short()

    remaining = opt.subtitle.segments
    assert len(remaining) == expected_count


def test_merge_repeat(subtitle):
    """After merge_repeat(), the segment at index 2 must read '好好...'."""
    opt = SubtitleOptimizer(subtitle)
    opt.merge_repeat()

    target_segment = opt.subtitle.segments[2]
    assert target_segment.text == '好好...'


def test_cut_long(subtitle):
    """After cut_long(threshold=3, keep=2), the segment at index 4 must read '这太(Cut to 2)'."""
    opt = SubtitleOptimizer(subtitle)
    opt.cut_long(threshold=3, keep=2)

    target_segment = opt.subtitle.segments[4]
    assert target_segment.text == '这太(Cut to 2)'


def test_traditional2mandarin(subtitle):
    """After traditional2mandarin(), the segment at index 5 must read '繁体的字'."""
    opt = SubtitleOptimizer(subtitle)
    opt.traditional2mandarin()

    target_segment = opt.subtitle.segments[5]
    assert target_segment.text == '繁体的字'


def test_remove_unk(subtitle):
    """After remove_unk(), the segment at index 6 must be left with just 'unk'."""
    opt = SubtitleOptimizer(subtitle)
    opt.remove_unk()

    target_segment = opt.subtitle.segments[6]
    assert target_segment.text == 'unk'


def test_remove_empty(subtitle):
    """Running remove_empty() must reduce the fixture's segment count by exactly one."""
    # Capture the expected size before the optimizer gets a chance to touch the data.
    expected_count = len(subtitle) - 1

    opt = SubtitleOptimizer(subtitle)
    opt.remove_empty()

    remaining = opt.subtitle.segments
    assert len(remaining) == expected_count


def test_save(subtitle):
    """
    Run every optimization step, save the result and verify the saved JSON.

    The output file is deleted in a ``finally`` clause so that a failing
    assertion (or an error while saving/reading) does not leave
    ``test_subtitle_optimized.json`` behind to pollute later runs.
    """
    output_name = 'test_subtitle_optimized.json'

    optimizer = SubtitleOptimizer(subtitle)
    optimizer.perform_all()

    try:
        optimizer.save(output_name=output_name)

        with open(output_name, 'r', encoding='utf-8') as f:
            optimized_subtitle = json.load(f)

        assert optimized_subtitle['language'] == 'en'
        assert optimized_subtitle['generator'] == 'test'
        assert len(optimized_subtitle['segments']) == 5
    finally:
        # Guard the removal: if save() itself failed there may be no file, and
        # a FileNotFoundError here would mask the real failure.
        if os.path.exists(output_name):
            os.remove(output_name)
38 changes: 28 additions & 10 deletions tests/test_prompter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,9 @@ def prompter():
return BaseTranslatePrompter('ja', 'zh-cn', 'movie', 'Title', 'Background', 'Synopsis')


def test_user_prompt(prompter):
user_input = '''#1
Original>
変わりゆく時代において、
Translation>'''
expected_output = '''<title>Title</title>
@pytest.fixture()
def formatted_user_input():
return '''<title>Title</title>
<background>Background</background>
<synopsis>Synopsis</synopsis>
<context>
Expand All @@ -26,17 +23,34 @@ def test_user_prompt(prompter):
</context>
<chunk_id> Scene 1 Chunk 1 <chunk_id>
Please translate these subtitles for movie named Title from Japanese to Mandarin Chinese.
Please translate these subtitles for movie named Title from Japanese to Chinese (China).
#1
Original>
変わりゆく時代において、
Translation>
#2
Original>
生き残る秘訣は、進化し続けることです。
Translation>
<summary></summary>
<scene></scene>'''


def test_user_prompt(prompter, formatted_user_input):
user_input = '''#1
Original>
変わりゆく時代において、
Translation>
#2
Original>
生き残る秘訣は、進化し続けることです。
Translation>'''
assert prompter.user(1, user_input, ['test chunk1 summary', 'test chunk2 summary'],
'test scene content') == expected_output
'test scene content') == formatted_user_input


def test_format_texts():
Expand All @@ -45,9 +59,10 @@ def test_format_texts():
assert BaseTranslatePrompter.format_texts(texts) == expected_output


def test_check_format():
def test_check_format(formatted_user_input):
prompter = BaseTranslatePrompter('ja', 'zh-cn', 'movie', 'Title', 'Synopsis')
messages = []
messages = [{'role': 'system', 'content': 'system content'},
{'role': 'user', 'content': formatted_user_input}]
content = '''<title>Title</title>
<background>Background</background>
<synopsis>Synopsis</synopsis>
Expand All @@ -61,10 +76,13 @@ def test_check_format():
Original>
変わりゆく時代において、
Translation>
在不断变化的时代里,
#2
Original>
生き残る秘訣は、進化し続けることです。
Translation>
生存的秘诀是不断进化。
<summary>Summary</summary>
<scene>Scene</scene>
Expand Down

0 comments on commit 9970db6

Please sign in to comment.