From ddbb4b5ae5603d7afd285be04293edde7a8058bf Mon Sep 17 00:00:00 2001
From: Jan Cizmar
Date: Tue, 3 Sep 2024 17:17:09 +0200
Subject: [PATCH] fix: Exact plural forms for basic MT translators

---
 .../PluralTranslationUtil.kt                  | 68 +++++++++++++------
 .../unit/util/PluralTranslationUtilTest.kt    | 31 +++++++++
 2 files changed, 77 insertions(+), 22 deletions(-)
 create mode 100644 backend/data/src/test/kotlin/io/tolgee/unit/util/PluralTranslationUtilTest.kt

diff --git a/backend/data/src/main/kotlin/io/tolgee/service/machineTranslation/PluralTranslationUtil.kt b/backend/data/src/main/kotlin/io/tolgee/service/machineTranslation/PluralTranslationUtil.kt
index 0eb97f6dd7..49d81d73b0 100644
--- a/backend/data/src/main/kotlin/io/tolgee/service/machineTranslation/PluralTranslationUtil.kt
+++ b/backend/data/src/main/kotlin/io/tolgee/service/machineTranslation/PluralTranslationUtil.kt
@@ -12,21 +12,14 @@ class PluralTranslationUtil(
   private val item: MtBatchItemParams,
   private val translateFn: (String) -> MtTranslatorResult,
 ) {
-  val forms by lazy {
-    context.getPluralFormsReplacingReplaceParam(baseTranslationText)
-      ?: throw IllegalStateException("Plural forms are null")
-  }
-
   fun translate(): MtTranslatorResult {
     return result
   }
 
   private val preparedFormSourceStrings: Sequence<Pair<String, String>> by lazy {
-    return@lazy targetExamples.asSequence().map {
-      val form = sourceRules?.select(it.value.toDouble())
-      val formValue = forms.forms[form] ?: forms.forms[PluralRules.KEYWORD_OTHER] ?: ""
-      it.key to formValue.replaceReplaceNumberPlaceholderWithExample(it.value)
-    }
+    val targetLanguageTag = context.getLanguage(item.targetLanguageId).tag
+    val sourceLanguageTag = context.baseLanguage.tag
+    getPreparedSourceStrings(sourceLanguageTag, targetLanguageTag, forms)
   }
 
   private val translated by lazy {
@@ -35,6 +28,11 @@ class PluralTranslationUtil(
     }
   }
 
+  private val forms by lazy {
+    context.getPluralFormsReplacingReplaceParam(baseTranslationText)
+      ?: throw IllegalStateException("Plural forms are null")
+  }
+
   private val result: MtTranslatorResult by lazy {
     val result =
       translated.map { (form, result) ->
@@ -59,18 +57,6 @@ class PluralTranslationUtil(
     )
   }
 
-  private val targetExamples by lazy {
-    val targetLanguageTag = context.getLanguage(item.targetLanguageId).tag
-    val targetULocale = getULocaleFromTag(targetLanguageTag)
-    val targetRules = PluralRules.forLocale(targetULocale)
-    getPluralFormExamples(targetRules)
-  }
-
-  private val sourceRules by lazy {
-    val sourceLanguageTag = context.baseLanguage.tag
-    getRulesByTag(sourceLanguageTag)
-  }
-
   private fun String.replaceNumberTags(): String {
     return this.replace(TOLGEE_TAG_REGEX, "#")
   }
@@ -126,5 +112,43 @@ class PluralTranslationUtil(
       val sourceULocale = getULocaleFromTag(languageTag)
       return PluralRules.forLocale(sourceULocale)
     }
+
+    fun getPreparedSourceStrings(
+      sourceLanguageTag: String,
+      targetLanguageTag: String,
+      forms: PluralForms,
+    ): Sequence<Pair<String, String>> {
+      val sourceRules = getRulesByTag(sourceLanguageTag)
+      val keywordCases =
+        getTargetExamples(targetLanguageTag).asSequence().map {
+          val form = sourceRules?.select(it.value.toDouble())
+          val formValue = forms.forms[form] ?: forms.forms[PluralRules.KEYWORD_OTHER] ?: ""
+          it.key to formValue.replaceReplaceNumberPlaceholderWithExample(it.value)
+        }
+
+      val exactCases =
+        forms.forms.asSequence().filter {
+          it.key.startsWith("=")
+        }.mapNotNull {
+          val number = it.key.substring(1).toDoubleOrNull() ?: return@mapNotNull null
+          it.key to it.value.replaceReplaceNumberPlaceholderWithExample(number)
+        }
+
+      return keywordCases + exactCases
+    }
+
+    private fun String.toDoubleOrNull(): Number? {
+      return try {
+        this.toBigDecimalOrNull()
+      } catch (e: NumberFormatException) {
+        null
+      }
+    }
+
+    private fun getTargetExamples(targetLanguageTag: String): Map<String, Number> {
+      val targetULocale = getULocaleFromTag(targetLanguageTag)
+      val targetRules = PluralRules.forLocale(targetULocale)
+      return getPluralFormExamples(targetRules)
+    }
   }
 }
diff --git a/backend/data/src/test/kotlin/io/tolgee/unit/util/PluralTranslationUtilTest.kt b/backend/data/src/test/kotlin/io/tolgee/unit/util/PluralTranslationUtilTest.kt
new file mode 100644
index 0000000000..d84f4ea25b
--- /dev/null
+++ b/backend/data/src/test/kotlin/io/tolgee/unit/util/PluralTranslationUtilTest.kt
@@ -0,0 +1,31 @@
+package io.tolgee.unit.util
+
+import io.tolgee.formats.getPluralFormsReplacingReplaceParam
+import io.tolgee.service.machineTranslation.PluralTranslationUtil
+import io.tolgee.testing.assert
+import org.junit.jupiter.api.Test
+
+class PluralTranslationUtilTest {
+  @Test
+  fun `provides correct forms for basic MT providers`() {
+    val baseString = """{number, plural, one {# apple} =1 {one apple} =2 {Two apples} =5 {# apples} other {# apples}}"""
+    val result =
+      PluralTranslationUtil.getPreparedSourceStrings(
+        "en",
+        "cs",
+        getPluralFormsReplacingReplaceParam(baseString, PluralTranslationUtil.REPLACE_NUMBER_PLACEHOLDER)!!,
+      )
+
+    result.toMap().assert.isEqualTo(
+      mapOf(
+        "one" to "1 apple",
+        "few" to "2 apples",
+        "many" to "0.5 apples",
+        "other" to "10 apples",
+        "=1" to "one apple",
+        "=2" to "Two apples",
+        "=5" to "5 apples",
+      ),
+    )
+  }
+}