Skip to content

Commit

Permalink
fix: Exact plural forms for basic MT translators
Browse files Browse the repository at this point in the history
  • Loading branch information
JanCizmar committed Sep 3, 2024
1 parent 3765634 commit ddbb4b5
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,14 @@ class PluralTranslationUtil(
private val item: MtBatchItemParams,
private val translateFn: (String) -> MtTranslatorResult,
) {
/**
 * Plural forms of the base translation text, resolved on first access through
 * `context.getPluralFormsReplacingReplaceParam`.
 *
 * Throws [IllegalStateException] when the context returns `null`.
 */
val forms by lazy {
  checkNotNull(context.getPluralFormsReplacingReplaceParam(baseTranslationText)) {
    "Plural forms are null"
  }
}

/**
 * Returns the translation result held by the `result` property of this instance.
 */
fun translate(): MtTranslatorResult = result

private val preparedFormSourceStrings: Sequence<Pair<String, String>> by lazy {
return@lazy targetExamples.asSequence().map {
val form = sourceRules?.select(it.value.toDouble())
val formValue = forms.forms[form] ?: forms.forms[PluralRules.KEYWORD_OTHER] ?: ""
it.key to formValue.replaceReplaceNumberPlaceholderWithExample(it.value)
}
val targetLanguageTag = context.getLanguage(item.targetLanguageId).tag
val sourceLanguageTag = context.baseLanguage.tag
getPreparedSourceStrings(sourceLanguageTag, targetLanguageTag, forms)
}

private val translated by lazy {
Expand All @@ -35,6 +28,11 @@ class PluralTranslationUtil(
}
}

/**
 * Plural forms of the base translation text, computed lazily on first access.
 *
 * Fails with [IllegalStateException] if the context cannot provide any plural forms.
 */
private val forms by lazy {
  val resolvedForms = context.getPluralFormsReplacingReplaceParam(baseTranslationText)
  resolvedForms ?: error("Plural forms are null")
}

private val result: MtTranslatorResult by lazy {
val result =
translated.map { (form, result) ->
Expand All @@ -59,18 +57,6 @@ class PluralTranslationUtil(
)
}

/**
 * Example values per plural form of the target language, computed lazily.
 *
 * The target language tag is looked up via `context.getLanguage(item.targetLanguageId)`,
 * converted to a locale, and its plural rules are passed to `getPluralFormExamples`.
 */
private val targetExamples by lazy {
  val targetTag = context.getLanguage(item.targetLanguageId).tag
  getPluralFormExamples(PluralRules.forLocale(getULocaleFromTag(targetTag)))
}

/**
 * Plural rules of the base (source) language, resolved lazily from `context.baseLanguage.tag`.
 */
private val sourceRules by lazy { getRulesByTag(context.baseLanguage.tag) }

/**
 * Returns a copy of this string in which everything matched by `TOLGEE_TAG_REGEX`
 * is replaced with the `#` plural number placeholder.
 */
private fun String.replaceNumberTags(): String = replace(TOLGEE_TAG_REGEX, "#")
Expand Down Expand Up @@ -126,5 +112,43 @@ class PluralTranslationUtil(
val sourceULocale = getULocaleFromTag(languageTag)
return PluralRules.forLocale(sourceULocale)
}

/**
 * Builds the source strings that are sent to the translator, one per plural case.
 *
 * Two groups of cases are produced, in this order:
 *  1. Keyword cases: for every plural keyword of the target language (with its example
 *     number from `getTargetExamples`), the source form is chosen by applying the source
 *     language rules to that example number. When the source has no such form, the
 *     `other` form is used, and finally an empty string.
 *  2. Exact cases: every source form whose key starts with `=` and whose remainder
 *     parses as a number. Keys with an unparsable remainder are skipped.
 *
 * In both groups the number placeholder in the form text is replaced with the example
 * (or exact) number.
 *
 * @param sourceLanguageTag language tag used to resolve the source plural rules
 * @param targetLanguageTag language tag whose plural keywords and examples drive the keyword cases
 * @param forms plural forms of the source text
 * @return lazily evaluated pairs of case key (keyword or `=N`) to prepared source string
 */
fun getPreparedSourceStrings(
  sourceLanguageTag: String,
  targetLanguageTag: String,
  forms: PluralForms,
): Sequence<Pair<String, String>> {
  val sourceRules = getRulesByTag(sourceLanguageTag)

  val keywordCases =
    getTargetExamples(targetLanguageTag).asSequence().map { (targetKeyword, exampleNumber) ->
      // Pick the source form that the example number falls into in the source language.
      val sourceKeyword = sourceRules?.select(exampleNumber.toDouble())
      val sourceText =
        forms.forms[sourceKeyword] ?: forms.forms[PluralRules.KEYWORD_OTHER] ?: ""
      targetKeyword to sourceText.replaceReplaceNumberPlaceholderWithExample(exampleNumber)
    }

  val exactCases =
    forms.forms.asSequence()
      .filter { (caseKey, _) -> caseKey.startsWith("=") }
      .mapNotNull { (caseKey, caseText) ->
        // The part after "=" is the exact number; entries that do not parse are dropped.
        caseKey.removePrefix("=").toDoubleOrNull()?.let { exactNumber ->
          caseKey to caseText.replaceReplaceNumberPlaceholderWithExample(exactNumber)
        }
      }

  return keywordCases + exactCases
}

/**
 * Parses this string as a decimal number, or returns `null` when it is not a valid number.
 *
 * Despite its name, the returned [Number] is a [java.math.BigDecimal], not a `Double`.
 * NOTE(review): presumably this is intentional so that exact values such as `5` are not
 * rendered as `5.0` when substituted into a plural form — confirm before renaming.
 *
 * No exception handling is needed here: `toBigDecimalOrNull` already returns `null`
 * for invalid input instead of throwing [NumberFormatException].
 */
private fun String.toDoubleOrNull(): Number? = toBigDecimalOrNull()

/**
 * Returns the example numbers produced by `getPluralFormExamples` for the plural
 * rules of the given target language.
 *
 * @param targetLanguageTag language tag of the target language
 * @return map of plural keyword to an example number
 */
private fun getTargetExamples(targetLanguageTag: String): Map<String, Number> {
  val rulesOfTarget = PluralRules.forLocale(getULocaleFromTag(targetLanguageTag))
  return getPluralFormExamples(rulesOfTarget)
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package io.tolgee.unit.util

import io.tolgee.formats.getPluralFormsReplacingReplaceParam
import io.tolgee.service.machineTranslation.PluralTranslationUtil
import io.tolgee.testing.assert
import org.junit.jupiter.api.Test

/**
 * Unit test for [PluralTranslationUtil.getPreparedSourceStrings].
 */
class PluralTranslationUtilTest {
  /**
   * English source with keyword and exact (`=N`) forms, prepared for Czech:
   * each Czech plural keyword gets the matching English form with an example number,
   * and every exact form is carried over under its own `=N` key.
   */
  @Test
  fun `provides correct forms for basic MT providers`() {
    val baseString = """{number, plural, one {# apple} =1 {one apple} =2 {Two apples} =5 {# apples} other {# apples}}"""
    val sourceForms =
      getPluralFormsReplacingReplaceParam(baseString, PluralTranslationUtil.REPLACE_NUMBER_PLACEHOLDER)!!

    val expectedSourceStrings =
      mapOf(
        "one" to "<x id=\"tolgee-number\">1</x> apple",
        "few" to "<x id=\"tolgee-number\">2</x> apples",
        "many" to "<x id=\"tolgee-number\">0.5</x> apples",
        "other" to "<x id=\"tolgee-number\">10</x> apples",
        "=1" to "one apple",
        "=2" to "Two apples",
        "=5" to "<x id=\"tolgee-number\">5</x> apples",
      )

    val preparedSourceStrings =
      PluralTranslationUtil.getPreparedSourceStrings("en", "cs", sourceForms).toMap()

    preparedSourceStrings.assert.isEqualTo(expectedSourceStrings)
  }
}

0 comments on commit ddbb4b5

Please sign in to comment.