From c1c45c119fdde749caf611c61f36c61a0866be9e Mon Sep 17 00:00:00 2001 From: Skip Balk Date: Tue, 3 Sep 2024 13:39:20 +0200 Subject: [PATCH] Add support for non-trivial plural-case expansion. --- .../PluralTranslationUtil.kt | 41 ++++++++++-- .../MtBatchTranslatorTest.kt | 63 ++++++++++++++++--- 2 files changed, 90 insertions(+), 14 deletions(-) diff --git a/backend/data/src/main/kotlin/io/tolgee/service/machineTranslation/PluralTranslationUtil.kt b/backend/data/src/main/kotlin/io/tolgee/service/machineTranslation/PluralTranslationUtil.kt index 0eb97f6dd7..a211285a02 100644 --- a/backend/data/src/main/kotlin/io/tolgee/service/machineTranslation/PluralTranslationUtil.kt +++ b/backend/data/src/main/kotlin/io/tolgee/service/machineTranslation/PluralTranslationUtil.kt @@ -21,16 +21,44 @@ class PluralTranslationUtil( return result } - private val preparedFormSourceStrings: Sequence> by lazy { - return@lazy targetExamples.asSequence().map { + private fun preparedFormSourceStrings(): Sequence> { + val formBasedCases = preparedFormSourceStringsBasedOnForm().toList() + val localeBasedCases = preparedFormSourceStringsBasedOnLocale().toList() + val localeBasedCaseKeys = localeBasedCases.map { e -> e.first }.toSet() + + // N.B.: locale-based cases take precedence over form-based cases, + // yet form-cases appear first so that we naturally end with: 'few', 'many', 'other' + val mergedCases = ArrayList>() + formBasedCases.stream().filter { e -> !localeBasedCaseKeys.contains(e.first) }.forEach { e -> mergedCases.add(e) } + localeBasedCases.stream().forEach { e -> mergedCases.add(e) } + return mergedCases.asSequence() + } + + private fun preparedFormSourceStringsBasedOnLocale(): Sequence> { + return targetExamples.asSequence().map { val form = sourceRules?.select(it.value.toDouble()) - val formValue = forms.forms[form] ?: forms.forms[PluralRules.KEYWORD_OTHER] ?: "" + val formValue = forms.forms[form] ?: forms.forms["=" + it.value] ?: forms.forms[PluralRules.KEYWORD_OTHER] ?: "" it.key to formValue.replaceReplaceNumberPlaceholderWithExample(it.value) } } + private fun preparedFormSourceStringsBasedOnForm(): Sequence> { + return forms.forms.asSequence().map { + if (it.key.startsWith("=") && it.key.substring(1).toDoubleOrNull() != null) { + it.key to it.value.replaceReplaceNumberPlaceholderWithExample(it.key.substring(1).toDouble()) + } else { + val numValue = targetExamples[it.key]?.toDouble() ?: 10.0 + val formValue = + forms.forms[it.key] ?: forms.forms[sourceRules?.select(numValue)] ?: forms.forms["=" + it.value] + ?: forms.forms[PluralRules.KEYWORD_OTHER] ?: "" + + it.key to formValue.replaceReplaceNumberPlaceholderWithExample(numValue) + } + } + } + private val translated by lazy { - preparedFormSourceStrings.map { + preparedFormSourceStrings().map { it.first to translateFn(it.second) } } @@ -98,8 +126,9 @@ class PluralTranslationUtil( pluralForms: PluralForms, ): Sequence> { return getTargetNumberExamples(targetLanguageTag).asSequence().map { - val form = getRulesByTag(sourceLanguageTag)?.select(it.value.toDouble()) - val formValue = pluralForms.forms[form] ?: pluralForms.forms[PluralRules.KEYWORD_OTHER] ?: "" + val originalForm = "=" + it.value + val rewrittenForm = getRulesByTag(sourceLanguageTag)?.select(it.value.toDouble()) + val formValue = pluralForms.forms[rewrittenForm] ?: pluralForms.forms[originalForm] ?: pluralForms.forms[PluralRules.KEYWORD_OTHER] ?: "" it.key to formValue.replaceReplaceNumberPlaceholderWithExample(it.value, addTag = false) } } diff --git a/backend/data/src/test/kotlin/io/tolgee/service/machineTranslation/MtBatchTranslatorTest.kt b/backend/data/src/test/kotlin/io/tolgee/service/machineTranslation/MtBatchTranslatorTest.kt index 7a0854c579..d687143964 100644 --- a/backend/data/src/test/kotlin/io/tolgee/service/machineTranslation/MtBatchTranslatorTest.kt +++ b/backend/data/src/test/kotlin/io/tolgee/service/machineTranslation/MtBatchTranslatorTest.kt @@ -49,15 +49,21 @@ class MtBatchTranslatorTest { ), ).first() - translated.translatedText.assert.isEqualTo( + assertSameLines( + translated.translatedText ?: "", "{value, plural,\n" + - "one {Jeden pes}\n" + + "=0 {žádný pes..!}\n" + + "=1 {Jeden pes..!}\n" + + "=13 {'#' psa}\n" + + "zero {žádný pes..?}\n" + + "one {Jeden pes..?}\n" + "few {'#' psi}\n" + "many {'#' psa}\n" + "other {'#' psů}\n" + "}", ) - translated.actualPrice.assert.isEqualTo(400) + + translated.actualPrice.assert.isEqualTo(100) } @Test @@ -80,17 +86,34 @@ class MtBatchTranslatorTest { ), ).first() - translated.translatedText.assert.isEqualTo( + assertSameLines( + translated.translatedText ?: "", "{value, plural,\n" + - "one {Jeden pes}\n" + + "=0 {žádný pes..!}\n" + + "=1 {Jeden pes..!}\n" + + "=13 {'#' psa}\n" + + "zero {žádný pes..?}\n" + + "one {Jeden pes..?}\n" + "few {'#' psi}\n" + "many {'#' psa}\n" + "other {'#' psů}\n" + "}", ) + translated.actualPrice.assert.isEqualTo(100) } + private fun assertSameLines( + txt1: String, + txt2: String, + ) { + val out1 = txt1.split("\n") + val out2 = txt2.split("\n") + + // we are not picky about the order + out1.toSortedSet().assert.isEqualTo(out2.toSortedSet()) + } + private fun prepareValidKey() { preparedKey = KeyForMt( @@ -98,7 +121,7 @@ class MtBatchTranslatorTest { name = "key", namespace = "test", description = "test", - baseTranslation = "{value, plural, one {# dog} other {# dogs}}", + baseTranslation = "{value, plural, =0 {No dog..!} zero {No dog..?} one {# dog} other {# dogs}}", isPlural = true, ) } @@ -110,7 +133,11 @@ class MtBatchTranslatorTest { translatedText = null, translatedPluralForms = mapOf( - "one" to "Jeden pes", + "=0" to "žádný pes..!", + "=1" to "Jeden pes..!", + "=13" to "# psa", + "zero" to "žádný pes..?", + "one" to "Jeden pes..?", "few" to "# psi", "many" to "# psa", "other" to "# psů", @@ -125,7 +152,27 @@ class MtBatchTranslatorTest { mtServiceManagerResults = listOf( TranslateResult( - translatedText = "Jeden pes", + translatedText = "žádný pes..!", + actualPrice = 100, + usedService = MtServiceType.GOOGLE, + ), + TranslateResult( + translatedText = "Jeden pes..!", + actualPrice = 100, + usedService = MtServiceType.GOOGLE, + ), + TranslateResult( + translatedText = "13 psů", + actualPrice = 100, + usedService = MtServiceType.GOOGLE, + ), + TranslateResult( + translatedText = "žádný pes..?", + actualPrice = 100, + usedService = MtServiceType.GOOGLE, + ), + TranslateResult( + translatedText = "Jeden pes..?", actualPrice = 100, usedService = MtServiceType.GOOGLE, ),