Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: ICU plural-case handling during Machine-Translation #2445

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,44 @@ class PluralTranslationUtil(
return result
}

private val preparedFormSourceStrings: Sequence<Pair<String, String>> by lazy {
return@lazy targetExamples.asSequence().map {
/**
 * Builds the (plural case key, source text) pairs that are sent to machine translation.
 *
 * Merges the cases derived from the forms present in the source string with the cases
 * derived from the target examples. When both sides produce the same key, the
 * locale-based entry is kept and the form-based one is dropped.
 *
 * @return the form-based pairs whose key has no locale-based counterpart, followed by
 * all locale-based pairs
 */
private fun preparedFormSourceStrings(): Sequence<Pair<String, String>> {
    val formBasedCases = preparedFormSourceStringsBasedOnForm().toList()
    val localeBasedCases = preparedFormSourceStringsBasedOnLocale().toList()
    val localeBasedCaseKeys = localeBasedCases.map { it.first }.toSet()

    // N.B.: locale-based cases take precedence over form-based cases,
    // yet form-cases appear first so that we naturally end with: 'few', 'many', 'other'
    val mergedCases = formBasedCases.filter { it.first !in localeBasedCaseKeys } + localeBasedCases
    return mergedCases.asSequence()
}

/**
 * Prepares one source string per entry of targetExamples.
 *
 * For each entry, the example number is run through the source plural rules to pick a
 * source form; the chosen form text then has its number placeholder replaced by that
 * example number. The resulting pair is keyed by the targetExamples key.
 */
private fun preparedFormSourceStringsBasedOnLocale(): Sequence<Pair<String, String>> {
return targetExamples.asSequence().map {
val form = sourceRules?.select(it.value.toDouble())
// NOTE(review): this span is a rendered diff; the two `formValue` lines below look like the
// removed and the added side of the same statement — presumably only the second one exists
// in the changed code. Confirm against the real file.
val formValue = forms.forms[form] ?: forms.forms[PluralRules.KEYWORD_OTHER] ?: ""
// Lookup order: form selected by the source rules, then the exact-match "=<example>" form,
// then the "other" form, then an empty string.
val formValue = forms.forms[form] ?: forms.forms["=" + it.value] ?: forms.forms[PluralRules.KEYWORD_OTHER] ?: ""
it.key to formValue.replaceReplaceNumberPlaceholderWithExample(it.value)
}
}

/**
 * Prepares one source string per plural case that is explicitly present in the source forms.
 *
 * Exact-match cases (`=N`, where N parses as a number) get their number placeholder
 * replaced by N itself. Every other case uses the target example number registered for
 * that key, or 10.0 when targetExamples has no entry for it.
 *
 * @return lazily evaluated pairs of case key to source text with the number placeholder
 * replaced by the chosen example number
 */
private fun preparedFormSourceStringsBasedOnForm(): Sequence<Pair<String, String>> {
    return forms.forms.asSequence().map { (caseKey, caseText) ->
        // "=N" cases carry their own example number; parse it once.
        val exactNumber = if (caseKey.startsWith("=")) caseKey.substring(1).toDoubleOrNull() else null
        val exampleNumber = exactNumber ?: targetExamples[caseKey]?.toDouble() ?: 10.0

        // The entry being iterated always supplies the text for its own key, so no
        // fallback lookup into forms.forms is needed here.
        caseKey to caseText.replaceReplaceNumberPlaceholderWithExample(exampleNumber)
    }
}

/**
 * Lazily initialized pairs of plural case key to translated text: each prepared source
 * string is passed through translateFn and paired with its original key.
 */
private val translated by lazy {
// NOTE(review): this span is a rendered diff; the property access on the next line and the
// function call after it look like the removed and added side of the same statement —
// presumably only the function call exists in the changed code. Confirm against the real file.
preparedFormSourceStrings.map {
preparedFormSourceStrings().map {
it.first to translateFn(it.second)
}
}
Expand Down Expand Up @@ -98,8 +126,9 @@ class PluralTranslationUtil(
pluralForms: PluralForms,
): Sequence<Pair<String, String>> {
return getTargetNumberExamples(targetLanguageTag).asSequence().map {
val form = getRulesByTag(sourceLanguageTag)?.select(it.value.toDouble())
val formValue = pluralForms.forms[form] ?: pluralForms.forms[PluralRules.KEYWORD_OTHER] ?: ""
val originalForm = "=" + it.value
val rewrittenForm = getRulesByTag(sourceLanguageTag)?.select(it.value.toDouble())
val formValue = pluralForms.forms[rewrittenForm] ?: pluralForms.forms[originalForm] ?: pluralForms.forms[PluralRules.KEYWORD_OTHER] ?: ""
it.key to formValue.replaceReplaceNumberPlaceholderWithExample(it.value, addTag = false)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,21 @@ class MtBatchTranslatorTest {
),
).first()

translated.translatedText.assert.isEqualTo(
assertSameLines(
translated.translatedText ?: "",
"{value, plural,\n" +
"one {Jeden pes}\n" +
"=0 {žádný pes..!}\n" +
"=1 {Jeden pes..!}\n" +
"=13 {'#' psa}\n" +
"zero {žádný pes..?}\n" +
"one {Jeden pes..?}\n" +
"few {'#' psi}\n" +
"many {'#' psa}\n" +
"other {'#' psů}\n" +
"}",
)
translated.actualPrice.assert.isEqualTo(400)

translated.actualPrice.assert.isEqualTo(100)
}

@Test
Expand All @@ -80,25 +86,42 @@ class MtBatchTranslatorTest {
),
).first()

translated.translatedText.assert.isEqualTo(
assertSameLines(
translated.translatedText ?: "",
"{value, plural,\n" +
"one {Jeden pes}\n" +
"=0 {žádný pes..!}\n" +
"=1 {Jeden pes..!}\n" +
"=13 {'#' psa}\n" +
"zero {žádný pes..?}\n" +
"one {Jeden pes..?}\n" +
"few {'#' psi}\n" +
"many {'#' psa}\n" +
"other {'#' psů}\n" +
"}",
)

translated.actualPrice.assert.isEqualTo(100)
}

/**
 * Asserts that [txt1] and [txt2] consist of the same lines, regardless of line order.
 *
 * Lines are compared as sorted lists rather than as sets, so a line that is duplicated
 * or missing on one side fails the assertion instead of being silently collapsed.
 *
 * @param txt1 the actual text, split on "\n"
 * @param txt2 the expected text, split on "\n"
 */
private fun assertSameLines(
    txt1: String,
    txt2: String,
) {
    // we are not picky about the order, only about which lines occur and how often
    val actualLines = txt1.split("\n").sorted()
    val expectedLines = txt2.split("\n").sorted()

    actualLines.assert.isEqualTo(expectedLines)
}

/**
 * Assigns preparedKey a plural KeyForMt fixture (isPlural = true) with an ICU plural
 * message as its base translation.
 */
private fun prepareValidKey() {
preparedKey =
KeyForMt(
id = 1,
name = "key",
namespace = "test",
description = "test",
// NOTE(review): this span is a rendered diff; the two `baseTranslation` lines look like the
// removed and added side of the same argument — presumably only the second one (with the
// "=0" and "zero" cases) exists in the changed code. Confirm against the real file.
baseTranslation = "{value, plural, one {# dog} other {# dogs}}",
baseTranslation = "{value, plural, =0 {No dog..!} zero {No dog..?} one {# dog} other {# dogs}}",
isPlural = true,
)
}
Expand All @@ -110,7 +133,11 @@ class MtBatchTranslatorTest {
translatedText = null,
translatedPluralForms =
mapOf(
"one" to "Jeden pes",
"=0" to "žádný pes..!",
"=1" to "Jeden pes..!",
"=13" to "# psa",
"zero" to "žádný pes..?",
"one" to "Jeden pes..?",
"few" to "# psi",
"many" to "# psa",
"other" to "# psů",
Expand All @@ -125,7 +152,27 @@ class MtBatchTranslatorTest {
mtServiceManagerResults =
listOf(
TranslateResult(
translatedText = "Jeden pes",
translatedText = "žádný pes..!",
actualPrice = 100,
usedService = MtServiceType.GOOGLE,
),
TranslateResult(
translatedText = "Jeden pes..!",
actualPrice = 100,
usedService = MtServiceType.GOOGLE,
),
TranslateResult(
translatedText = "<x id=\"tolgee-number\">13</x> psů",
actualPrice = 100,
usedService = MtServiceType.GOOGLE,
),
TranslateResult(
translatedText = "žádný pes..?",
actualPrice = 100,
usedService = MtServiceType.GOOGLE,
),
TranslateResult(
translatedText = "Jeden pes..?",
actualPrice = 100,
usedService = MtServiceType.GOOGLE,
),
Expand Down