From 12b3fd62cc48a0bd7b15b3ef06fe85e637505645 Mon Sep 17 00:00:00 2001 From: Jan Cizmar Date: Wed, 4 Sep 2024 15:41:40 +0200 Subject: [PATCH] fix: Exact plural forms for basic MT translators (#2454) --- .../PluralTranslationUtil.kt | 68 +++++++++++++------ .../unit/util/PluralTranslationUtilTest.kt | 31 +++++++++ .../e2e/import/importApplication.cy.ts | 2 +- .../e2e/import/importResultManupulation.cy.ts | 2 +- e2e/cypress/e2e/translations/plurals.cy.ts | 23 ++++++- .../translations/TranslationEditor.tsx | 2 + .../TranslationsTable/TranslationWrite.tsx | 3 +- .../translationVisual/PluralEditor.tsx | 14 ++++ .../translationVisual/TranslationPlurals.tsx | 47 +++++++++---- .../translations/useTranslationCell.ts | 30 ++++++-- 10 files changed, 176 insertions(+), 46 deletions(-) create mode 100644 backend/data/src/test/kotlin/io/tolgee/unit/util/PluralTranslationUtilTest.kt diff --git a/backend/data/src/main/kotlin/io/tolgee/service/machineTranslation/PluralTranslationUtil.kt b/backend/data/src/main/kotlin/io/tolgee/service/machineTranslation/PluralTranslationUtil.kt index 0eb97f6dd7..49d81d73b0 100644 --- a/backend/data/src/main/kotlin/io/tolgee/service/machineTranslation/PluralTranslationUtil.kt +++ b/backend/data/src/main/kotlin/io/tolgee/service/machineTranslation/PluralTranslationUtil.kt @@ -12,21 +12,14 @@ class PluralTranslationUtil( private val item: MtBatchItemParams, private val translateFn: (String) -> MtTranslatorResult, ) { - val forms by lazy { - context.getPluralFormsReplacingReplaceParam(baseTranslationText) - ?: throw IllegalStateException("Plural forms are null") - } - fun translate(): MtTranslatorResult { return result } private val preparedFormSourceStrings: Sequence> by lazy { - return@lazy targetExamples.asSequence().map { - val form = sourceRules?.select(it.value.toDouble()) - val formValue = forms.forms[form] ?: forms.forms[PluralRules.KEYWORD_OTHER] ?: "" - it.key to formValue.replaceReplaceNumberPlaceholderWithExample(it.value) - } + val targetLanguageTag = context.getLanguage(item.targetLanguageId).tag + val sourceLanguageTag = context.baseLanguage.tag + getPreparedSourceStrings(sourceLanguageTag, targetLanguageTag, forms) } private val translated by lazy { @@ -35,6 +28,11 @@ class PluralTranslationUtil( } } + private val forms by lazy { + context.getPluralFormsReplacingReplaceParam(baseTranslationText) + ?: throw IllegalStateException("Plural forms are null") + } + private val result: MtTranslatorResult by lazy { val result = translated.map { (form, result) -> @@ -59,18 +57,6 @@ class PluralTranslationUtil( ) } - private val targetExamples by lazy { - val targetLanguageTag = context.getLanguage(item.targetLanguageId).tag - val targetULocale = getULocaleFromTag(targetLanguageTag) - val targetRules = PluralRules.forLocale(targetULocale) - getPluralFormExamples(targetRules) - } - - private val sourceRules by lazy { - val sourceLanguageTag = context.baseLanguage.tag - getRulesByTag(sourceLanguageTag) - } - private fun String.replaceNumberTags(): String { return this.replace(TOLGEE_TAG_REGEX, "#") } @@ -126,5 +112,43 @@ class PluralTranslationUtil( val sourceULocale = getULocaleFromTag(languageTag) return PluralRules.forLocale(sourceULocale) } + + fun getPreparedSourceStrings( + sourceLanguageTag: String, + targetLanguageTag: String, + forms: PluralForms, + ): Sequence> { + val sourceRules = getRulesByTag(sourceLanguageTag) + val keywordCases = + getTargetExamples(targetLanguageTag).asSequence().map { + val form = sourceRules?.select(it.value.toDouble()) + val formValue = forms.forms[form] ?: forms.forms[PluralRules.KEYWORD_OTHER] ?: "" + it.key to formValue.replaceReplaceNumberPlaceholderWithExample(it.value) + } + + val exactCases = + forms.forms.asSequence().filter { + it.key.startsWith("=") + }.mapNotNull { + val number = it.key.substring(1).toDoubleOrNull() ?: return@mapNotNull null + it.key to it.value.replaceReplaceNumberPlaceholderWithExample(number) + } + + return keywordCases + exactCases + } + + private fun String.toDoubleOrNull(): Number? { + return try { + this.toBigDecimalOrNull() + } catch (e: NumberFormatException) { + null + } + } + + private fun getTargetExamples(targetLanguageTag: String): Map { + val targetULocale = getULocaleFromTag(targetLanguageTag) + val targetRules = PluralRules.forLocale(targetULocale) + return getPluralFormExamples(targetRules) + } } } diff --git a/backend/data/src/test/kotlin/io/tolgee/unit/util/PluralTranslationUtilTest.kt b/backend/data/src/test/kotlin/io/tolgee/unit/util/PluralTranslationUtilTest.kt new file mode 100644 index 0000000000..d84f4ea25b --- /dev/null +++ b/backend/data/src/test/kotlin/io/tolgee/unit/util/PluralTranslationUtilTest.kt @@ -0,0 +1,31 @@ +package io.tolgee.unit.util + +import io.tolgee.formats.getPluralFormsReplacingReplaceParam +import io.tolgee.service.machineTranslation.PluralTranslationUtil +import io.tolgee.testing.assert +import org.junit.jupiter.api.Test + +class PluralTranslationUtilTest { + @Test + fun `provides correct forms for basic MT providers`() { + val baseString = """{number, plural, one {# apple} =1 {one apple} =2 {Two apples} =5 {# apples} other {# apples}}""" + val result = + PluralTranslationUtil.getPreparedSourceStrings( + "en", + "cs", + getPluralFormsReplacingReplaceParam(baseString, PluralTranslationUtil.REPLACE_NUMBER_PLACEHOLDER)!!, + ) + + result.toMap().assert.isEqualTo( + mapOf( + "one" to "1 apple", + "few" to "2 apples", + "many" to "0.5 apples", + "other" to "10 apples", + "=1" to "one apple", + "=2" to "Two apples", + "=5" to "5 apples", + ), + ) + } +} diff --git a/e2e/cypress/e2e/import/importApplication.cy.ts b/e2e/cypress/e2e/import/importApplication.cy.ts index 800a09af7a..a0e21f1293 100644 --- a/e2e/cypress/e2e/import/importApplication.cy.ts +++ b/e2e/cypress/e2e/import/importApplication.cy.ts @@ -22,7 +22,7 @@ describe('Import application', () => { 'Applies import', { retries: { - runMode: 4, + runMode: 10, }, }, () => { diff --git a/e2e/cypress/e2e/import/importResultManupulation.cy.ts b/e2e/cypress/e2e/import/importResultManupulation.cy.ts index d3d20af3ff..ee78c34966 100644 --- a/e2e/cypress/e2e/import/importResultManupulation.cy.ts +++ b/e2e/cypress/e2e/import/importResultManupulation.cy.ts @@ -188,7 +188,7 @@ describe('Import result & manipulation', () => { } ); - it('imports with selects namespaces', () => { + it('imports with selects namespaces', { retries: { runMode: 5 } }, () => { gcy('import_apply_import_button').click(); assertMessage('Import successful'); gcy('import-result-row').should('have.length', 0); diff --git a/e2e/cypress/e2e/translations/plurals.cy.ts b/e2e/cypress/e2e/translations/plurals.cy.ts index a6eaff5036..a3a65c0a8b 100644 --- a/e2e/cypress/e2e/translations/plurals.cy.ts +++ b/e2e/cypress/e2e/translations/plurals.cy.ts @@ -8,7 +8,7 @@ import { } from '../../common/translations'; import { waitForGlobalLoading } from '../../common/loading'; import { createKey, deleteProject } from '../../common/apiCalls/common'; -import { confirmStandard } from '../../common/shared'; +import { confirmStandard, gcyAdvanced } from '../../common/shared'; describe('Translations Base', () => { let project: ProjectDTO = null; @@ -60,6 +60,27 @@ describe('Translations Base', () => { .should('be.visible'); }); + it('shows base and existing exact forms', () => { + createKey( + project.id, + 'Test key', + { + en: 'You have {testValue, plural, one {# item} =2 {Two items} other {# items}}', + cs: 'Máte {testValue, plural, one {# položku} =4 {# položky } few {# položky} other {# položek}}', + }, + { isPlural: true } + ); + visitTranslations(project.id); + waitForGlobalLoading(); + getTranslationCell('Test key', 'cs').click(); + gcyAdvanced({ value: 'translation-editor', variant: '=2' }).should( + 'be.visible' + ); + gcyAdvanced({ value: 'translation-editor', variant: '=4' }).should( + 'be.visible' + ); + }); + it('will change plural parameter name for all translations', () => { createKey( project.id, diff --git a/webapp/src/views/projects/translations/TranslationEditor.tsx b/webapp/src/views/projects/translations/TranslationEditor.tsx index 5356e829df..8bde1efe13 100644 --- a/webapp/src/views/projects/translations/TranslationEditor.tsx +++ b/webapp/src/views/projects/translations/TranslationEditor.tsx @@ -18,6 +18,7 @@ export const TranslationEditor = ({ mode, tools, editorRef }: Props) => { handleSave, handleClose, handleInsertBase, + baseValue, } = tools; return ( @@ -30,6 +31,7 @@ export const TranslationEditor = ({ mode, tools, editorRef }: Props) => { autofocus={true} activeEditorRef={editorRef} mode={mode} + baseValue={baseValue} editorProps={{ shortcuts: [ { key: 'Escape', run: () => (handleClose(true), true) }, diff --git a/webapp/src/views/projects/translations/TranslationsTable/TranslationWrite.tsx b/webapp/src/views/projects/translations/TranslationsTable/TranslationWrite.tsx index b63776a10e..3449905969 100644 --- a/webapp/src/views/projects/translations/TranslationsTable/TranslationWrite.tsx +++ b/webapp/src/views/projects/translations/TranslationsTable/TranslationWrite.tsx @@ -56,6 +56,7 @@ export const TranslationWrite: React.FC = ({ tools }) => { handleInsertBase, editEnabled, disabled, + baseText, } = tools; const editVal = tools.editVal!; const state = translation?.state || 'UNTRANSLATED'; @@ -68,7 +69,7 @@ export const TranslationWrite: React.FC = ({ tools }) => { const baseTranslation = useBaseTranslation( activeVariant, - keyData.translations[baseLanguage]?.text, + baseText, keyData.keyIsPlural ); diff --git a/webapp/src/views/projects/translations/translationVisual/PluralEditor.tsx b/webapp/src/views/projects/translations/translationVisual/PluralEditor.tsx index 56478a06aa..e49a94e4fa 100644 --- a/webapp/src/views/projects/translations/translationVisual/PluralEditor.tsx +++ b/webapp/src/views/projects/translations/translationVisual/PluralEditor.tsx @@ -17,6 +17,7 @@ type Props = { autofocus?: boolean; activeEditorRef?: RefObject; mode: 'placeholders' | 'syntax'; + baseValue?: TolgeeFormat; }; export const PluralEditor = ({ @@ -29,6 +30,7 @@ export const PluralEditor = ({ activeEditorRef, editorProps, mode, + baseValue, }: Props) => { function handleChange(text: string, variant: string) { onChange?.({ ...value, variants: { ...value.variants, [variant]: text } }); @@ -38,6 +40,17 @@ export const PluralEditor = ({ const editorMode = project.icuPlaceholders ? mode : 'plain'; + function getExactForms() { + if (!baseValue) { + return []; + } + return Object.keys(baseValue.variants) + .filter((key) => /^=\d+(\.\d+)?$/.test(key)) + .map((key) => parseFloat(key.substring(1))); + } + + const exactForms = getExactForms(); + return ( { const variantOrOther = variant || 'other'; return ( diff --git a/webapp/src/views/projects/translations/translationVisual/TranslationPlurals.tsx b/webapp/src/views/projects/translations/translationVisual/TranslationPlurals.tsx index 0fce5348ed..94c35ddb35 100644 --- a/webapp/src/views/projects/translations/translationVisual/TranslationPlurals.tsx +++ b/webapp/src/views/projects/translations/translationVisual/TranslationPlurals.tsx @@ -1,10 +1,9 @@ -import { useMemo } from 'react'; +import React, { useMemo } from 'react'; import { styled } from '@mui/material'; -import React from 'react'; import { - TolgeeFormat, getPluralVariants, getVariantExample, + TolgeeFormat, } from '@tginternal/editor'; const StyledContainer = styled('div')` @@ -67,6 +66,7 @@ type Props = { showEmpty?: boolean; activeVariant?: string; variantPaddingTop?: number | string; + exactForms?: number[]; }; export const TranslationPlurals = ({ @@ -76,19 +76,12 @@ export const TranslationPlurals = ({ showEmpty, activeVariant, variantPaddingTop, + exactForms, }: Props) => { - const variants = useMemo(() => { - const existing = new Set(Object.keys(value.variants)); - const required = getPluralVariants(locale); - required.forEach((val) => existing.delete(val)); - const result = Array.from(existing).map((value) => { - return [value, getVariantExample(locale, value)] as const; - }); - required.forEach((value) => { - result.push([value, getVariantExample(locale, value)]); - }); - return result; - }, [locale]); + const variants = useMemo( + () => getForms(locale, value, exactForms), + [locale, exactForms, value] + ); if (value.parameter) { return ( @@ -137,3 +130,27 @@ export const TranslationPlurals = ({ ); }; + +function getForms(locale: string, value: TolgeeFormat, exactForms?: number[]) { + const forms: Set = new Set(); + getPluralVariants(locale).forEach((value) => forms.add(value)); + Object.keys(value.variants).forEach((value) => forms.add(value)); + (exactForms || []) + .map((value) => `=${value.toString()}`) + .forEach((value) => forms.add(value)); + + const formsArray = sortExactForms(forms); + + return formsArray.map((value) => { + return [value, getVariantExample(locale, value)] as const; + }); +} + +function sortExactForms(forms: Set) { + return [...forms].sort((a, b) => { + if (a.startsWith('=') && b.startsWith('=')) { + return Number(a.substring(1)) - Number(b.substring(1)); + } + return 0; + }); +} diff --git a/webapp/src/views/projects/translations/useTranslationCell.ts b/webapp/src/views/projects/translations/useTranslationCell.ts index 4095ff49d5..eb1f2e79b7 100644 --- a/webapp/src/views/projects/translations/useTranslationCell.ts +++ b/webapp/src/views/projects/translations/useTranslationCell.ts @@ -1,4 +1,4 @@ -import React, { useEffect } from 'react'; +import React, { useEffect, useMemo } from 'react'; import { getTolgeeFormat } from '@tginternal/editor'; import { TRANSLATION_STATES } from 'tg.constants/translationStates'; @@ -6,8 +6,8 @@ import { useProjectPermissions } from 'tg.hooks/useProjectPermissions'; import { components } from 'tg.service/apiSchema.generated'; import { - useTranslationsSelector, useTranslationsActions, + useTranslationsSelector, } from './context/TranslationsContext'; import { AfterCommand, @@ -88,13 +88,23 @@ export const useTranslationCell = ({ }); }; + const getBaseText = () => { + if (!baseLanguage) { + return undefined; + } + + return keyData.translations[baseLanguage.tag]?.text; + }; + + const baseText = getBaseText(); + const handleInsertBase = () => { - if (!baseLanguage?.tag) { + const baseText = getBaseText(); + + if (!baseText) { return; } - const baseText = keyData.translations[baseLanguage.tag].text; - let baseVariant: string | undefined; if (cursor?.activeVariant) { const variants = getTolgeeFormat( @@ -112,6 +122,14 @@ export const useTranslationCell = ({ } }; + const baseValue = useMemo(() => { + return getTolgeeFormat( + getBaseText() || '', + keyData.keyIsPlural, + !project.icuPlaceholders + ); + }, [baseText, keyData.keyIsPlural]); + const handleClose = (force = false) => { if (force) { setEditForce(undefined); @@ -171,5 +189,7 @@ export const useTranslationCell = ({ editEnabled, translation, disabled, + baseValue, + baseText, }; };