From 437e7c2eabc354c7d4dea4596717541e5510a053 Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Mon, 28 Aug 2023 20:34:25 +1000 Subject: [PATCH] add: warning when using or_other and translations together - warning is once per form. Re-used missing translations check code to avoid doing the same processing twice. The new check needed the intermediate result of the missing check. - test_validators.py: removed version check for import since supported versions have been above 3.3 for a while. --- .../pyxform/missing_translations_check.py | 138 -------------- .../validators/pyxform/translations_checks.py | 174 ++++++++++++++++++ pyxform/xls2json.py | 11 +- tests/test_translations.py | 16 +- tests/test_validators.py | 11 +- 5 files changed, 195 insertions(+), 155 deletions(-) delete mode 100644 pyxform/validators/pyxform/missing_translations_check.py create mode 100644 pyxform/validators/pyxform/translations_checks.py diff --git a/pyxform/validators/pyxform/missing_translations_check.py b/pyxform/validators/pyxform/missing_translations_check.py deleted file mode 100644 index addb15fa..00000000 --- a/pyxform/validators/pyxform/missing_translations_check.py +++ /dev/null @@ -1,138 +0,0 @@ -from collections import defaultdict -from typing import TYPE_CHECKING - -from pyxform import aliases, constants -from pyxform.errors import PyXFormError - -if TYPE_CHECKING: - from typing import Dict, List, Optional, Sequence, Union - - SheetData = List[Dict[str, Union[str, Dict]]] - - -def format_missing_translations_msg( - _in: "Dict[str, Dict[str, Sequence]]", -) -> "Optional[str]": - """ - Format the missing translations data into a warning message. - - :param _in: A dict structured as Dict[survey|choices: Dict[language: (columns)]]. - In other words, for the survey or choices sheet, a dict of the language(s) and - column names for which there are missing translations. - :return: The warning message, or None if there were no missing columns. - """ - - def get_sheet_msg(name, sheet): - if sheet is not None: - langs = sorted(sheet.keys()) - if 0 < len(langs): - lang_msgs = [] - for lang in langs: - cols = sheet[lang] - if isinstance(cols, str): - msg = f"Expected a sequence of columns, got a string for {lang}." - raise PyXFormError(msg) - if 1 == len(cols): - msg = f"Language '{lang}' is missing the {name} {cols[0]} column." - lang_msgs.append(msg) - if 1 < len(cols): - c = ", ".join(sorted(cols)) - msg = f"Language '{lang}' is missing the {name} columns {c}." - lang_msgs.append(msg) - return "\n".join(lang_msgs) - return None - - survey = get_sheet_msg(name=constants.SURVEY, sheet=_in.get(constants.SURVEY)) - choices = get_sheet_msg(name=constants.CHOICES, sheet=_in.get(constants.CHOICES)) - - messages = tuple(i for i in (survey, choices) if i is not None) - if 0 == len(messages): - return None - return "\n".join(messages) - - -def find_missing_translations( - sheet_data: "SheetData", - translatable_columns: "Dict[str, str]", -) -> "Dict[str, List[str]]": - """ - Find missing translation columns in the sheet data. - - For each translatable column used in the sheet, there should be a translation for - each language (including the default / unspecified language) that is used for any - other translatable column. - - This checks the first row only since it is concerned with the presence of columns, not - individual cells. It therefore assumes that each row object has the same structure. - - :param sheet_data: The survey or choices sheet data. - :param translatable_columns: The translatable columns for a sheet. The structure - should be Dict[internal_name, external_name]. See the aliases module. - :return: Dict[column_name, List[languages]] - """ - translations_seen = defaultdict(list) - translation_columns_seen = set() - - def process_cell(typ, cell): - if cell is not None: - if typ in translatable_columns.keys(): - name = translatable_columns[typ] - if isinstance(cell, str): - translations_seen[constants.DEFAULT_LANGUAGE_VALUE].append(name) - translation_columns_seen.add(name) - elif isinstance(cell, dict): - for lng in cell: - translations_seen[lng].append(name) - translation_columns_seen.add(name) - - if 0 < len(sheet_data): - # e.g. ("name", "q1"), ("label", {"en": "Hello", "fr": "Bonjour"}) - for column_type, cell_content in sheet_data[0].items(): - if column_type == constants.MEDIA and isinstance(cell_content, dict): - # e.g. ("audio", {"eng": "my.mp3"}) - for media_type, media_cell in cell_content.items(): - process_cell(typ=media_type, cell=media_cell) - if column_type == constants.BIND: - # e.g. ("jr:constraintMsg", "Try again") - for bind_type, bind_cell in cell_content.items(): - process_cell(typ=bind_type, cell=bind_cell) - else: - process_cell(typ=column_type, cell=cell_content) - - missing = defaultdict(list) - for lang, lang_trans in translations_seen.items(): - for seen_tran in translation_columns_seen: - if seen_tran not in lang_trans: - missing[lang].append(seen_tran) - - return missing - - -def missing_translations_check( - survey_sheet: "SheetData", - choices_sheet: "SheetData", - warnings: "List[str]", -): - """ - Add a warning if there are missing translation columns in the survey or choices data. - - :param survey_sheet: The survey sheet data. - :param choices_sheet: The choices sheet data. - :param warnings: The warnings list, which may be empty. - :return: The warnings list, possibly with a new message, otherwise unchanged. - """ - survey_missing_trans = find_missing_translations( - sheet_data=survey_sheet, - translatable_columns=aliases.TRANSLATABLE_SURVEY_COLUMNS, - ) - choices_missing_trans = find_missing_translations( - sheet_data=choices_sheet, - translatable_columns=aliases.TRANSLATABLE_CHOICES_COLUMNS, - ) - if 0 < len(survey_missing_trans) or 0 < len(choices_missing_trans): - msg = format_missing_translations_msg( - _in={"survey": survey_missing_trans, "choices": choices_missing_trans} - ) - if msg is not None: - warnings.append(msg) - return warnings diff --git a/pyxform/validators/pyxform/translations_checks.py b/pyxform/validators/pyxform/translations_checks.py new file mode 100644 index 00000000..722017ba --- /dev/null +++ b/pyxform/validators/pyxform/translations_checks.py @@ -0,0 +1,174 @@ +from collections import defaultdict +from typing import TYPE_CHECKING + +from pyxform import aliases +from pyxform import constants as const +from pyxform.errors import PyXFormError + +if TYPE_CHECKING: + from typing import Dict, List, Optional, Sequence, Set, Union + + SheetData = List[Dict[str, Union[str, Dict]]] + Warnings = List[str] + + +OR_OTHER_WARNING = ( + "This form uses or_other and translations, which is not recommended. " + "An untranslated input question label and choice label is generated " + "for 'other'. Learn more: https://xlsform.org/en/#specify-other)." +) + + +def format_missing_translations_msg( + _in: "Dict[str, Dict[str, Sequence]]", +) -> "Optional[str]": + """ + Format the missing translations data into a warning message. + + :param _in: A dict structured as Dict[survey|choices: Dict[language: (columns)]]. + In other words, for the survey or choices sheet, a dict of the language(s) and + column names for which there are missing translations. + :return: The warning message, or None if there were no missing columns. + """ + + def get_sheet_msg(name, sheet): + if sheet is not None: + langs = sorted(sheet.keys()) + if 0 < len(langs): + lang_msgs = [] + for lang in langs: + cols = sheet[lang] + if isinstance(cols, str): + msg = f"Expected a sequence of columns, got a string for {lang}." + raise PyXFormError(msg) + if 1 == len(cols): + msg = f"Language '{lang}' is missing the {name} {cols[0]} column." + lang_msgs.append(msg) + if 1 < len(cols): + c = ", ".join(sorted(cols)) + msg = f"Language '{lang}' is missing the {name} columns {c}." + lang_msgs.append(msg) + return "\n".join(lang_msgs) + return None + + survey = get_sheet_msg(name=const.SURVEY, sheet=_in.get(const.SURVEY)) + choices = get_sheet_msg(name=const.CHOICES, sheet=_in.get(const.CHOICES)) + + messages = tuple(i for i in (survey, choices) if i is not None) + if 0 == len(messages): + return None + return "\n".join(messages) + + +class Translations: + """ + Sheet-level container for translations info. + + For each translatable column used in the sheet, there should be a translation for + each language (including the default / unspecified language) that is used for any + other translatable column. + + Only the first row is inspected since the checks are concerned with the presence of + columns, not individual cells. It therefore assumes that each row object has the same + structure. + """ + + def __init__( + self, + sheet_data: "SheetData", + translatable_columns: "Dict[str, str]", + ): + """ + :param sheet_data: The survey or choices sheet data. + :param translatable_columns: The translatable columns for a sheet. The structure + should be Dict[internal_name, external_name]. See the aliases module. + """ + self.seen: "defaultdict[str, List[str]]" = defaultdict(list) + self.columns_seen: "Set[str]" = set() + self.missing: "defaultdict[str, List[str]]" = defaultdict(list) + + self._find_translations(sheet_data, translatable_columns) + self._find_missing() + + def _find_translations( + self, sheet_data: "SheetData", translatable_columns: "Dict[str, str]" + ): + def process_cell(typ, cell): + if cell is not None: + if typ in translatable_columns.keys(): + name = translatable_columns[typ] + if isinstance(cell, str): + self.seen[const.DEFAULT_LANGUAGE_VALUE].append(name) + self.columns_seen.add(name) + elif isinstance(cell, dict): + for lng in cell: + self.seen[lng].append(name) + self.columns_seen.add(name) + + if 0 < len(sheet_data): + # e.g. ("name", "q1"), ("label", {"en": "Hello", "fr": "Bonjour"}) + for column_type, cell_content in sheet_data[0].items(): + if column_type == const.MEDIA and isinstance(cell_content, dict): + # e.g. ("audio", {"eng": "my.mp3"}) + for media_type, media_cell in cell_content.items(): + process_cell(typ=media_type, cell=media_cell) + if column_type == const.BIND: + # e.g. ("jr:constraintMsg", "Try again") + for bind_type, bind_cell in cell_content.items(): + process_cell(typ=bind_type, cell=bind_cell) + else: + process_cell(typ=column_type, cell=cell_content) + + def seen_default_only(self) -> bool: + return 0 == len(self.seen) or ( + const.DEFAULT_LANGUAGE_VALUE in self.seen and 1 == len(self.seen) + ) + + def _find_missing(self): + if self.seen_default_only(): + return + for lang, lang_trans in self.seen.items(): + for seen_tran in self.columns_seen: + if seen_tran not in lang_trans: + self.missing[lang].append(seen_tran) + + +class SheetTranslations: + """Workbook-level container for translations info, with validation checks.""" + + def __init__( + self, + survey_sheet: "SheetData", + choices_sheet: "SheetData", + ): + """ + :param survey_sheet: The survey sheet data. + :param choices_sheet: The choices sheet data. + """ + self.survey: "Translations" = Translations( + sheet_data=survey_sheet, + translatable_columns=aliases.TRANSLATABLE_SURVEY_COLUMNS, + ) + self.choices: "Translations" = Translations( + sheet_data=choices_sheet, + translatable_columns=aliases.TRANSLATABLE_CHOICES_COLUMNS, + ) + self.or_other_seen: bool = False + + def missing_check(self, warnings: "Warnings") -> "Warnings": + """Add a warning if survey or choices have missing translations.""" + if 0 < len(self.survey.missing) or 0 < len(self.choices.missing): + msg = format_missing_translations_msg( + _in={"survey": self.survey.missing, "choices": self.choices.missing} + ) + if msg is not None: + warnings.append(msg) + return warnings + + def or_other_check(self, warnings: "Warnings") -> "Warnings": + """Add a warning if translations and or_other are present.""" + if self.or_other_seen and ( + not self.survey.seen_default_only() or not self.choices.seen_default_only() + ): + warnings.append(OR_OTHER_WARNING) + return warnings diff --git a/pyxform/xls2json.py b/pyxform/xls2json.py index 2fbfdeee..53b2e90a 100644 --- a/pyxform/xls2json.py +++ b/pyxform/xls2json.py @@ -24,9 +24,7 @@ from pyxform.errors import PyXFormError from pyxform.utils import PYXFORM_REFERENCE_REGEX, default_is_dynamic from pyxform.validators.pyxform import parameters_generic, select_from_file_params -from pyxform.validators.pyxform.missing_translations_check import ( - missing_translations_check, -) +from pyxform.validators.pyxform.translations_checks import SheetTranslations from pyxform.xls2json_backends import csv_to_dict, xls_to_dict, xlsx_to_dict from pyxform.xlsparseutils import find_sheet_misspellings, is_valid_xml_tag @@ -617,11 +615,11 @@ def workbook_to_json( # Check for missing translations. The choices sheet is checked here so that the # warning can be combined into one message. - warnings = missing_translations_check( + sheet_translations = SheetTranslations( survey_sheet=survey_sheet, choices_sheet=choices_sheet, - warnings=warnings, ) + sheet_translations.missing_check(warnings=warnings) # No spell check for OSM sheet (infrequently used, many spurious matches). osm_sheet = dealias_and_group_headers( @@ -1158,6 +1156,7 @@ def workbook_to_json( specify_other_question = None if parse_dict.get("specify_other") is not None: + sheet_translations.or_other_seen = True select_type += constants.SELECT_OR_OTHER_SUFFIX if row.get(constants.CHOICE_FILTER): msg = ( @@ -1417,6 +1416,8 @@ def workbook_to_json( # Put the row in the json dict as is: parent_children_array.append(row) + sheet_translations.or_other_check(warnings=warnings) + if len(stack) != 1: raise PyXFormError( "Unmatched begin statement: " diff --git a/tests/test_translations.py b/tests/test_translations.py index ece4717b..8116a0bb 100644 --- a/tests/test_translations.py +++ b/tests/test_translations.py @@ -10,7 +10,8 @@ from pyxform.constants import CHOICES from pyxform.constants import DEFAULT_LANGUAGE_VALUE as DEFAULT_LANG from pyxform.constants import SURVEY -from pyxform.validators.pyxform.missing_translations_check import ( +from pyxform.validators.pyxform.translations_checks import ( + OR_OTHER_WARNING, format_missing_translations_msg, ) from tests.pyxform_test_case import PyxformTestCase @@ -409,7 +410,10 @@ def run(name): run(name=f"questions={count}, with check (seconds):") - with patch("pyxform.xls2json.missing_translations_check", return_value=[]): + with patch( + "pyxform.xls2json.SheetTranslations.missing_check", + return_value=[], + ): run(name=f"questions={count}, without check (seconds):") @@ -921,6 +925,7 @@ def test_missing_translation__issue_157__warn__default(self): # TODO: bug - missing default lang translatable/itext values. self.xp.language_no_itext(DEFAULT_LANG), ], + warnings__not_contains=[OR_OTHER_WARNING], ) @@ -1373,6 +1378,7 @@ def test_missing_translation__two_lang__warn__default(self): self.xp.language_is_not_default("french"), self.xp.language_no_itext(DEFAULT_LANG), ], + warnings__not_contains=[OR_OTHER_WARNING], ) def test_specify_other__with_translations(self): @@ -1402,6 +1408,7 @@ def test_specify_other__with_translations(self): x:label[text() = 'Specify other.'] """, ], + warnings__contains=[OR_OTHER_WARNING], ) def test_specify_other__with_translations__with_group(self): @@ -1434,6 +1441,7 @@ def test_specify_other__with_translations__with_group(self): /x:label[text() = 'Specify other.'] """, ], + warnings__contains=[OR_OTHER_WARNING], ) def test_specify_other__with_translations__with_repeat(self): @@ -1467,6 +1475,7 @@ def test_specify_other__with_translations__with_repeat(self): /x:label[text() = 'Specify other.'] """, ], + warnings__contains=[OR_OTHER_WARNING], ) def test_specify_other__with_translations__with_nested_group(self): @@ -1509,6 +1518,7 @@ def test_specify_other__with_translations__with_nested_group(self): /x:label[text() = 'Specify other.'] """, ], + warnings__contains=[OR_OTHER_WARNING], ) def test_specify_other__with_translations__with_nested_repeat(self): @@ -1554,6 +1564,7 @@ def test_specify_other__with_translations__with_nested_repeat(self): /x:label[text() = 'Specify other.'] """, ], + warnings__contains=[OR_OTHER_WARNING], ) def test_specify_other__no_translations(self): @@ -1582,6 +1593,7 @@ def test_specify_other__no_translations(self): x:label[text() = 'Specify other.'] """, ], + warnings__not_contains=[OR_OTHER_WARNING], ) def test_specify_other__choice_filter(self): diff --git a/tests/test_validators.py b/tests/test_validators.py index 32337ba2..39e2b48b 100644 --- a/tests/test_validators.py +++ b/tests/test_validators.py @@ -2,20 +2,11 @@ """ Test validators. """ -import sys from unittest import TestCase +from unittest.mock import patch from pyxform.validators.odk_validate import check_java_available -if sys.version_info >= (3, 3): - from unittest.mock import patch # pylint: disable=E0611,E0401 -else: - try: - from mock import patch - except ImportError: - raise ImportError("Pyxform test suite requires the 'mock' library.") - - mock_func = "shutil.which" msg = "Form validation failed because Java (8+ required) could not be found."