From fd132be9f99f9d9c181c7e8fcda26ba2b589bb4d Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Fri, 25 Aug 2023 22:52:50 +1000 Subject: [PATCH 1/3] fix: failure when using or_other + translations + group or repeat - in _create_section_from_dict, the context is the nearest "section" which can be a survey, group, or repeat. When it's not the survey, the survey-level choices dict isn't directly available. So this is now tracked by the builder as a class-level object. - added tests for groups, repeats, groups-in-groups, repeats-in-groups. --- pyxform/builder.py | 16 +++- tests/test_translations.py | 152 +++++++++++++++++++++++++++++++ tests/xpath_helpers/questions.py | 24 +++++ 3 files changed, 190 insertions(+), 2 deletions(-) diff --git a/pyxform/builder.py b/pyxform/builder.py index d968ffc3..ae63758b 100644 --- a/pyxform/builder.py +++ b/pyxform/builder.py @@ -86,6 +86,8 @@ def __init__(self, **kwargs): # dictionary of setvalue target and value tuple indexed by triggering element self.setvalues_by_triggering_ref = {} + # For tracking survey-level choices while recursing through the survey. + self.choices: Dict[str, Any] = {} def set_sections(self, sections): """ @@ -108,10 +110,14 @@ def create_survey_element_from_dict( self._add_none_option = d["add_none_option"] if d["type"] in self.SECTION_CLASSES: + if d["type"] == "survey": + self.choices = copy.deepcopy(d.get(constants.CHOICES, {})) + section = self._create_section_from_dict(d) if d["type"] == "survey": section.setvalues_by_triggering_ref = self.setvalues_by_triggering_ref + section.choices = self.choices return section elif d["type"] == "loop": @@ -254,9 +260,15 @@ def _create_section_from_dict(self, d): survey_element = self.create_survey_element_from_dict(copy.deepcopy(child)) if child["type"].endswith(" or specify other"): select_question = survey_element[0] - itemset_choices = result["choices"][select_question["itemset"]] - if OR_OTHER_CHOICE not in itemset_choices: + itemset_choices = self.choices.get(select_question["itemset"], None) + if ( + itemset_choices is not None + and isinstance(itemset_choices, list) + and OR_OTHER_CHOICE not in itemset_choices + ): itemset_choices.append(OR_OTHER_CHOICE) + # This is required for builder_tests.BuilderTests.test_loop to pass. + self._add_other_option_to_multiple_choice_question(d=child) if survey_element: result.add_children(survey_element) diff --git a/tests/test_translations.py b/tests/test_translations.py index f1908e64..ece4717b 100644 --- a/tests/test_translations.py +++ b/tests/test_translations.py @@ -1404,6 +1404,158 @@ def test_specify_other__with_translations(self): ], ) + def test_specify_other__with_translations__with_group(self): + """Should add an "other" choice to the itemset instance and an itext label.""" + md = """ + | survey | | | | | + | | type | name | label | label::eng | + | | begin group | g1 | Group 1 | Group 1 | + | | select_one c1 or_other | q1 | Question 1 | Question A | + | | end group | g1 | | | + | choices | | | | | | + | | list name | name | label | label::eng | label::fr | + | | c1 | na | la | la-e | la-f | + | | c1 | nb | lb | lb-e | | + """ + self.assertPyxformXform( + md=md, + xml__xpath_match=[ + xpc.model_itext_choice_text_label_by_pos( + "eng", "c1", ("la-e", "lb-e", "-") + ), + xpc.model_itext_choice_text_label_by_pos("fr", "c1", ("la-f", "-", "-")), + xpc.model_itext_choice_text_label_by_pos( + DEFAULT_LANG, "c1", ("la", "lb", "Other") + ), + xpq.body_group_select1_itemset("g1", "q1"), + """ + /h:html/h:body/x:group[@ref='/test_name/g1'] + /x:input[@ref='/test_name/g1/q1_other'] + /x:label[text() = 'Specify other.'] + """, + ], + ) + + def test_specify_other__with_translations__with_repeat(self): + """Should add an "other" choice to the itemset instance and an itext label.""" + md = """ + | survey | | | | | + | | type | name | label | label::eng | + | | begin repeat | r1 | Repeat 1 | Repeat 1 | + | | select_one c1 or_other | q1 | Question 1 | Question A | + | | end repeat | r1 | | | + | choices | | | | | | + | | list name | name | label | label::eng | label::fr | + | | c1 | na | la | la-e | la-f | + | | c1 | nb | lb | lb-e | | + """ + self.assertPyxformXform( + md=md, + xml__xpath_match=[ + xpc.model_itext_choice_text_label_by_pos( + "eng", "c1", ("la-e", "lb-e", "-") + ), + xpc.model_itext_choice_text_label_by_pos("fr", "c1", ("la-f", "-", "-")), + xpc.model_itext_choice_text_label_by_pos( + DEFAULT_LANG, "c1", ("la", "lb", "Other") + ), + xpq.body_repeat_select1_itemset("r1", "q1"), + """ + /h:html/h:body/x:group[@ref='/test_name/r1'] + /x:repeat[@nodeset='/test_name/r1'] + /x:input[@ref='/test_name/r1/q1_other'] + /x:label[text() = 'Specify other.'] + """, + ], + ) + + def test_specify_other__with_translations__with_nested_group(self): + """Should add an "other" choice to the itemset instance and an itext label.""" + md = """ + | survey | | | | | + | | type | name | label | label::eng | + | | begin group | g1 | Group 1 | Group 1 | + | | begin group | g2 | Group 2 | Group 2 | + | | select_one c1 or_other | q1 | Question 1 | Question A | + | | end group | g2 | | | + | | end group | g1 | | | + | choices | | | | | | + | | list name | name | label | label::eng | label::fr | + | | c1 | na | la | la-e | la-f | + | | c1 | nb | lb | lb-e | | + """ + self.assertPyxformXform( + md=md, + xml__xpath_match=[ + xpc.model_itext_choice_text_label_by_pos( + "eng", "c1", ("la-e", "lb-e", "-") + ), + xpc.model_itext_choice_text_label_by_pos("fr", "c1", ("la-f", "-", "-")), + xpc.model_itext_choice_text_label_by_pos( + DEFAULT_LANG, "c1", ("la", "lb", "Other") + ), + """ + /h:html/h:body/x:group[@ref='/test_name/g1'] + /x:group[@ref='/test_name/g1/g2']/x:select1[ + @ref = '/test_name/g1/g2/q1' + and ./x:itemset + and not(./x:item) + ] + """, + """ + /h:html/h:body/x:group[@ref='/test_name/g1'] + /x:group[@ref='/test_name/g1/g2'] + /x:input[@ref='/test_name/g1/g2/q1_other'] + /x:label[text() = 'Specify other.'] + """, + ], + ) + + def test_specify_other__with_translations__with_nested_repeat(self): + """Should add an "other" choice to the itemset instance and an itext label.""" + md = """ + | survey | | | | | + | | type | name | label | label::eng | + | | begin group | g1 | Group 1 | Group 1 | + | | begin repeat | r1 | Repeat 1 | Repeat 1 | + | | select_one c1 or_other | q1 | Question 1 | Question A | + | | end repeat | r1 | | | + | | end group | g1 | | | + | choices | | | | | | + | | list name | name | label | label::eng | label::fr | + | | c1 | na | la | la-e | la-f | + | | c1 | nb | lb | lb-e | | + """ + self.assertPyxformXform( + md=md, + xml__xpath_match=[ + xpc.model_itext_choice_text_label_by_pos( + "eng", "c1", ("la-e", "lb-e", "-") + ), + xpc.model_itext_choice_text_label_by_pos("fr", "c1", ("la-f", "-", "-")), + xpc.model_itext_choice_text_label_by_pos( + DEFAULT_LANG, "c1", ("la", "lb", "Other") + ), + """ + /h:html/h:body/x:group[@ref='/test_name/g1'] + /x:group[@ref='/test_name/g1/r1'] + /x:repeat[@nodeset='/test_name/g1/r1'] + /x:select1[ + @ref = '/test_name/g1/r1/q1' + and ./x:itemset + and not(./x:item) + ] + """, + """ + /h:html/h:body/x:group[@ref='/test_name/g1'] + /x:group[@ref='/test_name/g1/r1'] + /x:repeat[@nodeset='/test_name/g1/r1'] + /x:input[@ref='/test_name/g1/r1/q1_other'] + /x:label[text() = 'Specify other.'] + """, + ], + ) + def test_specify_other__no_translations(self): """Should add an "other" choice to the itemset instance, but not use itext.""" md = """ diff --git a/tests/xpath_helpers/questions.py b/tests/xpath_helpers/questions.py index 8eb37732..34c2db5a 100644 --- a/tests/xpath_helpers/questions.py +++ b/tests/xpath_helpers/questions.py @@ -31,6 +31,30 @@ def body_select1_itemset(q_name: str): ] """ + @staticmethod + def body_group_select1_itemset(g_name: str, q_name: str): + """Body has a select1 with an itemset, and no inline items.""" + return fr""" + /h:html/h:body/x:group[@ref='/test_name/{g_name}']/x:select1[ + @ref = '/test_name/{g_name}/{q_name}' + and ./x:itemset + and not(./x:item) + ] + """ + + @staticmethod + def body_repeat_select1_itemset(r_name: str, q_name: str): + """Body has a select1 with an itemset, and no inline items.""" + return fr""" + /h:html/h:body/x:group[@ref='/test_name/{r_name}'] + /x:repeat[@nodeset='/test_name/{r_name}'] + /x:select1[ + @ref = '/test_name/{r_name}/{q_name}' + and ./x:itemset + and not(./x:item) + ] + """ + @staticmethod def body_odk_rank_itemset(q_name: str): """Body has a rank with an itemset, and no inline items.""" From f411aeadb8cde28e65ae1ae39012207f85cbeed9 Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Fri, 25 Aug 2023 23:26:04 +1000 Subject: [PATCH 2/3] dev: use string constants in builder.py - standardise to help avoid misspelling / refactoring issues in future --- pyxform/builder.py | 105 +++++++++++++++++++++++---------------------- 1 file changed, 54 insertions(+), 51 deletions(-) diff --git a/pyxform/builder.py b/pyxform/builder.py index ae63758b..4092028c 100644 --- a/pyxform/builder.py +++ b/pyxform/builder.py @@ -7,7 +7,8 @@ import re from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union -from pyxform import constants, file_utils, utils +from pyxform import constants as const +from pyxform import file_utils, utils from pyxform.entities.entity_declaration import EntityDeclaration from pyxform.errors import PyXFormError from pyxform.external_instance import ExternalInstance @@ -29,8 +30,8 @@ from pyxform.survey_element import SurveyElement OR_OTHER_CHOICE = { - "name": "other", - "label": "Other", + const.NAME: "other", + const.LABEL: "Other", } @@ -73,9 +74,9 @@ class SurveyElementBuilder: } SECTION_CLASSES = { - "group": GroupedSection, - "repeat": RepeatingSection, - "survey": Survey, + const.GROUP: GroupedSection, + const.REPEAT: RepeatingSection, + const.SURVEY: Survey, } def __init__(self, **kwargs): @@ -109,21 +110,21 @@ def create_survey_element_from_dict( if "add_none_option" in d: self._add_none_option = d["add_none_option"] - if d["type"] in self.SECTION_CLASSES: - if d["type"] == "survey": - self.choices = copy.deepcopy(d.get(constants.CHOICES, {})) + if d[const.TYPE] in self.SECTION_CLASSES: + if d[const.TYPE] == const.SURVEY: + self.choices = copy.deepcopy(d.get(const.CHOICES, {})) section = self._create_section_from_dict(d) - if d["type"] == "survey": + if d[const.TYPE] == const.SURVEY: section.setvalues_by_triggering_ref = self.setvalues_by_triggering_ref section.choices = self.choices return section - elif d["type"] == "loop": + elif d[const.TYPE] == const.LOOP: return self._create_loop_from_dict(d) - elif d["type"] == "include": - section_name = d["name"] + elif d[const.TYPE] == "include": + section_name = d[const.NAME] if section_name not in self._sections: raise PyXFormError( "This section has not been included.", @@ -133,9 +134,9 @@ def create_survey_element_from_dict( d = self._sections[section_name] full_survey = self.create_survey_element_from_dict(d) return full_survey.children - elif d["type"] in ["xml-external", "csv-external"]: + elif d[const.TYPE] in ["xml-external", "csv-external"]: return ExternalInstance(**d) - elif d["type"] == "entity": + elif d[const.TYPE] == "entity": return EntityDeclaration(**d) else: self._save_trigger_as_setvalue_and_remove_calculate(d) @@ -148,15 +149,17 @@ def _save_trigger_as_setvalue_and_remove_calculate(self, d): if "trigger" in d: triggering_ref = re.sub(r"\s+", "", d["trigger"]) value = "" - if "bind" in d and "calculate" in d["bind"]: - value = d["bind"]["calculate"] + if const.BIND in d and "calculate" in d[const.BIND]: + value = d[const.BIND]["calculate"] if triggering_ref in self.setvalues_by_triggering_ref: self.setvalues_by_triggering_ref[triggering_ref].append( - (d["name"], value) + (d[const.NAME], value) ) else: - self.setvalues_by_triggering_ref[triggering_ref] = [(d["name"], value)] + self.setvalues_by_triggering_ref[triggering_ref] = [ + (d[const.NAME], value) + ] @staticmethod def _create_question_from_dict( @@ -164,18 +167,18 @@ def _create_question_from_dict( question_type_dictionary: Dict[str, Any], add_none_option: bool = False, ) -> Union[Question, List[Question]]: - question_type_str = d["type"] + question_type_str = d[const.TYPE] d_copy = d.copy() # TODO: Keep add none option? - if add_none_option and question_type_str.startswith("select all that apply"): + if add_none_option and question_type_str.startswith(const.SELECT_ALL_THAT_APPLY): SurveyElementBuilder._add_none_option_to_select_all_that_apply(d_copy) # Handle or_other on select type questions - or_other_len = len(constants.SELECT_OR_OTHER_SUFFIX) - if question_type_str.endswith(constants.SELECT_OR_OTHER_SUFFIX): + or_other_len = len(const.SELECT_OR_OTHER_SUFFIX) + if question_type_str.endswith(const.SELECT_OR_OTHER_SUFFIX): question_type_str = question_type_str[: len(question_type_str) - or_other_len] - d_copy["type"] = question_type_str + d_copy[const.TYPE] = question_type_str SurveyElementBuilder._add_other_option_to_multiple_choice_question(d_copy) return [ SurveyElementBuilder._create_question_from_dict( @@ -198,7 +201,7 @@ def _create_question_from_dict( @staticmethod def _add_other_option_to_multiple_choice_question(d: Dict[str, Any]) -> None: # ideally, we'd just be pulling from children - choice_list = d.get("choices", d.get("children", [])) + choice_list = d.get(const.CHOICES, d.get(const.CHILDREN, [])) if len(choice_list) <= 0: raise PyXFormError("There should be choices for this question.") if OR_OTHER_CHOICE not in choice_list: @@ -206,19 +209,19 @@ def _add_other_option_to_multiple_choice_question(d: Dict[str, Any]) -> None: @staticmethod def _add_none_option_to_select_all_that_apply(d_copy): - choice_list = d_copy.get("choices", d_copy.get("children", [])) + choice_list = d_copy.get(const.CHOICES, d_copy.get(const.CHILDREN, [])) if len(choice_list) <= 0: raise PyXFormError("There should be choices for this question.") - none_choice = {"name": "none", "label": "None"} + none_choice = {const.NAME: "none", const.LABEL: "None"} if none_choice not in choice_list: choice_list.append(none_choice) none_constraint = "(.='none' or not(selected(., 'none')))" - if "bind" not in d_copy: - d_copy["bind"] = {} - if "constraint" in d_copy["bind"]: - d_copy["bind"]["constraint"] += " and " + none_constraint + if const.BIND not in d_copy: + d_copy[const.BIND] = {} + if "constraint" in d_copy[const.BIND]: + d_copy[const.BIND]["constraint"] += " and " + none_constraint else: - d_copy["bind"]["constraint"] = none_constraint + d_copy[const.BIND]["constraint"] = none_constraint @staticmethod def _get_question_class(question_type_str, question_type_dictionary): @@ -228,7 +231,7 @@ def _get_question_class(question_type_str, question_type_dictionary): type_dictionary -> QUESTION_CLASSES """ question_type = question_type_dictionary.get(question_type_str, {}) - control_dict = question_type.get("control", {}) + control_dict = question_type.get(const.CONTROL, {}) control_tag = control_dict.get("tag", "") if control_tag == "upload" and control_dict.get("mediatype") == "osm/*": control_tag = "osm" @@ -238,19 +241,19 @@ def _get_question_class(question_type_str, question_type_dictionary): @staticmethod def _create_specify_other_question_from_dict(d: Dict[str, Any]) -> InputQuestion: kwargs = { - "type": "text", - "name": "%s_other" % d["name"], - "label": "Specify other.", - "bind": {"relevant": "selected(../%s, 'other')" % d["name"]}, + const.TYPE: "text", + const.NAME: "%s_other" % d[const.NAME], + const.LABEL: "Specify other.", + const.BIND: {"relevant": "selected(../%s, 'other')" % d[const.NAME]}, } return InputQuestion(**kwargs) def _create_section_from_dict(self, d): d_copy = d.copy() - children = d_copy.pop("children", []) - section_class = self.SECTION_CLASSES[d_copy["type"]] - if d["type"] == "survey" and "title" not in d: - d_copy["title"] = d["name"] + children = d_copy.pop(const.CHILDREN, []) + section_class = self.SECTION_CLASSES[d_copy[const.TYPE]] + if d[const.TYPE] == const.SURVEY and const.TITLE not in d: + d_copy[const.TITLE] = d[const.NAME] result = section_class(**d_copy) for child in children: # Deep copying the child is a hacky solution to the or_other bug. @@ -258,9 +261,9 @@ def _create_section_from_dict(self, d): # And I hope it doesn't break something else. # I think the good solution would be to rewrite this class. survey_element = self.create_survey_element_from_dict(copy.deepcopy(child)) - if child["type"].endswith(" or specify other"): + if child[const.TYPE].endswith(const.SELECT_OR_OTHER_SUFFIX): select_question = survey_element[0] - itemset_choices = self.choices.get(select_question["itemset"], None) + itemset_choices = self.choices.get(select_question[const.ITEMSET], None) if ( itemset_choices is not None and isinstance(itemset_choices, list) @@ -280,8 +283,8 @@ def _create_loop_from_dict(self, d): Returns a GroupedSection """ d_copy = d.copy() - children = d_copy.pop("children", []) - columns = d_copy.pop("columns", []) + children = d_copy.pop(const.CHILDREN, []) + columns = d_copy.pop(const.COLUMNS, []) result = GroupedSection(**d_copy) # columns is a left over from when this was @@ -289,7 +292,7 @@ def _create_loop_from_dict(self, d): for column_dict in columns: # If this is a none option for a select all that apply # question then we should skip adding it to the result - if column_dict["name"] == "none": + if column_dict[const.NAME] == "none": continue column = GroupedSection(**column_dict) @@ -308,17 +311,17 @@ def _name_and_label_substitutions(self, question_template, column_headers): # if the label in column_headers has multiple languages setup a # dictionary by language to do substitutions. info_by_lang = {} - if type(column_headers["label"]) == dict: + if type(column_headers[const.LABEL]) == dict: info_by_lang = dict( [ ( lang, { - "name": column_headers["name"], - "label": column_headers["label"][lang], + const.NAME: column_headers[const.NAME], + const.LABEL: column_headers[const.LABEL][lang], }, ) - for lang in column_headers["label"].keys() + for lang in column_headers[const.LABEL].keys() ] ) @@ -329,7 +332,7 @@ def _name_and_label_substitutions(self, question_template, column_headers): elif type(result[key]) == dict: result[key] = result[key].copy() for key2 in result[key].keys(): - if type(column_headers["label"]) == dict: + if type(column_headers[const.LABEL]) == dict: result[key][key2] %= info_by_lang.get(key2, column_headers) else: result[key][key2] %= column_headers From 437e7c2eabc354c7d4dea4596717541e5510a053 Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Mon, 28 Aug 2023 20:34:25 +1000 Subject: [PATCH 3/3] add: warning when using or_other and translations together - warning is once per form. Re-used missing translations check code to avoid doing the same processing twice. The new check needed the intermediate result of the missing check. - test_validators.py: removed version check for import since supported versions have been above 3.3 for a while. --- .../pyxform/missing_translations_check.py | 138 -------------- .../validators/pyxform/translations_checks.py | 174 ++++++++++++++++++ pyxform/xls2json.py | 11 +- tests/test_translations.py | 16 +- tests/test_validators.py | 11 +- 5 files changed, 195 insertions(+), 155 deletions(-) delete mode 100644 pyxform/validators/pyxform/missing_translations_check.py create mode 100644 pyxform/validators/pyxform/translations_checks.py diff --git a/pyxform/validators/pyxform/missing_translations_check.py b/pyxform/validators/pyxform/missing_translations_check.py deleted file mode 100644 index addb15fa..00000000 --- a/pyxform/validators/pyxform/missing_translations_check.py +++ /dev/null @@ -1,138 +0,0 @@ -from collections import defaultdict -from typing import TYPE_CHECKING - -from pyxform import aliases, constants -from pyxform.errors import PyXFormError - -if TYPE_CHECKING: - from typing import Dict, List, Optional, Sequence, Union - - SheetData = List[Dict[str, Union[str, Dict]]] - - -def format_missing_translations_msg( - _in: "Dict[str, Dict[str, Sequence]]", -) -> "Optional[str]": - """ - Format the missing translations data into a warning message. - - :param _in: A dict structured as Dict[survey|choices: Dict[language: (columns)]]. - In other words, for the survey or choices sheet, a dict of the language(s) and - column names for which there are missing translations. - :return: The warning message, or None if there were no missing columns. - """ - - def get_sheet_msg(name, sheet): - if sheet is not None: - langs = sorted(sheet.keys()) - if 0 < len(langs): - lang_msgs = [] - for lang in langs: - cols = sheet[lang] - if isinstance(cols, str): - msg = f"Expected a sequence of columns, got a string for {lang}." - raise PyXFormError(msg) - if 1 == len(cols): - msg = f"Language '{lang}' is missing the {name} {cols[0]} column." - lang_msgs.append(msg) - if 1 < len(cols): - c = ", ".join(sorted(cols)) - msg = f"Language '{lang}' is missing the {name} columns {c}." - lang_msgs.append(msg) - return "\n".join(lang_msgs) - return None - - survey = get_sheet_msg(name=constants.SURVEY, sheet=_in.get(constants.SURVEY)) - choices = get_sheet_msg(name=constants.CHOICES, sheet=_in.get(constants.CHOICES)) - - messages = tuple(i for i in (survey, choices) if i is not None) - if 0 == len(messages): - return None - return "\n".join(messages) - - -def find_missing_translations( - sheet_data: "SheetData", - translatable_columns: "Dict[str, str]", -) -> "Dict[str, List[str]]": - """ - Find missing translation columns in the sheet data. - - For each translatable column used in the sheet, there should be a translation for - each language (including the default / unspecified language) that is used for any - other translatable column. - - This checks the first row only since it is concerned with the presence of columns, not - individual cells. It therefore assumes that each row object has the same structure. - - :param sheet_data: The survey or choices sheet data. - :param translatable_columns: The translatable columns for a sheet. The structure - should be Dict[internal_name, external_name]. See the aliases module. - :return: Dict[column_name, List[languages]] - """ - translations_seen = defaultdict(list) - translation_columns_seen = set() - - def process_cell(typ, cell): - if cell is not None: - if typ in translatable_columns.keys(): - name = translatable_columns[typ] - if isinstance(cell, str): - translations_seen[constants.DEFAULT_LANGUAGE_VALUE].append(name) - translation_columns_seen.add(name) - elif isinstance(cell, dict): - for lng in cell: - translations_seen[lng].append(name) - translation_columns_seen.add(name) - - if 0 < len(sheet_data): - # e.g. ("name", "q1"), ("label", {"en": "Hello", "fr": "Bonjour"}) - for column_type, cell_content in sheet_data[0].items(): - if column_type == constants.MEDIA and isinstance(cell_content, dict): - # e.g. ("audio", {"eng": "my.mp3"}) - for media_type, media_cell in cell_content.items(): - process_cell(typ=media_type, cell=media_cell) - if column_type == constants.BIND: - # e.g. ("jr:constraintMsg", "Try again") - for bind_type, bind_cell in cell_content.items(): - process_cell(typ=bind_type, cell=bind_cell) - else: - process_cell(typ=column_type, cell=cell_content) - - missing = defaultdict(list) - for lang, lang_trans in translations_seen.items(): - for seen_tran in translation_columns_seen: - if seen_tran not in lang_trans: - missing[lang].append(seen_tran) - - return missing - - -def missing_translations_check( - survey_sheet: "SheetData", - choices_sheet: "SheetData", - warnings: "List[str]", -): - """ - Add a warning if there are missing translation columns in the survey or choices data. - - :param survey_sheet: The survey sheet data. - :param choices_sheet: The choices sheet data. - :param warnings: The warnings list, which may be empty. - :return: The warnings list, possibly with a new message, otherwise unchanged. - """ - survey_missing_trans = find_missing_translations( - sheet_data=survey_sheet, - translatable_columns=aliases.TRANSLATABLE_SURVEY_COLUMNS, - ) - choices_missing_trans = find_missing_translations( - sheet_data=choices_sheet, - translatable_columns=aliases.TRANSLATABLE_CHOICES_COLUMNS, - ) - if 0 < len(survey_missing_trans) or 0 < len(choices_missing_trans): - msg = format_missing_translations_msg( - _in={"survey": survey_missing_trans, "choices": choices_missing_trans} - ) - if msg is not None: - warnings.append(msg) - return warnings diff --git a/pyxform/validators/pyxform/translations_checks.py b/pyxform/validators/pyxform/translations_checks.py new file mode 100644 index 00000000..722017ba --- /dev/null +++ b/pyxform/validators/pyxform/translations_checks.py @@ -0,0 +1,174 @@ +from collections import defaultdict +from typing import TYPE_CHECKING + +from pyxform import aliases +from pyxform import constants as const +from pyxform.errors import PyXFormError + +if TYPE_CHECKING: + from typing import Dict, List, Optional, Sequence, Set, Union + + SheetData = List[Dict[str, Union[str, Dict]]] + Warnings = List[str] + + +OR_OTHER_WARNING = ( + "This form uses or_other and translations, which is not recommended. " + "An untranslated input question label and choice label is generated " + "for 'other'. Learn more: https://xlsform.org/en/#specify-other)." +) + + +def format_missing_translations_msg( + _in: "Dict[str, Dict[str, Sequence]]", +) -> "Optional[str]": + """ + Format the missing translations data into a warning message. + + :param _in: A dict structured as Dict[survey|choices: Dict[language: (columns)]]. + In other words, for the survey or choices sheet, a dict of the language(s) and + column names for which there are missing translations. + :return: The warning message, or None if there were no missing columns. + """ + + def get_sheet_msg(name, sheet): + if sheet is not None: + langs = sorted(sheet.keys()) + if 0 < len(langs): + lang_msgs = [] + for lang in langs: + cols = sheet[lang] + if isinstance(cols, str): + msg = f"Expected a sequence of columns, got a string for {lang}." + raise PyXFormError(msg) + if 1 == len(cols): + msg = f"Language '{lang}' is missing the {name} {cols[0]} column." + lang_msgs.append(msg) + if 1 < len(cols): + c = ", ".join(sorted(cols)) + msg = f"Language '{lang}' is missing the {name} columns {c}." + lang_msgs.append(msg) + return "\n".join(lang_msgs) + return None + + survey = get_sheet_msg(name=const.SURVEY, sheet=_in.get(const.SURVEY)) + choices = get_sheet_msg(name=const.CHOICES, sheet=_in.get(const.CHOICES)) + + messages = tuple(i for i in (survey, choices) if i is not None) + if 0 == len(messages): + return None + return "\n".join(messages) + + +class Translations: + """ + Sheet-level container for translations info. + + For each translatable column used in the sheet, there should be a translation for + each language (including the default / unspecified language) that is used for any + other translatable column. + + Only the first row is inspected since the checks are concerned with the presence of + columns, not individual cells. It therefore assumes that each row object has the same + structure. + """ + + def __init__( + self, + sheet_data: "SheetData", + translatable_columns: "Dict[str, str]", + ): + """ + :param sheet_data: The survey or choices sheet data. + :param translatable_columns: The translatable columns for a sheet. The structure + should be Dict[internal_name, external_name]. See the aliases module. + """ + self.seen: "defaultdict[str, List[str]]" = defaultdict(list) + self.columns_seen: "Set[str]" = set() + self.missing: "defaultdict[str, List[str]]" = defaultdict(list) + + self._find_translations(sheet_data, translatable_columns) + self._find_missing() + + def _find_translations( + self, sheet_data: "SheetData", translatable_columns: "Dict[str, str]" + ): + def process_cell(typ, cell): + if cell is not None: + if typ in translatable_columns.keys(): + name = translatable_columns[typ] + if isinstance(cell, str): + self.seen[const.DEFAULT_LANGUAGE_VALUE].append(name) + self.columns_seen.add(name) + elif isinstance(cell, dict): + for lng in cell: + self.seen[lng].append(name) + self.columns_seen.add(name) + + if 0 < len(sheet_data): + # e.g. ("name", "q1"), ("label", {"en": "Hello", "fr": "Bonjour"}) + for column_type, cell_content in sheet_data[0].items(): + if column_type == const.MEDIA and isinstance(cell_content, dict): + # e.g. ("audio", {"eng": "my.mp3"}) + for media_type, media_cell in cell_content.items(): + process_cell(typ=media_type, cell=media_cell) + if column_type == const.BIND: + # e.g. ("jr:constraintMsg", "Try again") + for bind_type, bind_cell in cell_content.items(): + process_cell(typ=bind_type, cell=bind_cell) + else: + process_cell(typ=column_type, cell=cell_content) + + def seen_default_only(self) -> bool: + return 0 == len(self.seen) or ( + const.DEFAULT_LANGUAGE_VALUE in self.seen and 1 == len(self.seen) + ) + + def _find_missing(self): + if self.seen_default_only(): + return + for lang, lang_trans in self.seen.items(): + for seen_tran in self.columns_seen: + if seen_tran not in lang_trans: + self.missing[lang].append(seen_tran) + + +class SheetTranslations: + """Workbook-level container for translations info, with validation checks.""" + + def __init__( + self, + survey_sheet: "SheetData", + choices_sheet: "SheetData", + ): + """ + :param survey_sheet: The survey sheet data. + :param choices_sheet: The choices sheet data. + """ + self.survey: "Translations" = Translations( + sheet_data=survey_sheet, + translatable_columns=aliases.TRANSLATABLE_SURVEY_COLUMNS, + ) + self.choices: "Translations" = Translations( + sheet_data=choices_sheet, + translatable_columns=aliases.TRANSLATABLE_CHOICES_COLUMNS, + ) + self.or_other_seen: bool = False + + def missing_check(self, warnings: "Warnings") -> "Warnings": + """Add a warning if survey or choices have missing translations.""" + if 0 < len(self.survey.missing) or 0 < len(self.choices.missing): + msg = format_missing_translations_msg( + _in={"survey": self.survey.missing, "choices": self.choices.missing} + ) + if msg is not None: + warnings.append(msg) + return warnings + + def or_other_check(self, warnings: "Warnings") -> "Warnings": + """Add a warning if translations and or_other are present.""" + if self.or_other_seen and ( + not self.survey.seen_default_only() or not self.choices.seen_default_only() + ): + warnings.append(OR_OTHER_WARNING) + return warnings diff --git a/pyxform/xls2json.py b/pyxform/xls2json.py index 2fbfdeee..53b2e90a 100644 --- a/pyxform/xls2json.py +++ b/pyxform/xls2json.py @@ -24,9 +24,7 @@ from pyxform.errors import PyXFormError from pyxform.utils import PYXFORM_REFERENCE_REGEX, default_is_dynamic from pyxform.validators.pyxform import parameters_generic, select_from_file_params -from pyxform.validators.pyxform.missing_translations_check import ( - missing_translations_check, -) +from pyxform.validators.pyxform.translations_checks import SheetTranslations from pyxform.xls2json_backends import csv_to_dict, xls_to_dict, xlsx_to_dict from pyxform.xlsparseutils import find_sheet_misspellings, is_valid_xml_tag @@ -617,11 +615,11 @@ def workbook_to_json( # Check for missing translations. The choices sheet is checked here so that the # warning can be combined into one message. - warnings = missing_translations_check( + sheet_translations = SheetTranslations( survey_sheet=survey_sheet, choices_sheet=choices_sheet, - warnings=warnings, ) + sheet_translations.missing_check(warnings=warnings) # No spell check for OSM sheet (infrequently used, many spurious matches). osm_sheet = dealias_and_group_headers( @@ -1158,6 +1156,7 @@ def workbook_to_json( specify_other_question = None if parse_dict.get("specify_other") is not None: + sheet_translations.or_other_seen = True select_type += constants.SELECT_OR_OTHER_SUFFIX if row.get(constants.CHOICE_FILTER): msg = ( @@ -1417,6 +1416,8 @@ def workbook_to_json( # Put the row in the json dict as is: parent_children_array.append(row) + sheet_translations.or_other_check(warnings=warnings) + if len(stack) != 1: raise PyXFormError( "Unmatched begin statement: " diff --git a/tests/test_translations.py b/tests/test_translations.py index ece4717b..8116a0bb 100644 --- a/tests/test_translations.py +++ b/tests/test_translations.py @@ -10,7 +10,8 @@ from pyxform.constants import CHOICES from pyxform.constants import DEFAULT_LANGUAGE_VALUE as DEFAULT_LANG from pyxform.constants import SURVEY -from pyxform.validators.pyxform.missing_translations_check import ( +from pyxform.validators.pyxform.translations_checks import ( + OR_OTHER_WARNING, format_missing_translations_msg, ) from tests.pyxform_test_case import PyxformTestCase @@ -409,7 +410,10 @@ def run(name): run(name=f"questions={count}, with check (seconds):") - with patch("pyxform.xls2json.missing_translations_check", return_value=[]): + with patch( + "pyxform.xls2json.SheetTranslations.missing_check", + return_value=[], + ): run(name=f"questions={count}, without check (seconds):") @@ -921,6 +925,7 @@ def test_missing_translation__issue_157__warn__default(self): # TODO: bug - missing default lang translatable/itext values. self.xp.language_no_itext(DEFAULT_LANG), ], + warnings__not_contains=[OR_OTHER_WARNING], ) @@ -1373,6 +1378,7 @@ def test_missing_translation__two_lang__warn__default(self): self.xp.language_is_not_default("french"), self.xp.language_no_itext(DEFAULT_LANG), ], + warnings__not_contains=[OR_OTHER_WARNING], ) def test_specify_other__with_translations(self): @@ -1402,6 +1408,7 @@ def test_specify_other__with_translations(self): x:label[text() = 'Specify other.'] """, ], + warnings__contains=[OR_OTHER_WARNING], ) def test_specify_other__with_translations__with_group(self): @@ -1434,6 +1441,7 @@ def test_specify_other__with_translations__with_group(self): /x:label[text() = 'Specify other.'] """, ], + warnings__contains=[OR_OTHER_WARNING], ) def test_specify_other__with_translations__with_repeat(self): @@ -1467,6 +1475,7 @@ def test_specify_other__with_translations__with_repeat(self): /x:label[text() = 'Specify other.'] """, ], + warnings__contains=[OR_OTHER_WARNING], ) def test_specify_other__with_translations__with_nested_group(self): @@ -1509,6 +1518,7 @@ def test_specify_other__with_translations__with_nested_group(self): /x:label[text() = 'Specify other.'] """, ], + warnings__contains=[OR_OTHER_WARNING], ) def test_specify_other__with_translations__with_nested_repeat(self): @@ -1554,6 +1564,7 @@ def test_specify_other__with_translations__with_nested_repeat(self): /x:label[text() = 'Specify other.'] """, ], + warnings__contains=[OR_OTHER_WARNING], ) def test_specify_other__no_translations(self): @@ -1582,6 +1593,7 @@ def test_specify_other__no_translations(self): x:label[text() = 'Specify other.'] """, ], + warnings__not_contains=[OR_OTHER_WARNING], ) def test_specify_other__choice_filter(self): diff --git a/tests/test_validators.py b/tests/test_validators.py index 32337ba2..39e2b48b 100644 --- a/tests/test_validators.py +++ b/tests/test_validators.py @@ -2,20 +2,11 @@ """ Test validators. """ -import sys from unittest import TestCase +from unittest.mock import patch from pyxform.validators.odk_validate import check_java_available -if sys.version_info >= (3, 3): - from unittest.mock import patch # pylint: disable=E0611,E0401 -else: - try: - from mock import patch - except ImportError: - raise ImportError("Pyxform test suite requires the 'mock' library.") - - mock_func = "shutil.which" msg = "Form validation failed because Java (8+ required) could not be found."