fix: exclude select with appearance=search() from itemset, test updates

- move choices logic off to new function add_choices_info_to_question() - exclude select with appearance=search() from itemset (per comment) - new func for readability and to clarify what info is used to determine choices structures - survey.py: skip adding translations for choices, instead of just for choice_filter. Use type "Option" to detect choice elements which should be more reliable. Move get_xpath lookup under if condition so it's only called if needed. - xls2json.py: fix lookup of select type "select one external" which has an alias "select_one_external". Docs use the underscore alias but the implementation is looking for the spaces alias. - xlsform_test_case/base.py: simplify reporter function - xlsform_spec_test.py: simplify repetitive tests by adding test func. Ideally these should be re-implemented as PyxformTestCase, but they cover a lot of scenarios so for now just simplifying the structure. - test_expected_output/*.xml: update expected XML to match current behaviour, generally: 1) update body to refer to instance, 2) add new instances in model, 3) add new translations in itext, 4) remove old itext from translations. - Misc minor fixes: - utils.py: fix deprecation warning for get_sheet_by_name. - replace in-line regex with pre-compiled regex, and replace match (only looks at start of string) with search (looks at whole string). - add more constants for commonly-used internal strings so that it's easier to find where they are used, added usages to xls2json.py.
XLSForm · May 5, 2023 · 8f72e42 · 8f72e42
1 parent a8107b5
commit 8f72e42
Show file tree

Hide file tree

Showing 12 changed files with 519 additions and 955 deletions.
diff --git a/pyxform/constants.py b/pyxform/constants.py
@@ -47,6 +47,10 @@
 MEDIA = "media"
 CONTROL = "control"
 APPEARANCE = "appearance"
+ITEMSET = "itemset"
+RANDOMIZE = "randomize"
+CHOICE_FILTER = "choice_filter"
+PARAMETERS = "parameters"
 
 LOOP = "loop"
 COLUMNS = "columns"
@@ -65,6 +69,7 @@
 CASCADING_SELECT = "cascading_select"
 TABLE_LIST = "table-list"  # hyphenated because it goes in appearance, and convention for appearance column is dashes # noqa
 FIELD_LIST = "field-list"
+LIST_NOLABEL = "list-nolabel"
 
 SURVEY = "survey"
 SETTINGS = "settings"

diff --git a/pyxform/question.py b/pyxform/question.py
@@ -3,7 +3,6 @@
 XForm Survey element classes for different question types.
 """
 import os.path
-import re
 
 from pyxform.constants import (
     EXTERNAL_CHOICES_ITEMSET_REF_LABEL,
@@ -15,7 +14,12 @@
 from pyxform.errors import PyXFormError
 from pyxform.question_type_dictionary import QUESTION_TYPE_DICT
 from pyxform.survey_element import SurveyElement
-from pyxform.utils import default_is_dynamic, has_dynamic_label, node
+from pyxform.utils import (
+    PYXFORM_REFERENCE_REGEX,
+    default_is_dynamic,
+    has_dynamic_label,
+    node,
+)
 
 
 class Question(SurveyElement):
@@ -225,7 +229,9 @@ def build_xml(self):
 
             has_media = False
             has_dyn_label = False
-            is_previous_question = bool(re.match(r"^\${.*}$", self.get("itemset")))
+            is_previous_question = bool(
+                PYXFORM_REFERENCE_REGEX.search(self.get("itemset"))
+            )
 
             if choices.get(itemset):
                 has_media = bool(choices[itemset][0].get("media"))

diff --git a/pyxform/survey.py b/pyxform/survey.py
@@ -18,7 +18,7 @@
 from pyxform.external_instance import ExternalInstance
 from pyxform.instance import SurveyInstance
 from pyxform.instance_info import InstanceInfo
-from pyxform.question import Question
+from pyxform.question import Option, Question
 from pyxform.section import Section
 from pyxform.survey_element import SurveyElement
 from pyxform.utils import (
@@ -240,7 +240,7 @@ def xml(self):
         self._setup_xpath_dictionary()
 
         for triggering_reference in self.setvalues_by_triggering_ref.keys():
-            if not (re.match(BRACKETED_TAG_REGEX, triggering_reference)):
+            if not (re.search(BRACKETED_TAG_REGEX, triggering_reference)):
                 raise PyXFormError(
                     "Only references to other fields are allowed in the 'trigger' column."
                 )
@@ -768,14 +768,13 @@ def _set_up_media_translations(media_dict, translation_key):
             self._translations = defaultdict(dict)  # pylint: disable=W0201
 
         for survey_element in self.iter_descendants():
-            # Skip set up of media for choices in filtered selects.
-            # Translations for the media content should have been set up
-            # in _setup_translations
-            parent = survey_element.get("parent")
-            if parent and not parent.get("choice_filter"):
-                translation_key = survey_element.get_xpath() + ":label"
+            # Skip set up of media for choices in selects. Translations for their media
+            # content should have been set up in _setup_translations, with one copy of
+            # each choice translation per language (after _add_empty_translations).
+            if not isinstance(survey_element, Option):
                 media_dict = survey_element.get("media")
-                if isinstance(media_dict, dict):
+                if isinstance(media_dict, dict) and 0 < len(media_dict):
+                    translation_key = survey_element.get_xpath() + ":label"
                     _set_up_media_translations(media_dict, translation_key)
 
     def itext(self):
@@ -979,7 +978,7 @@ def _is_return_relative_path():
 
                     indexed_repeat_name_index = None
                     indexed_repeat_args = (
-                        function_args_regex.match(indexed_repeat.group())
+                        function_args_regex.search(indexed_repeat.group())
                         .group(1)
                         .split(",")
                     )

diff --git a/pyxform/utils.py b/pyxform/utils.py
@@ -9,8 +9,9 @@
 import os
 import re
 from collections import namedtuple
+from itertools import chain
 from json.decoder import JSONDecodeError
-from typing import List, Tuple
+from typing import Dict, List, Set, Tuple
 from xml.dom.minidom import Element, Text, parseString
 
 import openpyxl
@@ -25,6 +26,8 @@
 LAST_SAVED_INSTANCE_NAME = "__last-saved"
 BRACKETED_TAG_REGEX = re.compile(r"\${(last-saved#)?(.*?)}")
 LAST_SAVED_REGEX = re.compile(r"\${last-saved#(.*?)}")
+PYXFORM_REFERENCE_REGEX = re.compile(r"\$\{(.*?)\}")
+
 
 NSMAP = {
     "xmlns": "http://www.w3.org/2002/xforms",
@@ -143,6 +146,17 @@ def flatten(li):
             yield it
 
 
+def get_aliases_by_value(alias_dict: Dict[str, str], search_value: str) -> Set[str]:
+    """
+    Get the unique keys and values that match the alias value given in search_value.
+
+    Example: get_aliases_by_value(aliases.select, constants.SELECT_ONE).
+    """
+    return set(
+        chain.from_iterable(((k, v) for k, v in alias_dict.items() if v == search_value))
+    )
+
+
 def sheet_to_csv(workbook_path, csv_path, sheet_name):
     if workbook_path.endswith(".xls"):
         return xls_sheet_to_csv(workbook_path, csv_path, sheet_name)
@@ -182,7 +196,7 @@ def xls_sheet_to_csv(workbook_path, csv_path, sheet_name):
 def xlsx_sheet_to_csv(workbook_path, csv_path, sheet_name):
     wb = openpyxl.open(workbook_path, read_only=True, data_only=True)
     try:
-        sheet = wb.get_sheet_by_name(sheet_name)
+        sheet = wb[sheet_name]
     except KeyError:
         return False
 

diff --git a/pyxform/validators/pyxform/select_from_file_params.py b/pyxform/validators/pyxform/select_from_file_params.py
@@ -3,6 +3,8 @@
 from pyxform.constants import ROW_FORMAT_STRING
 from pyxform.errors import PyXFormError
 
+VALUE_OR_LABEL_TEST_REGEX = re.compile(r"^[a-zA-Z_][a-zA-Z0-9\-_\.]*$")
+
 
 def value_or_label_format_msg(name: str, row_number: int) -> str:
     return (
@@ -14,7 +16,7 @@ def value_or_label_format_msg(name: str, row_number: int) -> str:
 
 
 def value_or_label_test(value: str) -> bool:
-    query = re.search(r"^[a-zA-Z_][a-zA-Z0-9\-_\.]*$", value)
+    query = VALUE_OR_LABEL_TEST_REGEX.search(value)
     if query is None:
         return False
     else:

diff --git a/pyxform/xls2json.py b/pyxform/xls2json.py
@@ -22,7 +22,11 @@
     validate_entity_saveto,
 )
 from pyxform.errors import PyXFormError
-from pyxform.utils import default_is_dynamic
+from pyxform.utils import (
+    PYXFORM_REFERENCE_REGEX,
+    default_is_dynamic,
+    get_aliases_by_value,
+)
 from pyxform.validators.pyxform import parameters_generic, select_from_file_params
 from pyxform.validators.pyxform.missing_translations_check import (
     missing_translations_check,
@@ -31,6 +35,7 @@
 from pyxform.xlsparseutils import find_sheet_misspellings, is_valid_xml_tag
 
 SMART_QUOTES = {"\u2018": "'", "\u2019": "'", "\u201c": '"', "\u201d": '"'}
+SEARCH_APPEARANCE_REGEX = re.compile(r"search\(.*?\)")
 
 
 def print_pyobj_to_json(pyobj, path=None):
@@ -322,6 +327,70 @@ def process_image_default(default_value):
     return default_value
 
 
+def add_choices_info_to_question(
+    question: Dict[str, Any],
+    list_name: str,
+    choices: Dict[str, list],
+    choice_filter: str,
+    file_extension: str,
+):
+    """
+    Add choices-related info to the question dict, e.g. itemset, list_name, choices, etc.
+
+    :param question: A dict with question details.
+    :param list_name: The choice list name for the question.
+    :param choices: The available choices in the survey.
+    :param choice_filter: The question's choice_filter, if any.
+    :param file_extension: The question's external select file_extension, if any.
+    :return: The updated question dict.
+    """
+    if choice_filter is None:
+        choice_filter = ""
+    if file_extension is None:
+        file_extension = ""
+    try:
+        is_search = bool(
+            SEARCH_APPEARANCE_REGEX.search(
+                question[constants.CONTROL][constants.APPEARANCE]
+            )
+        )
+    except (KeyError, TypeError):
+        is_search = False
+
+    # External selects from a "search" appearance alone don't work in Enketo. In Collect
+    # they must have the "item" elements in the body, rather than in an "itemset".
+    if not is_search:
+        question[constants.ITEMSET] = list_name
+
+    if choice_filter:
+        # External selects e.g. type = "select_one_external city".
+        select_one_external = get_aliases_by_value(
+            alias_dict=aliases.select, search_value="select one external"
+        )
+        if question[constants.TYPE] in select_one_external:
+            question["query"] = list_name
+        elif choices.get(list_name):
+            # Reference to list name for data dictionary tools (ilri/odktools).
+            question["list_name"] = list_name
+            # Copy choices for data export tools (onaio/onadata).
+            # TODO: could onadata use the list_name to look up the list for
+            #  export, instead of pyxform internally duplicating choices data?
+            question[constants.CHOICES] = choices[list_name]
+    elif not (
+        # Select with randomized choices.
+        (
+            constants.RANDOMIZE in question[constants.PARAMETERS]
+            and question[constants.PARAMETERS][constants.RANDOMIZE] == "true"
+        )
+        # Select from file e.g. type = "select_one_from_file cities.xml".
+        or file_extension in EXTERNAL_INSTANCE_EXTENSIONS
+        # Select from previous answers e.g. type = "select_one ${q1}".
+        or bool(PYXFORM_REFERENCE_REGEX.search(list_name))
+    ):
+        question["list_name"] = list_name
+        question[constants.CHOICES] = choices[list_name]
+
+
 def workbook_to_json(
     workbook_dict,
     form_name=None,
@@ -566,6 +635,9 @@ def workbook_to_json(
         workbook_dict.get(constants.OSM, []), aliases.list_header, True
     )
     osm_tags = group_dictionaries_by_key(osm_sheet, constants.LIST_NAME)
+    select_one_external = get_aliases_by_value(
+        alias_dict=aliases.select, search_value="select one external"
+    )
     # #################################
 
     # Parse the survey sheet while generating a survey in our json format:
@@ -1024,16 +1096,16 @@ def workbook_to_json(
             parse_dict = select_parse.groupdict()
             if parse_dict.get("select_command"):
                 select_type = aliases.select[parse_dict["select_command"]]
-                if select_type == "select one external" and "choice_filter" not in row:
+                if select_type in select_one_external and "choice_filter" not in row:
                     warnings.append(
                         ROW_FORMAT_STRING % row_number
                         + " select one external is only meant for"
                         " filtered selects."
                     )
                 list_name = parse_dict["list_name"]
-                list_file_name, file_extension = os.path.splitext(list_name)
+                file_extension = os.path.splitext(list_name)[1]
                 if (
-                    select_type == "select one external"
+                    select_type in select_one_external
                     and list_name not in external_choices
                 ):
                     if not external_choices:
@@ -1054,9 +1126,9 @@ def workbook_to_json(
                     )
                 if (
                     list_name not in choices
-                    and select_type != "select one external"
+                    and select_type not in select_one_external
                     and file_extension not in EXTERNAL_INSTANCE_EXTENSIONS
-                    and not re.match(r"\$\{(.*?)\}", list_name)
+                    and not PYXFORM_REFERENCE_REGEX.search(list_name)
                 ):
                     if not choices:
                         k = constants.CHOICES
@@ -1146,49 +1218,33 @@ def workbook_to_json(
                         row_number=row_number,
                     )
 
-                new_json_dict["parameters"] = parameters
+                new_json_dict[constants.PARAMETERS] = parameters
 
-                # Always generate secondary instance for selects.
-                new_json_dict["itemset"] = list_name
-                # Only add the choice if it's being used.
-                if list_name in choices:
+                add_choices_info_to_question(
+                    question=new_json_dict,
+                    list_name=list_name,
+                    choices=choices,
+                    choice_filter=row.get(constants.CHOICE_FILTER),
+                    file_extension=file_extension,
+                )
+                # Add the choice to the survey choices if it's being used in an itemset.
+                if (
+                    constants.ITEMSET in new_json_dict
+                    and new_json_dict[constants.ITEMSET] == list_name
+                    and list_name in choices
+                ):
                     # Initialise choices output if none added already.
                     if constants.CHOICES not in json_dict:
                         json_dict[constants.CHOICES] = {}
                     json_dict[constants.CHOICES][list_name] = choices[list_name]
 
-                if row.get("choice_filter"):
-                    # External selects e.g. type = "select_one_external city".
-                    if select_type == "select one external":
-                        new_json_dict["query"] = list_name
-                    elif choices.get(list_name):
-                        # Reference to list name for data dictionary tools (ilri/odktools).
-                        new_json_dict["list_name"] = list_name
-                        # Copy choices for data export tools (onaio/onadata).
-                        # TODO: could onadata use the list_name to look up the list for
-                        #  export, instead of pyxform internally duplicating choices data?
-                        new_json_dict[constants.CHOICES] = choices[list_name]
-                elif not (
-                    # Select with randomized choices.
-                    (
-                        "randomize" in parameters.keys()
-                        and parameters["randomize"] == "true"
-                    )
-                    # Select from file e.g. type = "select_one_from_file cities.xml".
-                    or file_extension in EXTERNAL_INSTANCE_EXTENSIONS
-                    # Select from previous answers e.g. type = "select_one ${q1}".
-                    or bool(re.match(r"\$\{(.*?)\}", list_name))
-                ):
-                    new_json_dict["list_name"] = list_name
-                    new_json_dict[constants.CHOICES] = choices[list_name]
-
                 # Code to deal with table_list appearance flags
                 # (for groups of selects)
                 if table_list is not None:
                     # Then this row is the first select in a table list
                     if not isinstance(table_list, str):
                         table_list = list_name
-                        if row.get("choice_filter", None) is not None:
+                        if row.get(constants.CHOICE_FILTER, None) is not None:
                             msg = (
                                 ROW_FORMAT_STRING % row_number
                                 + " Choice filter not supported for table-list appearance."
@@ -1199,9 +1255,9 @@ def workbook_to_json(
                             constants.NAME: "reserved_name_for_field_list_labels_"
                             + str(row_number),
                             # Adding row number for uniqueness # noqa
-                            constants.CONTROL: {"appearance": "label"},
+                            constants.CONTROL: {constants.APPEARANCE: "label"},
                             constants.CHOICES: choices[list_name],
-                            "itemset": list_name,
+                            constants.ITEMSET: list_name,
                         }
                         parent_children_array.append(table_list_header)
 
@@ -1213,8 +1269,11 @@ def workbook_to_json(
                         )
                         raise PyXFormError(error_message)
 
-                    control = new_json_dict["control"] = new_json_dict.get("control", {})
-                    control["appearance"] = "list-nolabel"
+                    if constants.CONTROL not in new_json_dict:
+                        new_json_dict[constants.CONTROL] = {}
+                    new_json_dict[constants.CONTROL][
+                        constants.APPEARANCE
+                    ] = constants.LIST_NOLABEL
                 parent_children_array.append(new_json_dict)
                 if specify_other_question:
                     parent_children_array.append(specify_other_question)