Skip to content

Commit

Permalink
fix: exclude select with appearance=search() from itemset, test updates
Browse files Browse the repository at this point in the history
- move choices logic off to new function add_choices_info_to_question()
  - exclude select with appearance=search() from itemset (per comment)
  - new func for readability and to clarify what info is used to
    determine choices structures
- survey.py: skip adding translations for choices, instead of just for
  choice_filter. Use type "Option" to detect choice elements which
  should be more reliable. Move get_xpath lookup under if condition so
  it's only called if needed.
- xls2json.py: fix lookup of select type "select one external" which
  has an alias "select_one_external". Docs use the underscore alias
  but the implementation is looking for the spaces alias.
- xlsform_test_case/base.py: simplify reporter function
- xlsform_spec_test.py: simplify repetitive tests by adding test func.
  Ideally these should be re-implemented as PyxformTestCase, but they
  cover a lot of scenarios so for now just simplifying the structure.
- test_expected_output/*.xml: update expected XML to match current
  behaviour, generally: 1) update body to refer to instance, 2) add new
  instances in model, 3) add new translations in itext, 4) remove old
  itext from translations.
- Misc minor fixes:
  - utils.py: fix deprecation warning for get_sheet_by_name.
  - replace in-line regex with pre-compiled regex, and replace match
    (only looks at start of string) with search (looks at whole string).
  - add more constants for commonly-used internal strings so that it's
    easier to find where they are used, added usages to xls2json.py.
  • Loading branch information
lindsay-stevens committed May 5, 2023
1 parent a8107b5 commit 8f72e42
Show file tree
Hide file tree
Showing 12 changed files with 519 additions and 955 deletions.
5 changes: 5 additions & 0 deletions pyxform/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@
MEDIA = "media"
CONTROL = "control"
APPEARANCE = "appearance"
ITEMSET = "itemset"
RANDOMIZE = "randomize"
CHOICE_FILTER = "choice_filter"
PARAMETERS = "parameters"

LOOP = "loop"
COLUMNS = "columns"
Expand All @@ -65,6 +69,7 @@
CASCADING_SELECT = "cascading_select"
TABLE_LIST = "table-list" # hyphenated because it goes in appearance, and convention for appearance column is dashes # noqa
FIELD_LIST = "field-list"
LIST_NOLABEL = "list-nolabel"

SURVEY = "survey"
SETTINGS = "settings"
Expand Down
12 changes: 9 additions & 3 deletions pyxform/question.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
XForm Survey element classes for different question types.
"""
import os.path
import re

from pyxform.constants import (
EXTERNAL_CHOICES_ITEMSET_REF_LABEL,
Expand All @@ -15,7 +14,12 @@
from pyxform.errors import PyXFormError
from pyxform.question_type_dictionary import QUESTION_TYPE_DICT
from pyxform.survey_element import SurveyElement
from pyxform.utils import default_is_dynamic, has_dynamic_label, node
from pyxform.utils import (
PYXFORM_REFERENCE_REGEX,
default_is_dynamic,
has_dynamic_label,
node,
)


class Question(SurveyElement):
Expand Down Expand Up @@ -225,7 +229,9 @@ def build_xml(self):

has_media = False
has_dyn_label = False
is_previous_question = bool(re.match(r"^\${.*}$", self.get("itemset")))
is_previous_question = bool(
PYXFORM_REFERENCE_REGEX.search(self.get("itemset"))
)

if choices.get(itemset):
has_media = bool(choices[itemset][0].get("media"))
Expand Down
19 changes: 9 additions & 10 deletions pyxform/survey.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from pyxform.external_instance import ExternalInstance
from pyxform.instance import SurveyInstance
from pyxform.instance_info import InstanceInfo
from pyxform.question import Question
from pyxform.question import Option, Question
from pyxform.section import Section
from pyxform.survey_element import SurveyElement
from pyxform.utils import (
Expand Down Expand Up @@ -240,7 +240,7 @@ def xml(self):
self._setup_xpath_dictionary()

for triggering_reference in self.setvalues_by_triggering_ref.keys():
if not (re.match(BRACKETED_TAG_REGEX, triggering_reference)):
if not (re.search(BRACKETED_TAG_REGEX, triggering_reference)):
raise PyXFormError(
"Only references to other fields are allowed in the 'trigger' column."
)
Expand Down Expand Up @@ -768,14 +768,13 @@ def _set_up_media_translations(media_dict, translation_key):
self._translations = defaultdict(dict) # pylint: disable=W0201

for survey_element in self.iter_descendants():
# Skip set up of media for choices in filtered selects.
# Translations for the media content should have been set up
# in _setup_translations
parent = survey_element.get("parent")
if parent and not parent.get("choice_filter"):
translation_key = survey_element.get_xpath() + ":label"
# Skip set up of media for choices in selects. Translations for their media
# content should have been set up in _setup_translations, with one copy of
# each choice translation per language (after _add_empty_translations).
if not isinstance(survey_element, Option):
media_dict = survey_element.get("media")
if isinstance(media_dict, dict):
if isinstance(media_dict, dict) and 0 < len(media_dict):
translation_key = survey_element.get_xpath() + ":label"
_set_up_media_translations(media_dict, translation_key)

def itext(self):
Expand Down Expand Up @@ -979,7 +978,7 @@ def _is_return_relative_path():

indexed_repeat_name_index = None
indexed_repeat_args = (
function_args_regex.match(indexed_repeat.group())
function_args_regex.search(indexed_repeat.group())
.group(1)
.split(",")
)
Expand Down
18 changes: 16 additions & 2 deletions pyxform/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@
import os
import re
from collections import namedtuple
from itertools import chain
from json.decoder import JSONDecodeError
from typing import List, Tuple
from typing import Dict, List, Set, Tuple
from xml.dom.minidom import Element, Text, parseString

import openpyxl
Expand All @@ -25,6 +26,8 @@
LAST_SAVED_INSTANCE_NAME = "__last-saved"
BRACKETED_TAG_REGEX = re.compile(r"\${(last-saved#)?(.*?)}")
LAST_SAVED_REGEX = re.compile(r"\${last-saved#(.*?)}")
PYXFORM_REFERENCE_REGEX = re.compile(r"\$\{(.*?)\}")


NSMAP = {
"xmlns": "http://www.w3.org/2002/xforms",
Expand Down Expand Up @@ -143,6 +146,17 @@ def flatten(li):
yield it


def get_aliases_by_value(alias_dict: Dict[str, str], search_value: str) -> Set[str]:
"""
Get the unique keys and values that match the alias value given in search_value.
Example: get_aliases_by_value(aliases.select, constants.SELECT_ONE).
"""
return set(
chain.from_iterable(((k, v) for k, v in alias_dict.items() if v == search_value))
)


def sheet_to_csv(workbook_path, csv_path, sheet_name):
if workbook_path.endswith(".xls"):
return xls_sheet_to_csv(workbook_path, csv_path, sheet_name)
Expand Down Expand Up @@ -182,7 +196,7 @@ def xls_sheet_to_csv(workbook_path, csv_path, sheet_name):
def xlsx_sheet_to_csv(workbook_path, csv_path, sheet_name):
wb = openpyxl.open(workbook_path, read_only=True, data_only=True)
try:
sheet = wb.get_sheet_by_name(sheet_name)
sheet = wb[sheet_name]
except KeyError:
return False

Expand Down
4 changes: 3 additions & 1 deletion pyxform/validators/pyxform/select_from_file_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from pyxform.constants import ROW_FORMAT_STRING
from pyxform.errors import PyXFormError

VALUE_OR_LABEL_TEST_REGEX = re.compile(r"^[a-zA-Z_][a-zA-Z0-9\-_\.]*$")


def value_or_label_format_msg(name: str, row_number: int) -> str:
return (
Expand All @@ -14,7 +16,7 @@ def value_or_label_format_msg(name: str, row_number: int) -> str:


def value_or_label_test(value: str) -> bool:
query = re.search(r"^[a-zA-Z_][a-zA-Z0-9\-_\.]*$", value)
query = VALUE_OR_LABEL_TEST_REGEX.search(value)
if query is None:
return False
else:
Expand Down
141 changes: 100 additions & 41 deletions pyxform/xls2json.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,11 @@
validate_entity_saveto,
)
from pyxform.errors import PyXFormError
from pyxform.utils import default_is_dynamic
from pyxform.utils import (
PYXFORM_REFERENCE_REGEX,
default_is_dynamic,
get_aliases_by_value,
)
from pyxform.validators.pyxform import parameters_generic, select_from_file_params
from pyxform.validators.pyxform.missing_translations_check import (
missing_translations_check,
Expand All @@ -31,6 +35,7 @@
from pyxform.xlsparseutils import find_sheet_misspellings, is_valid_xml_tag

SMART_QUOTES = {"\u2018": "'", "\u2019": "'", "\u201c": '"', "\u201d": '"'}
SEARCH_APPEARANCE_REGEX = re.compile(r"search\(.*?\)")


def print_pyobj_to_json(pyobj, path=None):
Expand Down Expand Up @@ -322,6 +327,70 @@ def process_image_default(default_value):
return default_value


def add_choices_info_to_question(
question: Dict[str, Any],
list_name: str,
choices: Dict[str, list],
choice_filter: str,
file_extension: str,
):
"""
Add choices-related info to the question dict, e.g. itemset, list_name, choices, etc.
:param question: A dict with question details.
:param list_name: The choice list name for the question.
:param choices: The available choices in the survey.
:param choice_filter: The question's choice_filter, if any.
:param file_extension: The question's external select file_extension, if any.
:return: The updated question dict.
"""
if choice_filter is None:
choice_filter = ""
if file_extension is None:
file_extension = ""
try:
is_search = bool(
SEARCH_APPEARANCE_REGEX.search(
question[constants.CONTROL][constants.APPEARANCE]
)
)
except (KeyError, TypeError):
is_search = False

# External selects from a "search" appearance alone don't work in Enketo. In Collect
# they must have the "item" elements in the body, rather than in an "itemset".
if not is_search:
question[constants.ITEMSET] = list_name

if choice_filter:
# External selects e.g. type = "select_one_external city".
select_one_external = get_aliases_by_value(
alias_dict=aliases.select, search_value="select one external"
)
if question[constants.TYPE] in select_one_external:
question["query"] = list_name
elif choices.get(list_name):
# Reference to list name for data dictionary tools (ilri/odktools).
question["list_name"] = list_name
# Copy choices for data export tools (onaio/onadata).
# TODO: could onadata use the list_name to look up the list for
# export, instead of pyxform internally duplicating choices data?
question[constants.CHOICES] = choices[list_name]
elif not (
# Select with randomized choices.
(
constants.RANDOMIZE in question[constants.PARAMETERS]
and question[constants.PARAMETERS][constants.RANDOMIZE] == "true"
)
# Select from file e.g. type = "select_one_from_file cities.xml".
or file_extension in EXTERNAL_INSTANCE_EXTENSIONS
# Select from previous answers e.g. type = "select_one ${q1}".
or bool(PYXFORM_REFERENCE_REGEX.search(list_name))
):
question["list_name"] = list_name
question[constants.CHOICES] = choices[list_name]


def workbook_to_json(
workbook_dict,
form_name=None,
Expand Down Expand Up @@ -566,6 +635,9 @@ def workbook_to_json(
workbook_dict.get(constants.OSM, []), aliases.list_header, True
)
osm_tags = group_dictionaries_by_key(osm_sheet, constants.LIST_NAME)
select_one_external = get_aliases_by_value(
alias_dict=aliases.select, search_value="select one external"
)
# #################################

# Parse the survey sheet while generating a survey in our json format:
Expand Down Expand Up @@ -1024,16 +1096,16 @@ def workbook_to_json(
parse_dict = select_parse.groupdict()
if parse_dict.get("select_command"):
select_type = aliases.select[parse_dict["select_command"]]
if select_type == "select one external" and "choice_filter" not in row:
if select_type in select_one_external and "choice_filter" not in row:
warnings.append(
ROW_FORMAT_STRING % row_number
+ " select one external is only meant for"
" filtered selects."
)
list_name = parse_dict["list_name"]
list_file_name, file_extension = os.path.splitext(list_name)
file_extension = os.path.splitext(list_name)[1]
if (
select_type == "select one external"
select_type in select_one_external
and list_name not in external_choices
):
if not external_choices:
Expand All @@ -1054,9 +1126,9 @@ def workbook_to_json(
)
if (
list_name not in choices
and select_type != "select one external"
and select_type not in select_one_external
and file_extension not in EXTERNAL_INSTANCE_EXTENSIONS
and not re.match(r"\$\{(.*?)\}", list_name)
and not PYXFORM_REFERENCE_REGEX.search(list_name)
):
if not choices:
k = constants.CHOICES
Expand Down Expand Up @@ -1146,49 +1218,33 @@ def workbook_to_json(
row_number=row_number,
)

new_json_dict["parameters"] = parameters
new_json_dict[constants.PARAMETERS] = parameters

# Always generate secondary instance for selects.
new_json_dict["itemset"] = list_name
# Only add the choice if it's being used.
if list_name in choices:
add_choices_info_to_question(
question=new_json_dict,
list_name=list_name,
choices=choices,
choice_filter=row.get(constants.CHOICE_FILTER),
file_extension=file_extension,
)
# Add the choice to the survey choices if it's being used in an itemset.
if (
constants.ITEMSET in new_json_dict
and new_json_dict[constants.ITEMSET] == list_name
and list_name in choices
):
# Initialise choices output if none added already.
if constants.CHOICES not in json_dict:
json_dict[constants.CHOICES] = {}
json_dict[constants.CHOICES][list_name] = choices[list_name]

if row.get("choice_filter"):
# External selects e.g. type = "select_one_external city".
if select_type == "select one external":
new_json_dict["query"] = list_name
elif choices.get(list_name):
# Reference to list name for data dictionary tools (ilri/odktools).
new_json_dict["list_name"] = list_name
# Copy choices for data export tools (onaio/onadata).
# TODO: could onadata use the list_name to look up the list for
# export, instead of pyxform internally duplicating choices data?
new_json_dict[constants.CHOICES] = choices[list_name]
elif not (
# Select with randomized choices.
(
"randomize" in parameters.keys()
and parameters["randomize"] == "true"
)
# Select from file e.g. type = "select_one_from_file cities.xml".
or file_extension in EXTERNAL_INSTANCE_EXTENSIONS
# Select from previous answers e.g. type = "select_one ${q1}".
or bool(re.match(r"\$\{(.*?)\}", list_name))
):
new_json_dict["list_name"] = list_name
new_json_dict[constants.CHOICES] = choices[list_name]

# Code to deal with table_list appearance flags
# (for groups of selects)
if table_list is not None:
# Then this row is the first select in a table list
if not isinstance(table_list, str):
table_list = list_name
if row.get("choice_filter", None) is not None:
if row.get(constants.CHOICE_FILTER, None) is not None:
msg = (
ROW_FORMAT_STRING % row_number
+ " Choice filter not supported for table-list appearance."
Expand All @@ -1199,9 +1255,9 @@ def workbook_to_json(
constants.NAME: "reserved_name_for_field_list_labels_"
+ str(row_number),
# Adding row number for uniqueness # noqa
constants.CONTROL: {"appearance": "label"},
constants.CONTROL: {constants.APPEARANCE: "label"},
constants.CHOICES: choices[list_name],
"itemset": list_name,
constants.ITEMSET: list_name,
}
parent_children_array.append(table_list_header)

Expand All @@ -1213,8 +1269,11 @@ def workbook_to_json(
)
raise PyXFormError(error_message)

control = new_json_dict["control"] = new_json_dict.get("control", {})
control["appearance"] = "list-nolabel"
if constants.CONTROL not in new_json_dict:
new_json_dict[constants.CONTROL] = {}
new_json_dict[constants.CONTROL][
constants.APPEARANCE
] = constants.LIST_NOLABEL
parent_children_array.append(new_json_dict)
if specify_other_question:
parent_children_array.append(specify_other_question)
Expand Down
Loading

0 comments on commit 8f72e42

Please sign in to comment.