From 42bb2d6733c1fd47694ede0e842dfb55084a7779 Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Fri, 15 Nov 2024 03:14:53 +1100 Subject: [PATCH 01/15] fix: poor performance with large and complex forms with many references - builder.py - replace "copy_json_dict" with copy.deepcopy since that's what it seems to be doing (probably less efficiently) - question.py - for each SurveyElement subclass that overrides __init__, ensure that it calls the parent / super().__init__ (and calls it last) - to allow the above, rearrange the choices/children/tags into just "children" so that the base __init__ can add them. - generally the goal is to avoid use cases where the dict info from the XLSForm spec is modified after __init__ to help with the below - survey_element.py - avoid recursive calls through __getattr__ by using the base class dict.get when those keys are known to exist and aren't in FIELDS - get the default info from the question_type_dict once at init time, instead of potentially looking it up on every call to __getattr__ - avoid many calls to get_lineage by caching the xpath after it is calculated once, and invalidate the cache if the parent changes (although this doesn't seem to ever happen during test runs). - avoid many calls to iter_descendants by caching the result of "any_repeat" (dict can be quite large since it's every element). - This does not seem like the optimal solution yet since there are many related recursive calls that could be avoided - Also this function is only used for the survey class so it would make sense to move it there. - for the form described in central/#171, the form conversion total time is reduced from about 5 minutes to about 45 seconds. Resident memory usage about 43MB baseline and 150MB after conversion. --- pyxform/builder.py | 25 +------------ pyxform/question.py | 60 ++++++++++++-------------------- pyxform/survey_element.py | 56 ++++++++++++++++++++++------- pyxform/utils.py | 16 +++++++++ tests/parsing/test_expression.py | 3 ++ tests/test_j2x_creation.py | 24 ------------- 6 files changed, 87 insertions(+), 97 deletions(-) diff --git a/pyxform/builder.py b/pyxform/builder.py index 0ade9150..b1a711b8 100644 --- a/pyxform/builder.py +++ b/pyxform/builder.py @@ -52,29 +52,6 @@ } -def copy_json_dict(json_dict): - """ - Returns a deep copy of the input json_dict - """ - json_dict_copy = None - items = None - - if isinstance(json_dict, list): - json_dict_copy = [None] * len(json_dict) - items = enumerate(json_dict) - elif isinstance(json_dict, dict): - json_dict_copy = {} - items = json_dict.items() - - for key, value in items: - if isinstance(value, dict | list): - json_dict_copy[key] = copy_json_dict(value) - else: - json_dict_copy[key] = value - - return json_dict_copy - - class SurveyElementBuilder: def __init__(self, **kwargs): # I don't know why we would need an explicit none option for @@ -143,7 +120,7 @@ def create_survey_element_from_dict( else: self._save_trigger(d=d) return self._create_question_from_dict( - d, copy_json_dict(QUESTION_TYPE_DICT), self._add_none_option + d, copy.deepcopy(QUESTION_TYPE_DICT), self._add_none_option ) def _save_trigger(self, d: dict) -> None: diff --git a/pyxform/question.py b/pyxform/question.py index 6c8f6a91..1962fb00 100644 --- a/pyxform/question.py +++ b/pyxform/question.py @@ -17,6 +17,7 @@ from pyxform.utils import ( PYXFORM_REFERENCE_REGEX, DetachableElement, + combine_lists, default_is_dynamic, node, ) @@ -189,21 +190,17 @@ def _translation_path(self, display_element): class MultipleChoiceQuestion(Question): def __init__(self, **kwargs): - kwargs_copy = kwargs.copy() # Notice that choices can be specified under choices or children. # I'm going to try to stick to just choices. # Aliases in the json format will make it more difficult # to use going forward. - choices = list(kwargs_copy.pop("choices", [])) + list( - kwargs_copy.pop("children", []) - ) - Question.__init__(self, **kwargs_copy) - for choice in choices: - self.add_choice(**choice) - - def add_choice(self, **kwargs): - option = Option(**kwargs) - self.add_child(option) + kwargs["children"] = [ + Option(**c) + for c in combine_lists( + a=kwargs.pop("choices", None), b=kwargs.pop("children", None) + ) + ] + super().__init__(**kwargs) def validate(self): Question.validate(self) @@ -320,23 +317,19 @@ def build_xml(self): class SelectOneQuestion(MultipleChoiceQuestion): def __init__(self, **kwargs): - super().__init__(**kwargs) self._dict[self.TYPE] = "select one" + super().__init__(**kwargs) class Tag(SurveyElement): def __init__(self, **kwargs): - kwargs_copy = kwargs.copy() - choices = kwargs_copy.pop("choices", []) + kwargs_copy.pop("children", []) - - super().__init__(**kwargs_copy) - - if choices: - self.children = [] - - for choice in choices: - option = Option(**choice) - self.add_child(option) + kwargs["children"] = [ + Option(**c) + for c in combine_lists( + a=kwargs.pop("choices", None), b=kwargs.pop("children", None) + ) + ] + super().__init__(**kwargs) def xml(self): result = node("tag", key=self.name) @@ -356,20 +349,13 @@ def xml_control(self): class OsmUploadQuestion(UploadQuestion): def __init__(self, **kwargs): - kwargs_copy = kwargs.copy() - tags = kwargs_copy.pop("tags", []) + kwargs_copy.pop("children", []) - - super().__init__(**kwargs_copy) - - if tags: - self.children = [] - - for tag in tags: - self.add_tag(**tag) - - def add_tag(self, **kwargs): - tag = Tag(**kwargs) - self.add_child(tag) + kwargs["children"] = [ + Tag(**c) + for c in combine_lists( + a=kwargs.pop("tags", None), b=kwargs.pop("children", None) + ) + ] + super().__init__(**kwargs) def build_xml(self): control_dict = self.control diff --git a/pyxform/survey_element.py b/pyxform/survey_element.py index 3468f92f..007b9169 100644 --- a/pyxform/survey_element.py +++ b/pyxform/survey_element.py @@ -75,6 +75,13 @@ def any_repeat(survey_element: "SurveyElement", parent_xpath: str) -> bool: return False +SURVEY_ELEMENT_LOCAL_KEYS = { + "_survey_element_default_type", + "_survey_element_repeats", + "_survey_element_xpath", +} + + class SurveyElement(dict): """ SurveyElement is the base class we'll looks for the following keys @@ -85,31 +92,40 @@ class SurveyElement(dict): __name__ = "SurveyElement" FIELDS: ClassVar[dict[str, Any]] = FIELDS.copy() - def _default(self): - # TODO: need way to override question type dictionary - defaults = QUESTION_TYPE_DICT - return defaults.get(self.get("type"), {}) + def _dict_get(self, key): + """Built-in dict.get to bypass __getattr__""" + return dict.get(self, key) def __getattr__(self, key): """ Get attributes from FIELDS rather than the class. """ if key in self.FIELDS: - question_type_dict = self._default() - under = question_type_dict.get(key, None) - over = self.get(key) + under = None + default = self._dict_get("_survey_element_default_type") + if default is not None: + under = default.get(key, None) if not under: - return over - return _overlay(over, under) + return self._dict_get(key) + return _overlay(self._dict_get(key), under) raise AttributeError(key) def __hash__(self): return hash(id(self)) def __setattr__(self, key, value): + if key == "parent": + # Object graph local changes invalidate these cached facts. + self._survey_element_repeats = {} + self._survey_element_xpath = None self[key] = value def __init__(self, **kwargs): + self._survey_element_default_type: dict = kwargs.get( + "question_type_dict", QUESTION_TYPE_DICT + ).get(kwargs.get("type"), {}) + self._survey_element_xpath: str | None = None + self._survey_element_repeats: dict = {} for key, default in self.FIELDS.items(): self[key] = kwargs.get(key, default()) self._link_children() @@ -120,6 +136,7 @@ def __init__(self, **kwargs): # themselves. if self.control.get("appearance") == "label" and not self.label: self["label"] = " " + super().__init__() def _link_children(self): for child in self.children: @@ -161,7 +178,12 @@ def iter_descendants(self): def any_repeat(self, parent_xpath: str) -> bool: """Return True if there ia any repeat in `parent_xpath`.""" - return any_repeat(survey_element=self, parent_xpath=parent_xpath) + current_value = self._dict_get("_survey_element_repeats") + if parent_xpath not in current_value: + new_value = any_repeat(survey_element=self, parent_xpath=parent_xpath) + current_value[parent_xpath] = new_value + return new_value + return current_value[parent_xpath] def get_lineage(self): """ @@ -184,7 +206,12 @@ def get_xpath(self): """ Return the xpath of this survey element. """ - return "/".join([""] + [n.name for n in self.get_lineage()]) + current_value = self._dict_get("_survey_element_xpath") + if current_value is None: + new_value = f'/{"/".join(n.name for n in self.get_lineage())}' + self._survey_element_xpath = new_value + return new_value + return current_value def get_abbreviated_xpath(self): lineage = self.get_lineage() @@ -213,7 +240,12 @@ def to_json_dict(self): """ self.validate() result = self.copy() - to_delete = ["parent", "question_type_dictionary", "_created"] + to_delete = [ + "parent", + "question_type_dictionary", + "_created", + *SURVEY_ELEMENT_LOCAL_KEYS, + ] # Delete all keys that may cause a "Circular Reference" # error while converting the result to JSON self._delete_keys_from_dict(result, to_delete) diff --git a/pyxform/utils.py b/pyxform/utils.py index 37e5a849..d00e7d95 100644 --- a/pyxform/utils.py +++ b/pyxform/utils.py @@ -335,3 +335,19 @@ def levenshtein_distance(a: str, b: str) -> int: def coalesce(*args): return next((a for a in args if a is not None), None) + + +def combine_lists( + a: list[dict] | None = None, + b: list[dict] | None = None, +) -> list[dict]: + """Get the list that is not None, or both lists combined, or an empty list.""" + if a is None and b is None: + combined = [] + elif a is None: + combined = b + elif b is None: + combined = a + else: + combined = a + b + return combined diff --git a/tests/parsing/test_expression.py b/tests/parsing/test_expression.py index 1fe3ad42..ca8dc4c9 100644 --- a/tests/parsing/test_expression.py +++ b/tests/parsing/test_expression.py @@ -31,6 +31,9 @@ ("name with space", "Invalid character (space)"), ("na@me", "Invalid character (@)"), ("na#me", "Invalid character (#)"), + ("name:.name", "Invalid character (in local name)"), + ("-name:name", "Invalid character (in namespace)"), + ("$name:@name", "Invalid character (in both names)"), ("name:name:name", "Invalid structure (multiple colons)"), ] diff --git a/tests/test_j2x_creation.py b/tests/test_j2x_creation.py index c0d7b41c..72761275 100644 --- a/tests/test_j2x_creation.py +++ b/tests/test_j2x_creation.py @@ -10,30 +10,6 @@ class Json2XformVerboseSurveyCreationTests(TestCase): - def test_survey_can_be_created_in_a_verbose_manner(self): - s = Survey() - s.name = "simple_survey" - - q = MultipleChoiceQuestion() - q.name = "cow_color" - q.type = "select one" - - q.add_choice(label="Green", name="green") - s.add_child(q) - - expected_dict = { - "name": "simple_survey", - "children": [ - { - "name": "cow_color", - "type": "select one", - "children": [{"label": "Green", "name": "green"}], - } - ], - } - self.maxDiff = None - self.assertEqual(s.to_json_dict(), expected_dict) - def test_survey_can_be_created_in_a_slightly_less_verbose_manner(self): option_dict_array = [ {"name": "red", "label": "Red"}, From 3a9d0bb9ddcd118ce740b256f48b6e27ad07bcf1 Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Fri, 15 Nov 2024 07:09:53 +1100 Subject: [PATCH 02/15] fix: poor performance with large and complex forms with many references - instance.py - gather the survey xpaths since _xpath now tracks the objects - survey.py - when flattening the objects in _setup_xpath_dict, keep the object references instead of just the xpath, for future use - skip calling the heavily recursive share_same_repeat_parent if the target and context items have no common ancestor that is a repeat - incorporate "any_repeat" func body into is_parent_a_repeat since it is not re-used elsewhere and to avoid an extra lru_cache. - survey_element.py - add method to iterate ancestors, to find common items for relative references, rather than iterating down from the survey/root - add method to find nearest common ancestor repeat (if any). Currently only used to discern objects with no such ancestor but could be developed further to replace "share_same_repeat_parent". --- pyxform/instance.py | 2 +- pyxform/survey.py | 33 +++++++++------ pyxform/survey_element.py | 89 ++++++++++++++++++++++++++++----------- 3 files changed, 84 insertions(+), 40 deletions(-) diff --git a/pyxform/instance.py b/pyxform/instance.py index 17b77f9f..ae47fedb 100644 --- a/pyxform/instance.py +++ b/pyxform/instance.py @@ -22,7 +22,7 @@ def __init__(self, survey_object, **kwargs): # get xpaths # - prep for xpaths. self._survey.xml() - self._xpaths = self._survey._xpath.values() + self._xpaths = [x.get_xpath() for x in self._survey._xpath.values()] # see "answers(self):" below for explanation of this dict self._answers = {} diff --git a/pyxform/survey.py b/pyxform/survey.py index 2b6e21ed..80171af5 100644 --- a/pyxform/survey.py +++ b/pyxform/survey.py @@ -84,8 +84,9 @@ def is_parent_a_repeat(survey, xpath): if not parent_xpath: return False - if survey.any_repeat(parent_xpath): - return parent_xpath + for item in survey.iter_descendants(): + if item.type == constants.REPEAT and item.get_xpath() == parent_xpath: + return parent_xpath return is_parent_a_repeat(survey, parent_xpath) @@ -98,7 +99,7 @@ def share_same_repeat_parent(survey, xpath, context_xpath, reference_parent=Fals parent. For example, - xpath = /data/repeat_a/group_a/name + xpath = /data/repeat_a/group_a/name context_xpath = /data/repeat_a/group_b/age returns (2, '/group_a/name')' @@ -995,13 +996,13 @@ def __unicode__(self): return f"" def _setup_xpath_dictionary(self): - self._xpath = {} # pylint: disable=attribute-defined-outside-init + self._xpath: dict[str, SurveyElement | None] = {} # pylint: disable=attribute-defined-outside-init for element in self.iter_descendants(): if isinstance(element, Question | Section): if element.name in self._xpath: self._xpath[element.name] = None else: - self._xpath[element.name] = element.get_xpath() + self._xpath[element.name] = element def _var_repl_function( self, matchobj, context, use_current=False, reference_parent=False @@ -1036,7 +1037,7 @@ def _in_secondary_instance_predicate() -> bool: def _relative_path(ref_name: str, _use_current: bool) -> str | None: """Given name in ${name}, return relative xpath to ${name}.""" return_path = None - xpath = self._xpath[ref_name] + xpath = self._xpath[ref_name].get_xpath() context_xpath = context.get_xpath() # share same root i.e repeat_a from /data/repeat_a/... if ( @@ -1045,13 +1046,17 @@ def _relative_path(ref_name: str, _use_current: bool) -> str | None: ): # if context xpath and target xpath fall under the same # repeat use relative xpath referencing. - steps, ref_path = share_same_repeat_parent( - self, xpath, context_xpath, reference_parent - ) - if steps: - ref_path = ref_path if ref_path.endswith(ref_name) else f"/{name}" - prefix = " current()/" if _use_current else " " - return_path = prefix + "/".join([".."] * steps) + ref_path + " " + relation = context.has_common_repeat_parent(self._xpath[ref_name]) + if relation[0] == "Unrelated": + return return_path + else: + steps, ref_path = share_same_repeat_parent( + self, xpath, context_xpath, reference_parent + ) + if steps: + ref_path = ref_path if ref_path.endswith(ref_name) else f"/{name}" + prefix = " current()/" if _use_current else " " + return_path = prefix + "/".join([".."] * steps) + ref_path + " " return return_path @@ -1122,7 +1127,7 @@ def _is_return_relative_path() -> bool: last_saved_prefix = ( "instance('" + LAST_SAVED_INSTANCE_NAME + "')" if last_saved else "" ) - return " " + last_saved_prefix + self._xpath[name] + " " + return " " + last_saved_prefix + self._xpath[name].get_xpath() + " " def insert_xpaths(self, text, context, use_current=False, reference_parent=False): """ diff --git a/pyxform/survey_element.py b/pyxform/survey_element.py index 007b9169..2328296b 100644 --- a/pyxform/survey_element.py +++ b/pyxform/survey_element.py @@ -5,8 +5,8 @@ import json import re from collections import deque -from functools import lru_cache -from typing import TYPE_CHECKING, Any, ClassVar +from collections.abc import Generator +from typing import TYPE_CHECKING, Any, ClassVar, Optional from pyxform import aliases as alias from pyxform import constants as const @@ -65,19 +65,8 @@ def _overlay(over, under): return over if over else under -@lru_cache(maxsize=65536) -def any_repeat(survey_element: "SurveyElement", parent_xpath: str) -> bool: - """Return True if there ia any repeat in `parent_xpath`.""" - for item in survey_element.iter_descendants(): - if item.get_xpath() == parent_xpath and item.type == const.REPEAT: - return True - - return False - - -SURVEY_ELEMENT_LOCAL_KEYS = { +SURVEY_ELEMENT_LOCAL_KEYS = { "_survey_element_default_type", - "_survey_element_repeats", "_survey_element_xpath", } @@ -115,8 +104,7 @@ def __hash__(self): def __setattr__(self, key, value): if key == "parent": - # Object graph local changes invalidate these cached facts. - self._survey_element_repeats = {} + # If object graph position changes then invalidate cached. self._survey_element_xpath = None self[key] = value @@ -125,7 +113,6 @@ def __init__(self, **kwargs): "question_type_dict", QUESTION_TYPE_DICT ).get(kwargs.get("type"), {}) self._survey_element_xpath: str | None = None - self._survey_element_repeats: dict = {} for key, default in self.FIELDS.items(): self[key] = kwargs.get(key, default()) self._link_children() @@ -176,14 +163,65 @@ def iter_descendants(self): for e in self.children: yield from e.iter_descendants() - def any_repeat(self, parent_xpath: str) -> bool: - """Return True if there ia any repeat in `parent_xpath`.""" - current_value = self._dict_get("_survey_element_repeats") - if parent_xpath not in current_value: - new_value = any_repeat(survey_element=self, parent_xpath=parent_xpath) - current_value[parent_xpath] = new_value - return new_value - return current_value[parent_xpath] + def iter_ancestors(self) -> Generator[tuple["SurveyElement", int], None, None]: + """Get each self.parent with their distance from self (starting at 1).""" + distance = 1 + current = self.parent + while current is not None: + yield current, distance + current = current.parent + distance += 1 + + def has_common_repeat_parent( + self, other: "SurveyElement" + ) -> tuple[str, int | None, Optional["SurveyElement"]]: + """ + Get the relation type, steps (generations), and the common ancestor. + """ + # Quick check for immediate relation. + if self.parent is other: + return "Parent (other)", 1, other + elif other.parent is self: + return "Parent (self)", 1, self + + # Traversal tracking + self_ancestors = {} + other_ancestors = {} + self_current = self + other_current = other + self_distance = 0 + other_distance = 0 + + # Traverse up both ancestor chains as far as necessary. + while self_current or other_current: + # Step up the self chain + if self_current: + self_distance += 1 + self_current = self_current.parent + if self_current: + self_ancestors[self_current] = self_distance + if ( + self_current.type == const.REPEAT + and self_current in other_ancestors + ): + max_steps = max(self_distance, other_ancestors[self_current]) + return "Common Ancestor Repeat", max_steps, self_current + + # Step up the other chain + if other_current: + other_distance += 1 + other_current = other_current.parent + if other_current: + other_ancestors[other_current] = other_distance + if ( + other_current.type == const.REPEAT + and other_current in self_ancestors + ): + max_steps = max(other_distance, self_ancestors[other_current]) + return "Common Ancestor Repeat", max_steps, other_current + + # No common ancestor found. + return "Unrelated", None, None def get_lineage(self): """ @@ -244,6 +282,7 @@ def to_json_dict(self): "parent", "question_type_dictionary", "_created", + "_xpath", *SURVEY_ELEMENT_LOCAL_KEYS, ] # Delete all keys that may cause a "Circular Reference" From a84e75f1bd16c63015af027e45ac26f052f83309 Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Sun, 17 Nov 2024 02:18:43 +1100 Subject: [PATCH 03/15] fix: avoid unnecessary dict.copy() or copy.deepcopy() - wastes cpu time and memory, but likely only noticeable on large forms - presumably being done to avoid changing input dictionaries. Not sure if that is a necessary guarantee to make. Should be possible to avoid the need to copy dicts at all, by only reading from them instead of making changes during processing e.g. replace dict.pop() with dict.get() and skip using that key later. - builder.py - in _create_section_from_dict (applies to Survey, Group, Repeat) the input dict data was being copied by copy then deepcopy for every child element e.g. firstly the entire Survey, then each Group or Repeat recursively in the data. - to avoid this while maintaining backwards compatibility, now the public method create_survey_element_from_dict will make a deepcopy of the input data (the entire Survey), and all private (recursive) methods will use that copy rather than making additional copies. - an exception is _name_and_label_substitutions which seems to still need copies in order to generate language-specific copies of data. - question_type_dictionary.py - this reference dict was being copied for every SurveyElement - never modified so no need to make a copy - to avoid unintentional modifications in the future, replace the original object with a MappingProxy which is a read-only view. --- pyxform/builder.py | 59 +++++++++++++++++------------ pyxform/question_type_dictionary.py | 7 +++- 2 files changed, 41 insertions(+), 25 deletions(-) diff --git a/pyxform/builder.py b/pyxform/builder.py index b1a711b8..df0801fb 100644 --- a/pyxform/builder.py +++ b/pyxform/builder.py @@ -78,19 +78,28 @@ def set_sections(self, sections): self._sections = sections def create_survey_element_from_dict( - self, d: dict[str, Any] + self, d: dict[str, Any], deep_copy: bool = True ) -> Union["SurveyElement", list["SurveyElement"]]: """ Convert from a nested python dictionary/array structure (a json dict I call it because it corresponds directly with a json object) to a survey object + + :param d: data to use for constructing SurveyElements. + :param deep_copy: Take a copy.deepcopy() of the input data, to avoid changing the + original objects during processing. All methods private to this Builder class + should use "False" since the data is already copied by its public methods. """ + # TODO: ideally avoid copying entirely, but at least try to only copy once. + if deep_copy: + d = copy.deepcopy(d) + if "add_none_option" in d: self._add_none_option = d["add_none_option"] if d[const.TYPE] in SECTION_CLASSES: if d[const.TYPE] == const.SURVEY: - self._choices = copy.deepcopy(d.get(const.CHOICES, {})) + self._choices = d.get(const.CHOICES, {}) section = self._create_section_from_dict(d) @@ -111,7 +120,7 @@ def create_survey_element_from_dict( self._sections.keys(), ) d = self._sections[section_name] - full_survey = self.create_survey_element_from_dict(d) + full_survey = self.create_survey_element_from_dict(d=d, deep_copy=False) return full_survey.children elif d[const.TYPE] in ["xml-external", "csv-external"]: return ExternalInstance(**d) @@ -120,7 +129,7 @@ def create_survey_element_from_dict( else: self._save_trigger(d=d) return self._create_question_from_dict( - d, copy.deepcopy(QUESTION_TYPE_DICT), self._add_none_option + d, QUESTION_TYPE_DICT, self._add_none_option ) def _save_trigger(self, d: dict) -> None: @@ -142,23 +151,22 @@ def _create_question_from_dict( add_none_option: bool = False, ) -> Question | list[Question]: question_type_str = d[const.TYPE] - d_copy = d.copy() # TODO: Keep add none option? if add_none_option and question_type_str.startswith(const.SELECT_ALL_THAT_APPLY): - SurveyElementBuilder._add_none_option_to_select_all_that_apply(d_copy) + SurveyElementBuilder._add_none_option_to_select_all_that_apply(d) # Handle or_other on select type questions or_other_len = len(const.SELECT_OR_OTHER_SUFFIX) if question_type_str.endswith(const.SELECT_OR_OTHER_SUFFIX): question_type_str = question_type_str[: len(question_type_str) - or_other_len] - d_copy[const.TYPE] = question_type_str - SurveyElementBuilder._add_other_option_to_multiple_choice_question(d_copy) + d[const.TYPE] = question_type_str + SurveyElementBuilder._add_other_option_to_multiple_choice_question(d) return [ SurveyElementBuilder._create_question_from_dict( - d_copy, question_type_dictionary, add_none_option + d, question_type_dictionary, add_none_option ), - SurveyElementBuilder._create_specify_other_question_from_dict(d_copy), + SurveyElementBuilder._create_specify_other_question_from_dict(d), ] question_class = SurveyElementBuilder._get_question_class( @@ -166,9 +174,9 @@ def _create_question_from_dict( ) # todo: clean up this spaghetti code - d_copy["question_type_dictionary"] = question_type_dictionary + d["question_type_dictionary"] = question_type_dictionary if question_class: - return question_class(**d_copy) + return question_class(**d) return [] @@ -243,18 +251,19 @@ def _create_specify_other_question_from_dict(d: dict[str, Any]) -> InputQuestion return InputQuestion(**kwargs) def _create_section_from_dict(self, d): - d_copy = d.copy() - children = d_copy.pop(const.CHILDREN, []) - section_class = SECTION_CLASSES[d_copy[const.TYPE]] + children = d.pop(const.CHILDREN, []) + section_class = SECTION_CLASSES[d[const.TYPE]] if d[const.TYPE] == const.SURVEY and const.TITLE not in d: - d_copy[const.TITLE] = d[const.NAME] - result = section_class(**d_copy) + d[const.TITLE] = d[const.NAME] + result = section_class(**d) for child in children: # Deep copying the child is a hacky solution to the or_other bug. # I don't know why it works. # And I hope it doesn't break something else. # I think the good solution would be to rewrite this class. - survey_element = self.create_survey_element_from_dict(copy.deepcopy(child)) + survey_element = self.create_survey_element_from_dict( + d=child, deep_copy=False + ) if child[const.TYPE].endswith(const.SELECT_OR_OTHER_SUFFIX): select_question = survey_element[0] itemset_choices = self._choices.get(select_question[const.ITEMSET], None) @@ -279,10 +288,9 @@ def _create_loop_from_dict(self, d): Takes a json_dict of "loop" type Returns a GroupedSection """ - d_copy = d.copy() - children = d_copy.pop(const.CHILDREN, []) - columns = d_copy.pop(const.COLUMNS, []) - result = GroupedSection(**d_copy) + children = d.pop(const.CHILDREN, []) + columns = d.pop(const.COLUMNS, []) + result = GroupedSection(**d) # columns is a left over from when this was # create_table_from_dict, I will need to clean this up @@ -295,7 +303,9 @@ def _create_loop_from_dict(self, d): column = GroupedSection(**column_dict) for child in children: question_dict = self._name_and_label_substitutions(child, column_dict) - question = self.create_survey_element_from_dict(question_dict) + question = self.create_survey_element_from_dict( + d=question_dict, deep_copy=False + ) column.add_child(question) result.add_child(column) if result.name != "": @@ -333,7 +343,8 @@ def _name_and_label_substitutions(question_template, column_headers): def create_survey_element_from_json(self, str_or_path): d = utils.get_pyobj_from_json(str_or_path) - return self.create_survey_element_from_dict(d) + # Loading JSON creates a new dictionary structure so no need to re-copy. + return self.create_survey_element_from_dict(d=d, deep_copy=False) def create_survey_element_from_dict(d, sections=None): diff --git a/pyxform/question_type_dictionary.py b/pyxform/question_type_dictionary.py index b22e0b7d..0d5e0857 100644 --- a/pyxform/question_type_dictionary.py +++ b/pyxform/question_type_dictionary.py @@ -2,6 +2,8 @@ XForm survey question type mapping dictionary module. """ +from types import MappingProxyType + from pyxform.xls2json import QuestionTypesReader, print_pyobj_to_json @@ -16,7 +18,7 @@ def generate_new_dict(): print_pyobj_to_json(json_dict, "new_question_type_dict.json") -QUESTION_TYPE_DICT = { +_QUESTION_TYPE_DICT = { "q picture": { "control": {"tag": "upload", "mediatype": "image/*"}, "bind": {"type": "binary"}, @@ -387,3 +389,6 @@ def generate_new_dict(): "bind": {"type": "geopoint"}, }, } + +# Read-only view of the types. +QUESTION_TYPE_DICT = MappingProxyType(_QUESTION_TYPE_DICT) From 188cb510d28d49c1c1c1c85caf27db1c433d9956 Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Sun, 17 Nov 2024 09:25:22 +1100 Subject: [PATCH 04/15] fix: avoid repeatedly traversing SurveyElements to find the Survey - avoid repeatedly traversing the object tree to find the Survey, and instead pass it to where it is needed. Maybe a looks more verbose but it is faster, and clearer where the Survey is being used. - entities.entity_declaration.py - use provided survey object instead of lookup with self.get_root() - parsing.expression.py - use slotted class since it seems somewhat faster and lower memory - parsing.instance_expression.py - inline function to allow left-to-right condition optimisation - external_instance.py, question.py, section.py - use provided survey object instead of lookup with self.get_root() - survey.py - filter iter_descendants types during iteration instead of after, to avoid processing them twice - convert most of model_children generator functions to return generators and pass them to node() as such to reduce intermediate lists held in memory - move "xml_descendent_bindings" and "xml_actions" from survey_element since they are only called by the Survey class - survey_element.py - add option to filter iter_descendants and iter_ancestors by providng a condition callable - use provided survey object instead of lookup with self.get_root() - remove unused functions: get_lineage, get_root, get_media_keys - fix unnecessary value checks in xml_bindings (the key can only be one of the "if" options). - simplify xml_action - utils.py - allow node() args to be a Generator, and if so append each element --- pyxform/entities/entity_declaration.py | 11 +- pyxform/external_instance.py | 7 +- pyxform/parsing/expression.py | 14 ++- pyxform/parsing/instance_expression.py | 22 ++-- pyxform/question.py | 67 +++++----- pyxform/section.py | 68 ++++++----- pyxform/survey.py | 102 ++++++++++------ pyxform/survey_element.py | 163 ++++++++++--------------- pyxform/utils.py | 4 + tests/test_j2x_question.py | 44 ++++--- 10 files changed, 258 insertions(+), 244 deletions(-) diff --git a/pyxform/entities/entity_declaration.py b/pyxform/entities/entity_declaration.py index b34950a0..18ae84c6 100644 --- a/pyxform/entities/entity_declaration.py +++ b/pyxform/entities/entity_declaration.py @@ -1,7 +1,13 @@ +from typing import TYPE_CHECKING + from pyxform import constants as const from pyxform.survey_element import SurveyElement from pyxform.utils import node +if TYPE_CHECKING: + from pyxform.survey import Survey + + EC = const.EntityColumns @@ -50,11 +56,10 @@ def xml_instance(self, **kwargs): else: return node(const.ENTITY, **attributes) - def xml_bindings(self): + def xml_bindings(self, survey: "Survey"): """ See the class comment for an explanation of the logic for generating bindings. """ - survey = self.get_root() parameters = self.get(const.PARAMETERS, {}) entity_id_expression = parameters.get(EC.ENTITY_ID, None) create_condition = parameters.get(EC.CREATE_IF, None) @@ -125,5 +130,5 @@ def _get_bind_node(self, survey, expression, destination): return node(const.BIND, nodeset=self.get_xpath() + destination, **bind_attrs) - def xml_control(self): + def xml_control(self, survey: "Survey"): raise NotImplementedError() diff --git a/pyxform/external_instance.py b/pyxform/external_instance.py index 50ea4a70..4688b4de 100644 --- a/pyxform/external_instance.py +++ b/pyxform/external_instance.py @@ -2,11 +2,16 @@ ExternalInstance class module """ +from typing import TYPE_CHECKING + from pyxform.survey_element import SurveyElement +if TYPE_CHECKING: + from pyxform.survey import Survey + class ExternalInstance(SurveyElement): - def xml_control(self): + def xml_control(self, survey: "Survey"): """ No-op since there is no associated form control to place under . diff --git a/pyxform/parsing/expression.py b/pyxform/parsing/expression.py index 29df4fa1..32c6509b 100644 --- a/pyxform/parsing/expression.py +++ b/pyxform/parsing/expression.py @@ -1,7 +1,6 @@ import re from collections.abc import Iterable from functools import lru_cache -from typing import NamedTuple def get_expression_lexer() -> re.Scanner: @@ -74,11 +73,14 @@ def tokenizer(scan, value): # Scanner takes a few 100ms to compile so use this shared instance. -class ExpLexerToken(NamedTuple): - name: str - value: str - start: int - end: int +class ExpLexerToken: + __slots__ = ("name", "value", "start", "end") + + def __init__(self, name: str, value: str, start: int, end: int) -> None: + self.name: str = name + self.value: str = value + self.start: int = start + self.end: int = end _EXPRESSION_LEXER = get_expression_lexer() diff --git a/pyxform/parsing/instance_expression.py b/pyxform/parsing/instance_expression.py index 4b3f82ed..7ab5fbb2 100644 --- a/pyxform/parsing/instance_expression.py +++ b/pyxform/parsing/instance_expression.py @@ -1,6 +1,6 @@ from typing import TYPE_CHECKING -from pyxform.parsing.expression import ExpLexerToken, parse_expression +from pyxform.parsing.expression import parse_expression from pyxform.utils import BRACKETED_TAG_REGEX, node if TYPE_CHECKING: @@ -8,18 +8,6 @@ from pyxform.survey_element import SurveyElement -def instance_func_start(token: ExpLexerToken) -> bool: - """ - Determine if the token is the start of an instance expression. - - :param token: The token to examine. - :return: If True, the token is the start of an instance expression. - """ - if token is None: - return False - return token.name == "FUNC_CALL" and token.value == "instance(" - - def find_boundaries(xml_text: str) -> list[tuple[int, int]]: """ Find token boundaries of any instance() expression. @@ -43,14 +31,18 @@ def find_boundaries(xml_text: str) -> list[tuple[int, int]]: for t in tokens: emit = False # If an instance expression had started, note the string position boundary. - if instance_func_start(token=t) and not instance_enter: + if not instance_enter and t.name == "FUNC_CALL" and t.value == "instance(": instance_enter = True emit = True boundaries.append(t.start) # Tokens that are part of an instance expression. elif instance_enter: # Tokens that are part of the instance call. - if instance_func_start(token=last_token) and t.name == "SYSTEM_LITERAL": + if ( + t.name == "SYSTEM_LITERAL" + and last_token.name == "FUNC_CALL" + and last_token.value == "instance(" + ): emit = True elif last_token.name == "SYSTEM_LITERAL" and t.name == "CLOSE_PAREN": emit = True diff --git a/pyxform/question.py b/pyxform/question.py index 1962fb00..1b7c30b4 100644 --- a/pyxform/question.py +++ b/pyxform/question.py @@ -3,6 +3,7 @@ """ import os.path +from typing import TYPE_CHECKING from pyxform.constants import ( EXTERNAL_CHOICES_ITEMSET_REF_LABEL, @@ -22,6 +23,9 @@ node, ) +if TYPE_CHECKING: + from pyxform.survey import Survey + class Question(SurveyElement): FIELDS = SurveyElement.FIELDS.copy() @@ -41,8 +45,7 @@ def validate(self): if self.type not in QUESTION_TYPE_DICT: raise PyXFormError(f"Unknown question type '{self.type}'.") - def xml_instance(self, **kwargs): - survey = self.get_root() + def xml_instance(self, survey: "Survey", **kwargs): attributes = {} attributes.update(self.get("instance", {})) for key, value in attributes.items(): @@ -52,8 +55,7 @@ def xml_instance(self, **kwargs): return node(self.name, str(self.get("default")), **attributes) return node(self.name, **attributes) - def xml_control(self): - survey = self.get_root() + def xml_control(self, survey: "Survey"): if self.type == "calculate" or ( ("calculate" in self.bind or self.trigger) and not (self.label or self.hint) ): @@ -70,7 +72,7 @@ def xml_control(self): raise PyXFormError(msg) return None - xml_node = self.build_xml() + xml_node = self.build_xml(survey=survey) if xml_node: # Get nested setvalue and setgeopoint items @@ -102,7 +104,7 @@ def nest_set_nodes(self, survey, xml_node, tag, nested_items): set_node = node(tag, **node_attrs) xml_node.appendChild(set_node) - def build_xml(self) -> DetachableElement | None: + def build_xml(self, survey: "Survey") -> DetachableElement | None: return None @@ -112,10 +114,9 @@ class InputQuestion(Question): dates, geopoints, barcodes ... """ - def build_xml(self): + def build_xml(self, survey: "Survey"): control_dict = self.control - label_and_hint = self.xml_label_and_hint() - survey = self.get_root() + label_and_hint = self.xml_label_and_hint(survey=survey) # Resolve field references in attributes for key, value in control_dict.items(): control_dict[key] = survey.insert_xpaths(value, self) @@ -123,7 +124,7 @@ def build_xml(self): result = node(**control_dict) if label_and_hint: - for element in self.xml_label_and_hint(): + for element in self.xml_label_and_hint(survey=survey): result.appendChild(element) # Input types are used for selects with external choices sheets. @@ -138,39 +139,36 @@ def build_xml(self): class TriggerQuestion(Question): - def build_xml(self): + def build_xml(self, survey: "Survey"): control_dict = self.control - survey = self.get_root() # Resolve field references in attributes for key, value in control_dict.items(): control_dict[key] = survey.insert_xpaths(value, self) control_dict["ref"] = self.get_xpath() - return node("trigger", *self.xml_label_and_hint(), **control_dict) + return node("trigger", *self.xml_label_and_hint(survey=survey), **control_dict) class UploadQuestion(Question): def _get_media_type(self): return self.control["mediatype"] - def build_xml(self): + def build_xml(self, survey: "Survey"): control_dict = self.control - survey = self.get_root() # Resolve field references in attributes for key, value in control_dict.items(): control_dict[key] = survey.insert_xpaths(value, self) control_dict["ref"] = self.get_xpath() control_dict["mediatype"] = self._get_media_type() - return node("upload", *self.xml_label_and_hint(), **control_dict) + return node("upload", *self.xml_label_and_hint(survey=survey), **control_dict) class Option(SurveyElement): def xml_value(self): return node("value", self.name) - def xml(self): + def xml(self, survey: "Survey"): item = node("item") - self.xml_label() - item.appendChild(self.xml_label()) + item.appendChild(self.xml_label(survey=survey)) item.appendChild(self.xml_value()) return item @@ -178,7 +176,7 @@ def xml(self): def validate(self): pass - def xml_control(self): + def xml_control(self, survey: "Survey"): raise NotImplementedError() def _translation_path(self, display_element): @@ -210,10 +208,9 @@ def validate(self): for choice in descendants: choice.validate() - def build_xml(self): + def build_xml(self, survey: "Survey"): if self.bind["type"] not in ["string", "odk:rank"]: raise PyXFormError("""Invalid value for `self.bind["type"]`.""") - survey = self.get_root() control_dict = self.control.copy() # Resolve field references in attributes for key, value in control_dict.items(): @@ -221,7 +218,7 @@ def build_xml(self): control_dict["ref"] = self.get_xpath() result = node(**control_dict) - for element in self.xml_label_and_hint(): + for element in self.xml_label_and_hint(survey=survey): result.appendChild(element) # itemset are only supposed to be strings, @@ -310,7 +307,7 @@ def build_xml(self): result.appendChild(node("itemset", *itemset_children, nodeset=nodeset)) else: for child in self.children: - result.appendChild(child.xml()) + result.appendChild(child.xml(survey=survey)) return result @@ -331,19 +328,18 @@ def __init__(self, **kwargs): ] super().__init__(**kwargs) - def xml(self): + def xml(self, survey: "Survey"): result = node("tag", key=self.name) - self.xml_label() - result.appendChild(self.xml_label()) + result.appendChild(self.xml_label(survey=survey)) for choice in self.children: - result.appendChild(choice.xml()) + result.appendChild(choice.xml(survey=survey)) return result def validate(self): pass - def xml_control(self): + def xml_control(self, survey: "Survey"): raise NotImplementedError() @@ -357,23 +353,22 @@ def __init__(self, **kwargs): ] super().__init__(**kwargs) - def build_xml(self): + def build_xml(self, survey: "Survey"): control_dict = self.control control_dict["ref"] = self.get_xpath() control_dict["mediatype"] = self._get_media_type() - result = node("upload", *self.xml_label_and_hint(), **control_dict) + result = node("upload", *self.xml_label_and_hint(survey=survey), **control_dict) for osm_tag in self.children: - result.appendChild(osm_tag.xml()) + result.appendChild(osm_tag.xml(survey=survey)) return result class RangeQuestion(Question): - def build_xml(self): + def build_xml(self, survey: "Survey"): control_dict = self.control - label_and_hint = self.xml_label_and_hint() - survey = self.get_root() + label_and_hint = self.xml_label_and_hint(survey=survey) # Resolve field references in attributes for key, value in control_dict.items(): control_dict[key] = survey.insert_xpaths(value, self) @@ -382,7 +377,7 @@ def build_xml(self): control_dict.update(params) result = node(**control_dict) if label_and_hint: - for element in self.xml_label_and_hint(): + for element in self.xml_label_and_hint(survey=survey): result.appendChild(element) return result diff --git a/pyxform/section.py b/pyxform/section.py index a52ea9e9..7e8635ae 100644 --- a/pyxform/section.py +++ b/pyxform/section.py @@ -2,11 +2,16 @@ Section survey element module. """ +from typing import TYPE_CHECKING + from pyxform.errors import PyXFormError from pyxform.external_instance import ExternalInstance from pyxform.survey_element import SurveyElement from pyxform.utils import node +if TYPE_CHECKING: + from pyxform.survey import Survey + class Section(SurveyElement): def validate(self): @@ -28,7 +33,7 @@ def _validate_uniqueness_of_element_names(self): ) element_slugs.add(elem_lower) - def xml_instance(self, **kwargs): + def xml_instance(self, survey: "Survey", **kwargs): """ Creates an xml representation of the section """ @@ -37,7 +42,6 @@ def xml_instance(self, **kwargs): attributes = {} attributes.update(kwargs) attributes.update(self.get("instance", {})) - survey = self.get_root() # Resolve field references in attributes for key, value in attributes.items(): attributes[key] = survey.insert_xpaths(value, self) @@ -46,53 +50,55 @@ def xml_instance(self, **kwargs): for child in self.children: repeating_template = None if child.get("flat"): - for grandchild in child.xml_instance_array(): + for grandchild in child.xml_instance_array(survey=survey): result.appendChild(grandchild) elif isinstance(child, ExternalInstance): continue else: if isinstance(child, RepeatingSection) and not append_template: append_template = not append_template - repeating_template = child.generate_repeating_template() - result.appendChild(child.xml_instance(append_template=append_template)) + repeating_template = child.generate_repeating_template(survey=survey) + result.appendChild( + child.xml_instance(survey=survey, append_template=append_template) + ) if append_template and repeating_template: append_template = not append_template result.insertBefore(repeating_template, result._get_lastChild()) return result - def generate_repeating_template(self, **kwargs): + def generate_repeating_template(self, survey: "Survey", **kwargs): attributes = {"jr:template": ""} result = node(self.name, **attributes) for child in self.children: if isinstance(child, RepeatingSection): - result.appendChild(child.template_instance()) + result.appendChild(child.template_instance(survey=survey)) else: - result.appendChild(child.xml_instance()) + result.appendChild(child.xml_instance(survey=survey)) return result - def xml_instance_array(self): + def xml_instance_array(self, survey: "Survey"): """ This method is used for generating flat instances. """ for child in self.children: if child.get("flat"): - yield from child.xml_instance_array() + yield from child.xml_instance_array(survey=survey) else: - yield child.xml_instance() + yield child.xml_instance(survey=survey) - def xml_control(self): + def xml_control(self, survey: "Survey"): """ Ideally, we'll have groups up and rolling soon, but for now let's just yield controls from all the children of this section """ for e in self.children: - control = e.xml_control() + control = e.xml_control(survey=survey) if control is not None: yield control class RepeatingSection(Section): - def xml_control(self): + def xml_control(self, survey: "Survey"): """ @@ -107,43 +113,44 @@ def xml_control(self): """ control_dict = self.control.copy() - survey = self.get_root() # Resolve field references in attributes for key, value in control_dict.items(): control_dict[key] = survey.insert_xpaths(value, self) repeat_node = node("repeat", nodeset=self.get_xpath(), **control_dict) - for n in Section.xml_control(self): + for n in Section.xml_control(self, survey=survey): repeat_node.appendChild(n) - setvalue_nodes = self._get_setvalue_nodes_for_dynamic_defaults() + setvalue_nodes = self._get_setvalue_nodes_for_dynamic_defaults(survey=survey) for setvalue_node in setvalue_nodes: repeat_node.appendChild(setvalue_node) - label = self.xml_label() + label = self.xml_label(survey=survey) if label: - return node("group", self.xml_label(), repeat_node, ref=self.get_xpath()) + return node("group", label, repeat_node, ref=self.get_xpath()) return node("group", repeat_node, ref=self.get_xpath(), **self.control) # Get setvalue nodes for all descendants of this repeat that have dynamic defaults and aren't nested in other repeats. - def _get_setvalue_nodes_for_dynamic_defaults(self): + def _get_setvalue_nodes_for_dynamic_defaults(self, survey: "Survey"): setvalue_nodes = [] - self._dynamic_defaults_helper(self, setvalue_nodes) + self._dynamic_defaults_helper(current=self, nodes=setvalue_nodes, survey=survey) return setvalue_nodes - def _dynamic_defaults_helper(self, current, nodes): + def _dynamic_defaults_helper(self, current: "Section", nodes: list, survey: "Survey"): for e in current.children: if e.type != "repeat": # let nested repeats handle their own defaults - dynamic_default = e.get_setvalue_node_for_dynamic_default(in_repeat=True) + dynamic_default = e.get_setvalue_node_for_dynamic_default( + in_repeat=True, survey=survey + ) if dynamic_default: nodes.append(dynamic_default) - self._dynamic_defaults_helper(e, nodes) + self._dynamic_defaults_helper(current=e, nodes=nodes, survey=survey) # I'm anal about matching function signatures when overriding a function, # but there's no reason for kwargs to be an argument - def template_instance(self, **kwargs): - return super().generate_repeating_template(**kwargs) + def template_instance(self, survey: "Survey", **kwargs): + return super().generate_repeating_template(survey=survey, **kwargs) class GroupedSection(Section): @@ -159,7 +166,7 @@ class GroupedSection(Section): # kwargs["children"].insert(0, kwargs["children"][0]) # super(GroupedSection, self).__init__(kwargs) - def xml_control(self): + def xml_control(self, survey: "Survey"): control_dict = self.control if control_dict.get("bodyless"): @@ -169,8 +176,6 @@ def xml_control(self): attributes = {} attributes.update(self.control) - survey = self.get_root() - # Resolve field references in attributes for key, value in attributes.items(): attributes[key] = survey.insert_xpaths(value, self) @@ -179,15 +184,14 @@ def xml_control(self): attributes["ref"] = self.get_xpath() if "label" in self and len(self["label"]) > 0: - children.append(self.xml_label()) - for n in Section.xml_control(self): + children.append(self.xml_label(survey=survey)) + for n in Section.xml_control(self, survey=survey): children.append(n) if "appearance" in control_dict: attributes["appearance"] = control_dict["appearance"] if "intent" in control_dict: - survey = self.get_root() attributes["intent"] = survey.insert_xpaths(control_dict["intent"], self) return node("group", *children, **attributes) diff --git a/pyxform/survey.py b/pyxform/survey.py index 80171af5..5cc5c873 100644 --- a/pyxform/survey.py +++ b/pyxform/survey.py @@ -7,7 +7,7 @@ import tempfile import xml.etree.ElementTree as ETree from collections import defaultdict -from collections.abc import Generator, Iterator +from collections.abc import Generator from datetime import datetime from functools import lru_cache from pathlib import Path @@ -230,21 +230,20 @@ def validate(self): def _validate_uniqueness_of_section_names(self): root_node_name = self.name section_names = set() - for element in self.iter_descendants(): - if isinstance(element, Section): - if element.name in section_names: - if element.name == root_node_name: - # The root node name is rarely explictly set; explain - # the problem in a more helpful way (#510) - msg = ( - f"The name '{element.name}' is the same as the form name. " - "Use a different section name (or change the form name in " - "the 'name' column of the settings sheet)." - ) - raise PyXFormError(msg) - msg = f"There are two sections with the name {element.name}." + for element in self.iter_descendants(condition=lambda i: isinstance(i, Section)): + if element.name in section_names: + if element.name == root_node_name: + # The root node name is rarely explictly set; explain + # the problem in a more helpful way (#510) + msg = ( + f"The name '{element.name}' is the same as the form name. " + "Use a different section name (or change the form name in " + "the 'name' column of the settings sheet)." + ) raise PyXFormError(msg) - section_names.add(element.name) + msg = f"There are two sections with the name {element.name}." + raise PyXFormError(msg) + section_names.add(element.name) def get_nsmap(self): """Add additional namespaces""" @@ -295,7 +294,7 @@ def xml(self): return node( "h:html", node("h:head", node("h:title", self.title), self.xml_model()), - node("h:body", *self.xml_control(), **body_kwargs), + node("h:body", *self.xml_control(survey=self), **body_kwargs), **nsmap, ) @@ -500,7 +499,7 @@ def _get_last_saved_instance() -> InstanceInfo: instance=node("instance", id=name, src=uri), ) - def _generate_instances(self) -> Iterator[DetachableElement]: + def _generate_instances(self) -> Generator[DetachableElement, None, None]: """ Get instances from all the different ways that they may be generated. @@ -581,6 +580,32 @@ def _generate_instances(self) -> Iterator[DetachableElement]: yield i.instance seen[i.name] = i + def xml_descendent_bindings(self) -> Generator["DetachableElement", None, None]: + """ + Yield bindings for this node and all its descendants. + """ + for e in self.iter_descendants(): + xml_bindings = e.xml_bindings(survey=self) + if xml_bindings is not None: + yield from xml_bindings + + # dynamic defaults for repeats go in the body. All other dynamic defaults (setvalue actions) go in the model + if not next( + e.iter_ancestors(condition=lambda i: i.type == constants.REPEAT), False + ): + dynamic_default = e.get_setvalue_node_for_dynamic_default(survey=self) + if dynamic_default: + yield dynamic_default + + def xml_actions(self) -> Generator[DetachableElement, None, None]: + """ + Yield xml_actions for this node and all its descendants. + """ + for e in self.iter_descendants(condition=lambda i: isinstance(i, Question)): + xml_action = e.xml_action() + if xml_action is not None: + yield xml_action + def xml_model(self): """ Generate the xform element @@ -609,10 +634,9 @@ def xml_model(self): model_children = [] if self._translations: model_children.append(self.itext()) - model_children += [node("instance", self.xml_instance())] - model_children += list(self._generate_instances()) - model_children += self.xml_descendent_bindings() - model_children += self.xml_actions() + model_children.append( + node("instance", self.xml_instance()), + ) if self.submission_url or self.public_key or self.auto_send or self.auto_delete: submission_attrs = {} @@ -628,10 +652,16 @@ def xml_model(self): submission_node = node("submission", **submission_attrs) model_children.insert(0, submission_node) - return node("model", *model_children, **model_kwargs) + def model_children_generator(): + yield from model_children + yield from self._generate_instances() + yield from self.xml_descendent_bindings() + yield from self.xml_actions() + + return node("model", model_children_generator(), **model_kwargs) def xml_instance(self, **kwargs): - result = Section.xml_instance(self, **kwargs) + result = Section.xml_instance(self, survey=self, **kwargs) # set these first to prevent overwriting id and version for key, value in self.attribute.items(): @@ -890,17 +920,18 @@ def _set_up_media_translations(media_dict, translation_key): translations_trans_key[media_type] = media - for survey_element in self.iter_descendants(): + for survey_element in self.iter_descendants( + condition=lambda i: not isinstance(i, Option) + ): # Skip set up of media for choices in selects. Translations for their media # content should have been set up in _setup_translations, with one copy of # each choice translation per language (after _add_empty_translations). - if not isinstance(survey_element, Option): - media_dict = survey_element.get("media") - if isinstance(media_dict, dict) and 0 < len(media_dict): - translation_key = survey_element.get_xpath() + ":label" - _set_up_media_translations(media_dict, translation_key) + media_dict = survey_element.get("media") + if isinstance(media_dict, dict) and 0 < len(media_dict): + translation_key = survey_element.get_xpath() + ":label" + _set_up_media_translations(media_dict, translation_key) - def itext(self): + def itext(self) -> DetachableElement: """ This function creates the survey's itext nodes from _translations @see _setup_media _setup_translations @@ -997,12 +1028,11 @@ def __unicode__(self): def _setup_xpath_dictionary(self): self._xpath: dict[str, SurveyElement | None] = {} # pylint: disable=attribute-defined-outside-init - for element in self.iter_descendants(): - if isinstance(element, Question | Section): - if element.name in self._xpath: - self._xpath[element.name] = None - else: - self._xpath[element.name] = element + for element in self.iter_descendants(lambda i: isinstance(i, Question | Section)): + if element.name in self._xpath: + self._xpath[element.name] = None + else: + self._xpath[element.name] = element def _var_repl_function( self, matchobj, context, use_current=False, reference_parent=False diff --git a/pyxform/survey_element.py b/pyxform/survey_element.py index 2328296b..d455729a 100644 --- a/pyxform/survey_element.py +++ b/pyxform/survey_element.py @@ -4,8 +4,8 @@ import json import re -from collections import deque -from collections.abc import Generator +from collections.abc import Callable, Generator +from itertools import chain from typing import TYPE_CHECKING, Any, ClassVar, Optional from pyxform import aliases as alias @@ -22,6 +22,7 @@ from pyxform.xls2json import print_pyobj_to_json if TYPE_CHECKING: + from pyxform.survey import Survey from pyxform.utils import DetachableElement # The following are important keys for the underlying dict that describes SurveyElement @@ -151,24 +152,39 @@ def validate(self): ) # TODO: Make sure renaming this doesn't cause any problems - def iter_descendants(self): + def iter_descendants( + self, condition: Callable[["SurveyElement"], bool] | None = None + ) -> Generator["SurveyElement", None, None]: """ - A survey_element is a dictionary of survey_elements - This method does a preorder traversal over them. - For the time being this survery_element is included among its - descendants + Get each of self.children. + + :param condition: If this evaluates to True, yield the element. """ # it really seems like this method should not yield self - yield self + if condition is not None: + if condition(self): + yield self + else: + yield self for e in self.children: - yield from e.iter_descendants() + yield from e.iter_descendants(condition=condition) + + def iter_ancestors( + self, condition: Callable[["SurveyElement"], bool] | None = None + ) -> Generator[tuple["SurvekyElement", int], None, None]: + """ + Get each self.parent with their distance from self (starting at 1). - def iter_ancestors(self) -> Generator[tuple["SurveyElement", int], None, None]: - """Get each self.parent with their distance from self (starting at 1).""" + :param condition: If this evaluates to True, yield the element. + """ distance = 1 current = self.parent while current is not None: - yield current, distance + if condition is not None: + if condition(current): + yield current, distance + else: + yield current, distance current = current.parent distance += 1 @@ -223,41 +239,35 @@ def has_common_repeat_parent( # No common ancestor found. return "Unrelated", None, None - def get_lineage(self): - """ - Return a the list [root, ..., self._parent, self] - """ - result = deque((self,)) - current_element = self - while current_element.parent: - current_element = current_element.parent - result.appendleft(current_element) - # For some reason the root element has a True flat property... - output = [result.popleft()] - output.extend([i for i in result if not i.get("flat")]) - return output - - def get_root(self): - return self.get_lineage()[0] - def get_xpath(self): """ Return the xpath of this survey element. """ + # Imported here to avoid circular references. + from pyxform.survey import Survey + + def condition(e): + # The "flat" setting was added in 2013 to support ODK Tables, and results in + # a data instance with no element nesting. Not sure if still needed. + return isinstance(e, Survey) or ( + not isinstance(e, Survey) and not e.get("flat") + ) + current_value = self._dict_get("_survey_element_xpath") if current_value is None: - new_value = f'/{"/".join(n.name for n in self.get_lineage())}' + if condition(self): + self_element = (self,) + else: + self_element = () + lineage = chain( + reversed(tuple(i[0] for i in self.iter_ancestors(condition=condition))), + self_element, + ) + new_value = f'/{"/".join(n.name for n in lineage)}' self._survey_element_xpath = new_value return new_value return current_value - def get_abbreviated_xpath(self): - lineage = self.get_lineage() - if len(lineage) >= 2: - return "/".join([str(n.name) for n in lineage[1:]]) - else: - return lineage[0].name - def _delete_keys_from_dict(self, dictionary: dict, keys: list): """ Deletes a list of keys from a dictionary. @@ -416,23 +426,16 @@ def get_translations(self, default_language): "text": text, } - def get_media_keys(self): - """ - @deprected - I'm leaving this in just in case it has outside references. - """ - return {"media": f"{self.get_xpath()}:media"} - def needs_itext_ref(self): return isinstance(self.label, dict) or ( isinstance(self.media, dict) and len(self.media) > 0 ) - def get_setvalue_node_for_dynamic_default(self, in_repeat=False): + def get_setvalue_node_for_dynamic_default(self, survey: "Survey", in_repeat=False): if not self.default or not default_is_dynamic(self.default, self.type): return None - default_with_xpath_paths = self.get_root().insert_xpaths(self.default, self) + default_with_xpath_paths = survey.insert_xpaths(self.default, self) triggering_events = "odk-instance-first-load" if in_repeat: @@ -446,26 +449,25 @@ def get_setvalue_node_for_dynamic_default(self, in_repeat=False): ) # XML generating functions, these probably need to be moved around. - def xml_label(self): + def xml_label(self, survey: "Survey"): if self.needs_itext_ref(): # If there is a dictionary label, or non-empty media dict, # then we need to make a label with an itext ref ref = f"""jr:itext('{self._translation_path("label")}')""" return node("label", ref=ref) else: - survey = self.get_root() label, output_inserted = survey.insert_output_values(self.label, self) return node("label", label, toParseString=output_inserted) - def xml_hint(self): + def xml_hint(self, survey: "Survey"): if isinstance(self.hint, dict) or self.guidance_hint: path = self._translation_path("hint") return node("hint", ref=f"jr:itext('{path}')") else: - hint, output_inserted = self.get_root().insert_output_values(self.hint, self) + hint, output_inserted = survey.insert_output_values(self.hint, self) return node("hint", hint, toParseString=output_inserted) - def xml_label_and_hint(self) -> list["DetachableElement"]: + def xml_label_and_hint(self, survey: "Survey") -> list["DetachableElement"]: """ Return a list containing one node for the label and if there is a hint one node for the hint. @@ -473,13 +475,13 @@ def xml_label_and_hint(self) -> list["DetachableElement"]: result = [] label_appended = False if self.label or self.media: - result.append(self.xml_label()) + result.append(self.xml_label(survey=survey)) label_appended = True if self.hint or self.guidance_hint: if not label_appended: - result.append(self.xml_label()) - result.append(self.xml_hint()) + result.append(self.xml_label(survey=survey)) + result.append(self.xml_hint(survey=survey)) msg = f"The survey element named '{self.name}' has no label or hint." if len(result) == 0: @@ -497,11 +499,10 @@ def xml_label_and_hint(self) -> list["DetachableElement"]: return result - def xml_bindings(self): + def xml_bindings(self, survey: "Survey"): """ Return the binding(s) for this survey element. """ - survey = self.get_root() bind_dict = self.bind.copy() if self.get("flat"): # Don't generate bind element for flat groups. @@ -520,47 +521,21 @@ def xml_bindings(self): and k in const.CONVERTIBLE_BIND_ATTRIBUTES ): v = alias.BINDING_CONVERSIONS[v] - if k == "jr:constraintMsg" and ( + elif k == "jr:constraintMsg" and ( isinstance(v, dict) or re.search(BRACKETED_TAG_REGEX, v) ): v = f"""jr:itext('{self._translation_path("jr:constraintMsg")}')""" - if k == "jr:requiredMsg" and ( + elif k == "jr:requiredMsg" and ( isinstance(v, dict) or re.search(BRACKETED_TAG_REGEX, v) ): v = f"""jr:itext('{self._translation_path("jr:requiredMsg")}')""" - if k == "jr:noAppErrorString" and isinstance(v, dict): + elif k == "jr:noAppErrorString" and isinstance(v, dict): v = f"""jr:itext('{self._translation_path("jr:noAppErrorString")}')""" bind_dict[k] = survey.insert_xpaths(v, context=self) return [node("bind", nodeset=self.get_xpath(), **bind_dict)] return None - def xml_descendent_bindings(self): - """ - Return a list of bindings for this node and all its descendants. - """ - result = [] - for e in self.iter_descendants(): - xml_bindings = e.xml_bindings() - if xml_bindings is not None: - result.extend(xml_bindings) - - # dynamic defaults for repeats go in the body. All other dynamic defaults (setvalue actions) go in the model - if ( - len( - [ - ancestor - for ancestor in e.get_lineage() - if ancestor.type == "repeat" - ] - ) - == 0 - ): - dynamic_default = e.get_setvalue_node_for_dynamic_default() - if dynamic_default: - result.append(dynamic_default) - return result - - def xml_control(self): + def xml_control(self, survey: "Survey"): """ The control depends on what type of question we're asking, it doesn't make sense to implement here in the base class. @@ -573,24 +548,10 @@ def xml_action(self): """ if self.action: action_dict = self.action.copy() - if action_dict: - name = action_dict["name"] - del action_dict["name"] - return node(name, ref=self.get_xpath(), **action_dict) + return node(action_dict.pop("name"), ref=self.get_xpath(), **action_dict) return None - def xml_actions(self): - """ - Return a list of actions for this node and all its descendants. - """ - result = [] - for e in self.iter_descendants(): - xml_action = e.xml_action() - if xml_action is not None: - result.append(xml_action) - return result - def hashable(v): """Determine whether `v` can be hashed.""" diff --git a/pyxform/utils.py b/pyxform/utils.py index d00e7d95..c417d8db 100644 --- a/pyxform/utils.py +++ b/pyxform/utils.py @@ -7,6 +7,7 @@ import json import os import re +from collections.abc import Generator from io import StringIO from json.decoder import JSONDecodeError from typing import Any @@ -139,6 +140,9 @@ def node(*args, **kwargs) -> DetachableElement: text_node = PatchedText() text_node.data = str(n) result.appendChild(text_node) + elif isinstance(n, Generator): + for e in n: + result.appendChild(e) elif not isinstance(n, str): result.appendChild(n) return result diff --git a/tests/test_j2x_question.py b/tests/test_j2x_question.py index 8e332995..14c65002 100644 --- a/tests/test_j2x_question.py +++ b/tests/test_j2x_question.py @@ -55,10 +55,12 @@ def test_question_type_string(self): ) self.s.add_child(q) - self.assertEqual(ctw(q.xml_control()), expected_string_control_xml) + self.assertEqual(ctw(q.xml_control(survey=self.s)), expected_string_control_xml) if TESTING_BINDINGS: - self.assertEqual(ctw(q.xml_bindings()), expected_string_binding_xml) + self.assertEqual( + ctw(q.xml_bindings(survey=self.s)), expected_string_binding_xml + ) def test_select_one_question_multilingual(self): """ @@ -86,10 +88,14 @@ def test_select_one_question_multilingual(self): q = create_survey_element_from_dict(simple_select_one_json) self.s.add_child(q) - self.assertEqual(ctw(q.xml_control()), expected_select_one_control_xml) + self.assertEqual( + ctw(q.xml_control(survey=self.s)), expected_select_one_control_xml + ) if TESTING_BINDINGS: - self.assertEqual(ctw(q.xml_bindings()), expected_select_one_binding_xml) + self.assertEqual( + ctw(q.xml_bindings(survey=self.s)), expected_select_one_binding_xml + ) def test_simple_integer_question_type_multilingual(self): """ @@ -114,10 +120,12 @@ def test_simple_integer_question_type_multilingual(self): self.s.add_child(q) - self.assertEqual(ctw(q.xml_control()), expected_integer_control_xml) + self.assertEqual(ctw(q.xml_control(survey=self.s)), expected_integer_control_xml) if TESTING_BINDINGS: - self.assertEqual(ctw(q.xml_bindings()), expected_integer_binding_xml) + self.assertEqual( + ctw(q.xml_bindings(survey=self.s)), expected_integer_binding_xml + ) def test_simple_date_question_type_multilingual(self): """ @@ -140,10 +148,12 @@ def test_simple_date_question_type_multilingual(self): q = create_survey_element_from_dict(simple_date_question) self.s.add_child(q) - self.assertEqual(ctw(q.xml_control()), expected_date_control_xml) + self.assertEqual(ctw(q.xml_control(survey=self.s)), expected_date_control_xml) if TESTING_BINDINGS: - self.assertEqual(ctw(q.xml_bindings()), expected_date_binding_xml) + self.assertEqual( + ctw(q.xml_bindings(survey=self.s)), expected_date_binding_xml + ) def test_simple_phone_number_question_type_multilingual(self): """ @@ -159,7 +169,7 @@ def test_simple_phone_number_question_type_multilingual(self): self.s.add_child(q) # Inspect XML Control - observed = q.xml_control() + observed = q.xml_control(survey=self.s) self.assertEqual("input", observed.nodeName) self.assertEqual("/test/phone_number_q", observed.attributes["ref"].nodeValue) observed_label = observed.childNodes[0] @@ -178,7 +188,7 @@ def test_simple_phone_number_question_type_multilingual(self): "type": "string", "constraint": r"regex(., '^\d*$')", } - observed = {k: v for k, v in q.xml_bindings()[0].attributes.items()} # noqa: C416 + observed = {k: v for k, v in q.xml_bindings(survey=self.s)[0].attributes.items()} # noqa: C416 self.assertDictEqual(expected, observed) def test_simple_select_all_question_multilingual(self): @@ -206,10 +216,14 @@ def test_simple_select_all_question_multilingual(self): q = create_survey_element_from_dict(simple_select_all_question) self.s.add_child(q) - self.assertEqual(ctw(q.xml_control()), expected_select_all_control_xml) + self.assertEqual( + ctw(q.xml_control(survey=self.s)), expected_select_all_control_xml + ) if TESTING_BINDINGS: - self.assertEqual(ctw(q.xml_bindings()), expected_select_all_binding_xml) + self.assertEqual( + ctw(q.xml_bindings(survey=self.s)), expected_select_all_binding_xml + ) def test_simple_decimal_question_multilingual(self): """ @@ -232,7 +246,9 @@ def test_simple_decimal_question_multilingual(self): q = create_survey_element_from_dict(simple_decimal_question) self.s.add_child(q) - self.assertEqual(ctw(q.xml_control()), expected_decimal_control_xml) + self.assertEqual(ctw(q.xml_control(survey=self.s)), expected_decimal_control_xml) if TESTING_BINDINGS: - self.assertEqual(ctw(q.xml_bindings()), expected_decimal_binding_xml) + self.assertEqual( + ctw(q.xml_bindings(survey=self.s)), expected_decimal_binding_xml + ) From a33a4a8a3dfdd1789aa2e778f0a713fc69ff2632 Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Sat, 23 Nov 2024 14:54:02 +1100 Subject: [PATCH 05/15] fix: reduce memory usage by switching from OrderedDict to dict - since py3.7 the order of keys is retained by the dict. A dict uses 64 bytes whereas an OrderedDict uses 128 bytes. - tidied up xls2json_backends imports by using only specific imports instead of all of openpyxl and xlrd, some of which overlapped. - simplified _list_to_dict_list. --- pyxform/xls2json_backends.py | 41 +++++++++++++++++------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/pyxform/xls2json_backends.py b/pyxform/xls2json_backends.py index 6a81a811..27652915 100644 --- a/pyxform/xls2json_backends.py +++ b/pyxform/xls2json_backends.py @@ -5,7 +5,6 @@ import csv import datetime import re -from collections import OrderedDict from collections.abc import Callable, Iterator from dataclasses import dataclass from enum import Enum @@ -16,16 +15,17 @@ from typing import Any from zipfile import BadZipFile -import openpyxl -import xlrd +from openpyxl import open as pyxl_open from openpyxl.cell import Cell as pyxlCell from openpyxl.reader.excel import ExcelReader from openpyxl.workbook import Workbook as pyxlWorkbook from openpyxl.worksheet.worksheet import Worksheet as pyxlWorksheet +from xlrd import XL_CELL_BOOLEAN, XL_CELL_DATE, XL_CELL_NUMBER, XLRDError +from xlrd import open_workbook as xlrd_open from xlrd.book import Book as xlrdBook from xlrd.sheet import Cell as xlrdCell from xlrd.sheet import Sheet as xlrdSheet -from xlrd.xldate import XLDateAmbiguous +from xlrd.xldate import XLDateAmbiguous, xldate_as_tuple from pyxform import constants from pyxform.errors import PyXFormError, PyXFormReadError @@ -43,10 +43,7 @@ def _list_to_dict_list(list_items): Returns a list of the created dict or an empty list """ if list_items: - k = OrderedDict() - for item in list_items: - k[str(item)] = "" - return [k] + return [{str(i): "" for i in list_items}] return [] @@ -96,7 +93,7 @@ def get_excel_rows( adjacent_empty_rows = 0 result_rows = [] for row_n, row in enumerate(rows): - row_dict = OrderedDict() + row_dict = {} for col_n, key in col_header_enum: if key is None: continue @@ -168,7 +165,7 @@ def clean_func(cell: xlrdCell, row_n: int, col_key: str) -> str | None: return rows, _list_to_dict_list(column_header_list) def process_workbook(wb: xlrdBook): - result_book = OrderedDict() + result_book = {} for wb_sheet in wb.sheets(): # Note that the sheet exists but do no further processing here. result_book[wb_sheet.name] = [] @@ -190,12 +187,12 @@ def process_workbook(wb: xlrdBook): try: wb_file = get_definition_data(definition=path_or_file) - workbook = xlrd.open_workbook(file_contents=wb_file.data.getvalue()) + workbook = xlrd_open(file_contents=wb_file.data.getvalue()) try: return process_workbook(wb=workbook) finally: workbook.release_resources() - except (AttributeError, TypeError, xlrd.XLRDError) as read_err: + except (AttributeError, TypeError, XLRDError) as read_err: raise PyXFormReadError(f"Error reading .xls file: {read_err}") from read_err @@ -203,21 +200,21 @@ def xls_value_to_unicode(value, value_type, datemode) -> str: """ Take a xls formatted value and try to make a unicode string representation. """ - if value_type == xlrd.XL_CELL_BOOLEAN: + if value_type == XL_CELL_BOOLEAN: return "TRUE" if value else "FALSE" - elif value_type == xlrd.XL_CELL_NUMBER: + elif value_type == XL_CELL_NUMBER: # Try to display as an int if possible. int_value = int(value) if int_value == value: return str(int_value) else: return str(value) - elif value_type is xlrd.XL_CELL_DATE: + elif value_type is XL_CELL_DATE: # Warn that it is better to single quote as a string. # error_location = cellFormatString % (ss_row_idx, ss_col_idx) # raise Exception( # "Cannot handle excel formatted date at " + error_location) - datetime_or_time_only = xlrd.xldate_as_tuple(value, datemode) + datetime_or_time_only = xldate_as_tuple(value, datemode) if datetime_or_time_only[:3] == (0, 0, 0): # must be time only return str(datetime.time(*datetime_or_time_only[3:])) @@ -258,7 +255,7 @@ def xlsx_to_dict_normal_sheet(sheet: pyxlWorksheet): return rows, _list_to_dict_list(column_header_list) def process_workbook(wb: pyxlWorkbook): - result_book = OrderedDict() + result_book = {} for sheetname in wb.sheetnames: wb_sheet = wb[sheetname] # Note that the sheet exists but do no further processing here. @@ -372,7 +369,7 @@ def first_column_as_sheet_name(row): return s_or_c, content def process_csv_data(rd): - _dict = OrderedDict() + _dict = {} sheet_name = None current_headers = None for row in rd: @@ -387,7 +384,7 @@ def process_csv_data(rd): current_headers = content _dict[f"{sheet_name}_header"] = _list_to_dict_list(current_headers) else: - _d = OrderedDict() + _d = {} for key, val in zip(current_headers, content, strict=False): if val != "": # Slight modification so values are striped @@ -468,10 +465,10 @@ def sheet_to_csv(workbook_path, csv_path, sheet_name): def xls_sheet_to_csv(workbook_path, csv_path, sheet_name): - wb = xlrd.open_workbook(workbook_path) + wb = xlrd_open(workbook_path) try: sheet = wb.sheet_by_name(sheet_name) - except xlrd.biffh.XLRDError: + except XLRDError: return False if not sheet or sheet.nrows < 2: return False @@ -497,7 +494,7 @@ def xls_sheet_to_csv(workbook_path, csv_path, sheet_name): def xlsx_sheet_to_csv(workbook_path, csv_path, sheet_name): - wb = openpyxl.open(workbook_path, read_only=True, data_only=True) + wb = pyxl_open(workbook_path, read_only=True, data_only=True) try: sheet = wb[sheet_name] except KeyError: From 54c7d286fb170139799367429d219268e0b55e9b Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Sat, 23 Nov 2024 15:03:58 +1100 Subject: [PATCH 06/15] fix: simplify choices validation in workbook_to_json - pass choices info to new function so that the variables needed are in a different function scope: - peak memory usage: fewer live objects until workbook_to_json returns - debugging: many variables in workbook_to_json already --- pyxform/validators/pyxform/choices.py | 21 ++++++++++++++++++++- pyxform/xls2json.py | 17 ++++++----------- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/pyxform/validators/pyxform/choices.py b/pyxform/validators/pyxform/choices.py index c97298e7..3b347c35 100644 --- a/pyxform/validators/pyxform/choices.py +++ b/pyxform/validators/pyxform/choices.py @@ -37,7 +37,7 @@ def check(): return list(check()) -def validate_choices( +def validate_choice_list( options: list[dict], warnings: list[str], allow_duplicates: bool = False ) -> None: seen_options = set() @@ -57,3 +57,22 @@ def validate_choices( if 0 < len(duplicate_errors): raise PyXFormError("\n".join(duplicate_errors)) + + +def validate_choices( + choices: dict[str, list[dict]], + warnings: list[str], + headers: tuple[tuple[str, ...], ...], + allow_duplicates: bool = False, +): + invalid_headers = validate_headers(headers, warnings) + for options in choices.values(): + validate_choice_list( + options=options, + warnings=warnings, + allow_duplicates=allow_duplicates, + ) + for option in options: + for invalid_header in invalid_headers: + option.pop(invalid_header, None) + del option["__row"] diff --git a/pyxform/xls2json.py b/pyxform/xls2json.py index 7e4e6ebe..90699c6e 100644 --- a/pyxform/xls2json.py +++ b/pyxform/xls2json.py @@ -538,20 +538,15 @@ def workbook_to_json( # columns is run with Survey sheet below. # Warn and remove invalid headers in case the form uses headers for notes. - invalid_headers = vc.validate_headers(choices_sheet.headers, warnings) allow_duplicates = aliases.yes_no.get( settings.get("allow_choice_duplicates", False), False ) - for options in choices.values(): - vc.validate_choices( - options=options, - warnings=warnings, - allow_duplicates=allow_duplicates, - ) - for option in options: - for invalid_header in invalid_headers: - option.pop(invalid_header, None) - del option["__row"] + vc.validate_choices( + choices=choices, + warnings=warnings, + headers=choices_sheet.headers, + allow_duplicates=allow_duplicates, + ) if 0 < len(choices): json_dict[constants.CHOICES] = choices From 24e2ecc8ce9687c50f0201a741842ed0b96d3a7f Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Mon, 25 Nov 2024 11:47:09 +1100 Subject: [PATCH 07/15] fix: when testing use converted xform xml instead of converting again - also fix xpath_count test failure message (copypasta from xpath_match) --- tests/pyxform_test_case.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/pyxform_test_case.py b/tests/pyxform_test_case.py index df2fb5c7..5c349128 100644 --- a/tests/pyxform_test_case.py +++ b/tests/pyxform_test_case.py @@ -161,12 +161,14 @@ def assertPyxformXform( if survey is None: result = convert( xlsform=coalesce(md, ss_structure), + pretty_print=True, form_name=coalesce(name, "test_name"), warnings=warnings, ) survey = result._survey - - xml = survey._to_pretty_xml() + xml = result.xform + else: + xml = survey._to_pretty_xml() root = etree.fromstring(xml.encode("utf-8")) # Ensure all namespaces are present, even if unused @@ -428,7 +430,7 @@ def assert_xpath_count( xpath=xpath, ) msg = ( - f"XPath found no matches (test case {case_num}):\n{xpath}" + f"XPath did not find the expected number of matches (test case {case_num}):\n{xpath}" f"\n\nXForm content:\n{matcher_context.content_str}" ) self.assertEqual(expected, len(observed), msg=msg) From 2a4ba80fd4af2bc61072d83e8497796f46af0405 Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Mon, 25 Nov 2024 13:58:29 +1100 Subject: [PATCH 08/15] fix: iana subtag language lookup optimisations - previous solution loaded all 8138 options into a list by default then ran a membership check (O(n)) to search for each language. - optimisations: - if the language name is "default", or is too short to match the language code regex, then skip executing the regex. - read the code list data at most once, only if needed. - put the code strings into a set for faster membership check (O(1)). - split the code list into the shorter list of 2-character codes (e.g. en, fr, de), and check that first. Assuming these shorter codes are more likely to be used, it is faster to check and uses less memory than loading the full list (8KB short vs 525KB). - best case: default, invalid, or short language codes get faster lookup with less memory used than before. - worst case: longer language codes get faster lookup with the same memory used as before. --- pyxform/survey.py | 2 +- pyxform/utils.py | 22 -- .../pyxform/iana_subtags/__init__.py | 0 .../iana_subtags_2_characters.txt | 190 ++++++++++++++++++ .../iana_subtags_3_or_more_characters.txt} | 190 ------------------ .../pyxform/iana_subtags/validation.py | 37 ++++ 6 files changed, 228 insertions(+), 213 deletions(-) create mode 100644 pyxform/validators/pyxform/iana_subtags/__init__.py create mode 100644 pyxform/validators/pyxform/iana_subtags/iana_subtags_2_characters.txt rename pyxform/{iana_subtags.txt => validators/pyxform/iana_subtags/iana_subtags_3_or_more_characters.txt} (98%) create mode 100644 pyxform/validators/pyxform/iana_subtags/validation.py diff --git a/pyxform/survey.py b/pyxform/survey.py index 5cc5c873..ddcc5d28 100644 --- a/pyxform/survey.py +++ b/pyxform/survey.py @@ -27,11 +27,11 @@ LAST_SAVED_REGEX, DetachableElement, PatchedText, - get_languages_with_bad_tags, has_dynamic_label, node, ) from pyxform.validators import enketo_validate, odk_validate +from pyxform.validators.pyxform.iana_subtags.validation import get_languages_with_bad_tags RE_BRACKET = re.compile(r"\[([^]]+)\]") RE_FUNCTION_ARGS = re.compile(r"\b[^()]+\((.*)\)$") diff --git a/pyxform/utils.py b/pyxform/utils.py index c417d8db..4399a298 100644 --- a/pyxform/utils.py +++ b/pyxform/utils.py @@ -5,7 +5,6 @@ import copy import csv import json -import os import re from collections.abc import Generator from io import StringIO @@ -218,27 +217,6 @@ def has_external_choices(json_struct): return False -def get_languages_with_bad_tags(languages): - """ - Returns languages with invalid or missing IANA subtags. - """ - path = os.path.join(os.path.dirname(__file__), "iana_subtags.txt") - with open(path, encoding="utf-8") as f: - iana_subtags = f.read().splitlines() - - lang_code_regex = re.compile(r"\((.*)\)$") - - languages_with_bad_tags = [] - for lang in languages: - lang_code = re.search(lang_code_regex, lang) - - if lang != "default" and ( - not lang_code or lang_code.group(1) not in iana_subtags - ): - languages_with_bad_tags.append(lang) - return languages_with_bad_tags - - def default_is_dynamic(element_default, element_type=None): """ Returns true if the default value is a dynamic value. diff --git a/pyxform/validators/pyxform/iana_subtags/__init__.py b/pyxform/validators/pyxform/iana_subtags/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pyxform/validators/pyxform/iana_subtags/iana_subtags_2_characters.txt b/pyxform/validators/pyxform/iana_subtags/iana_subtags_2_characters.txt new file mode 100644 index 00000000..80e1e431 --- /dev/null +++ b/pyxform/validators/pyxform/iana_subtags/iana_subtags_2_characters.txt @@ -0,0 +1,190 @@ +aa +ab +ae +af +ak +am +an +ar +as +av +ay +az +ba +be +bg +bh +bi +bm +bn +bo +br +bs +ca +ce +ch +co +cr +cs +cu +cv +cy +da +de +dv +dz +ee +el +en +eo +es +et +eu +fa +ff +fi +fj +fo +fr +fy +ga +gd +gl +gn +gu +gv +ha +he +hi +ho +hr +ht +hu +hy +hz +ia +id +ie +ig +ii +ik +in +io +is +it +iu +iw +ja +ji +jv +jw +ka +kg +ki +kj +kk +kl +km +kn +ko +kr +ks +ku +kv +kw +ky +la +lb +lg +li +ln +lo +lt +lu +lv +mg +mh +mi +mk +ml +mn +mo +mr +ms +mt +my +na +nb +nd +ne +ng +nl +nn +no +nr +nv +ny +oc +oj +om +or +os +pa +pi +pl +ps +pt +qu +rm +rn +ro +ru +rw +sa +sc +sd +se +sg +sh +si +sk +sl +sm +sn +so +sq +sr +ss +st +su +sv +sw +ta +te +tg +th +ti +tk +tl +tn +to +tr +ts +tt +tw +ty +ug +uk +ur +uz +ve +vi +vo +wa +wo +xh +yi +yo +za +zh +zu \ No newline at end of file diff --git a/pyxform/iana_subtags.txt b/pyxform/validators/pyxform/iana_subtags/iana_subtags_3_or_more_characters.txt similarity index 98% rename from pyxform/iana_subtags.txt rename to pyxform/validators/pyxform/iana_subtags/iana_subtags_3_or_more_characters.txt index 6d1ca078..96a44749 100644 --- a/pyxform/iana_subtags.txt +++ b/pyxform/validators/pyxform/iana_subtags/iana_subtags_3_or_more_characters.txt @@ -1,193 +1,3 @@ -aa -ab -ae -af -ak -am -an -ar -as -av -ay -az -ba -be -bg -bh -bi -bm -bn -bo -br -bs -ca -ce -ch -co -cr -cs -cu -cv -cy -da -de -dv -dz -ee -el -en -eo -es -et -eu -fa -ff -fi -fj -fo -fr -fy -ga -gd -gl -gn -gu -gv -ha -he -hi -ho -hr -ht -hu -hy -hz -ia -id -ie -ig -ii -ik -in -io -is -it -iu -iw -ja -ji -jv -jw -ka -kg -ki -kj -kk -kl -km -kn -ko -kr -ks -ku -kv -kw -ky -la -lb -lg -li -ln -lo -lt -lu -lv -mg -mh -mi -mk -ml -mn -mo -mr -ms -mt -my -na -nb -nd -ne -ng -nl -nn -no -nr -nv -ny -oc -oj -om -or -os -pa -pi -pl -ps -pt -qu -rm -rn -ro -ru -rw -sa -sc -sd -se -sg -sh -si -sk -sl -sm -sn -so -sq -sr -ss -st -su -sv -sw -ta -te -tg -th -ti -tk -tl -tn -to -tr -ts -tt -tw -ty -ug -uk -ur -uz -ve -vi -vo -wa -wo -xh -yi -yo -za -zh -zu aaa aab aac diff --git a/pyxform/validators/pyxform/iana_subtags/validation.py b/pyxform/validators/pyxform/iana_subtags/validation.py new file mode 100644 index 00000000..4cd36842 --- /dev/null +++ b/pyxform/validators/pyxform/iana_subtags/validation.py @@ -0,0 +1,37 @@ +import re +from functools import lru_cache +from pathlib import Path + +LANG_CODE_REGEX = re.compile(r"\((.*)\)$") +HERE = Path(__file__).parent + + +@lru_cache(maxsize=2) # Expecting to read 2 files. +def read_tags(file_name: str) -> set[str]: + path = HERE / file_name + with open(path, encoding="utf-8") as f: + return {line.strip() for line in f} + + +def get_languages_with_bad_tags(languages): + """ + Returns languages with invalid or missing IANA subtags. + """ + languages_with_bad_tags = [] + for lang in languages: + # Minimum matchable lang code attempt requires 3 characters e.g. "a()". + if lang == "default" or len(lang) < 3: + continue + lang_code = LANG_CODE_REGEX.search(lang) + if not lang_code: + languages_with_bad_tags.append(lang) + else: + lang_match = lang_code.group(1) + # Check the short list first: 190 short codes vs 7948 long codes. + if lang_match in read_tags("iana_subtags_2_characters.txt"): + continue + elif lang_match in read_tags("iana_subtags_3_or_more_characters.txt"): + continue + else: + languages_with_bad_tags.append(lang) + return languages_with_bad_tags From 48b396971aa2722588da1511ef1c60cf8bef7eb1 Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Mon, 25 Nov 2024 16:29:23 +1100 Subject: [PATCH 09/15] fix: generate dynamic default setvalues instead of accumulating to list - avoids creation of an intermediate list per repeat section, and calls to list.append() for each node - avoids iterating through "children" elements that are not part of a section, e.g. Options of a Question --- pyxform/section.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/pyxform/section.py b/pyxform/section.py index 7e8635ae..b91fea59 100644 --- a/pyxform/section.py +++ b/pyxform/section.py @@ -2,12 +2,13 @@ Section survey element module. """ +from collections.abc import Generator from typing import TYPE_CHECKING from pyxform.errors import PyXFormError from pyxform.external_instance import ExternalInstance from pyxform.survey_element import SurveyElement -from pyxform.utils import node +from pyxform.utils import DetachableElement, node if TYPE_CHECKING: from pyxform.survey import Survey @@ -121,9 +122,7 @@ def xml_control(self, survey: "Survey"): for n in Section.xml_control(self, survey=survey): repeat_node.appendChild(n) - setvalue_nodes = self._get_setvalue_nodes_for_dynamic_defaults(survey=survey) - - for setvalue_node in setvalue_nodes: + for setvalue_node in self._dynamic_defaults_helper(current=self, survey=survey): repeat_node.appendChild(setvalue_node) label = self.xml_label(survey=survey) @@ -132,20 +131,19 @@ def xml_control(self, survey: "Survey"): return node("group", repeat_node, ref=self.get_xpath(), **self.control) # Get setvalue nodes for all descendants of this repeat that have dynamic defaults and aren't nested in other repeats. - def _get_setvalue_nodes_for_dynamic_defaults(self, survey: "Survey"): - setvalue_nodes = [] - self._dynamic_defaults_helper(current=self, nodes=setvalue_nodes, survey=survey) - return setvalue_nodes - - def _dynamic_defaults_helper(self, current: "Section", nodes: list, survey: "Survey"): + def _dynamic_defaults_helper( + self, current: "Section", survey: "Survey" + ) -> Generator[DetachableElement, None, None]: + if not isinstance(current, Section): + return for e in current.children: if e.type != "repeat": # let nested repeats handle their own defaults dynamic_default = e.get_setvalue_node_for_dynamic_default( in_repeat=True, survey=survey ) if dynamic_default: - nodes.append(dynamic_default) - self._dynamic_defaults_helper(current=e, nodes=nodes, survey=survey) + yield dynamic_default + yield from self._dynamic_defaults_helper(current=e, survey=survey) # I'm anal about matching function signatures when overriding a function, # but there's no reason for kwargs to be an argument From 18c5649f9d83c9ed36b58f8ecd0647917ca59b32 Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Sat, 30 Nov 2024 19:30:35 +1100 Subject: [PATCH 10/15] fix: change constants sequences to sets for faster lookup - the order does not seem to be significant in any usages - rather these variables are used for membership checks e.g. to validate input. In which case the set lookup is O(1) vs. O(N) which can be significant for large forms, when each question is checked against one or more of these collections, potentially more than once. --- pyxform/constants.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pyxform/constants.py b/pyxform/constants.py index bd569af5..21d59bdb 100644 --- a/pyxform/constants.py +++ b/pyxform/constants.py @@ -87,17 +87,17 @@ NAMESPACES = "namespaces" # The following are the possible sheet names: -SUPPORTED_SHEET_NAMES = [ +SUPPORTED_SHEET_NAMES = { SURVEY, CHOICES, SETTINGS, EXTERNAL_CHOICES, OSM, ENTITIES, -] -XLS_EXTENSIONS = [".xls"] -XLSX_EXTENSIONS = [".xlsx", ".xlsm"] -SUPPORTED_FILE_EXTENSIONS = XLS_EXTENSIONS + XLSX_EXTENSIONS +} +XLS_EXTENSIONS = {".xls"} +XLSX_EXTENSIONS = {".xlsx", ".xlsm"} +SUPPORTED_FILE_EXTENSIONS = {*XLS_EXTENSIONS, *XLSX_EXTENSIONS} LOCATION_PRIORITY = "location-priority" LOCATION_MIN_INTERVAL = "location-min-interval" @@ -107,7 +107,7 @@ TRACK_CHANGES_REASONS = "track-changes-reasons" # supported bind keywords for which external instances will be created for pulldata function -EXTERNAL_INSTANCES = ["calculate", "constraint", "readonly", "required", "relevant"] +EXTERNAL_INSTANCES = {"calculate", "constraint", "readonly", "required", "relevant"} # The ODK XForms version that generated forms comply to CURRENT_XFORMS_VERSION = "1.0.0" @@ -130,14 +130,14 @@ class EntityColumns(StrEnum): OFFLINE = "offline" -DEPRECATED_DEVICE_ID_METADATA_FIELDS = ["subscriberid", "simserial"] +DEPRECATED_DEVICE_ID_METADATA_FIELDS = {"subscriberid", "simserial"} AUDIO_QUALITY_VOICE_ONLY = "voice-only" AUDIO_QUALITY_LOW = "low" AUDIO_QUALITY_NORMAL = "normal" AUDIO_QUALITY_EXTERNAL = "external" -EXTERNAL_INSTANCE_EXTENSIONS = [".xml", ".csv", ".geojson"] +EXTERNAL_INSTANCE_EXTENSIONS = {".xml", ".csv", ".geojson"} EXTERNAL_CHOICES_ITEMSET_REF_LABEL = "label" EXTERNAL_CHOICES_ITEMSET_REF_VALUE = "name" @@ -153,13 +153,13 @@ class EntityColumns(StrEnum): "becomes '_setting'." ) -CONVERTIBLE_BIND_ATTRIBUTES = ( +CONVERTIBLE_BIND_ATTRIBUTES = { "readonly", "required", "relevant", "constraint", "calculate", -) +} NSMAP = { "xmlns": "http://www.w3.org/2002/xforms", "xmlns:h": "http://www.w3.org/1999/xhtml", From f1c47f514f06432ce9e2939bd087825bbbffa90e Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Sat, 30 Nov 2024 19:39:57 +1100 Subject: [PATCH 11/15] fix: utils performance improvements - in writexml(), use `any()` to avoid evaluating the whole sequence since it only matters if there is 1 or more NODE_TYPE_TEXT. - in node() - use a set for faster lookup of blocked_attributes (it's 1 element but in case more are added it could stay O(1)) - use a tuple for `unicode_args` for slightly less memory - remove unnecessary `iter()` on `kwargs.items()` - use one f-string for the parse string rather than concat 7 items - skip appendChild for `None` elements --- pyxform/utils.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/pyxform/utils.py b/pyxform/utils.py index 4399a298..dcf4414b 100644 --- a/pyxform/utils.py +++ b/pyxform/utils.py @@ -58,7 +58,7 @@ def writexml(self, writer, indent="", addindent="", newl=""): if self.childNodes: writer.write(">") # For text or mixed content, write without adding indents or newlines. - if 0 < len([c for c in self.childNodes if c.nodeType in NODE_TYPE_TEXT]): + if any(c.nodeType in NODE_TYPE_TEXT for c in self.childNodes): # Conditions to match old Survey.py regex for remaining whitespace. child_nodes = len(self.childNodes) for idx, cnode in enumerate(self.childNodes): @@ -94,32 +94,24 @@ def node(*args, **kwargs) -> DetachableElement: kwargs -- attributes returns a xml.dom.minidom.Element """ - blocked_attributes = ["tag"] + blocked_attributes = {"tag"} tag = args[0] if len(args) > 0 else kwargs["tag"] args = args[1:] result = DetachableElement(tag) - unicode_args = [u for u in args if isinstance(u, str)] + unicode_args = tuple(u for u in args if isinstance(u, str)) if len(unicode_args) > 1: raise PyXFormError("""Invalid value for `unicode_args`.""") parsed_string = False # Convert the kwargs xml attribute dictionary to a xml.dom.minidom.Element. - for k, v in iter(kwargs.items()): + for k, v in kwargs.items(): if k in blocked_attributes: continue if k == "toParseString": if v is True and len(unicode_args) == 1: parsed_string = True # Add this header string so parseString can be used? - s = ( - '<' - + tag - + ">" - + unicode_args[0] - + "" - ) + s = f"""<{tag}>{unicode_args[0]}""" parsed_node = parseString(s.encode("utf-8")).documentElement # Move node's children to the result Element # discarding node's root @@ -141,7 +133,8 @@ def node(*args, **kwargs) -> DetachableElement: result.appendChild(text_node) elif isinstance(n, Generator): for e in n: - result.appendChild(e) + if e is not None: + result.appendChild(e) elif not isinstance(n, str): result.appendChild(n) return result From 028f26e93d6b676d6d95e0b8df5c11ab721f7b47 Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Sat, 30 Nov 2024 19:41:07 +1100 Subject: [PATCH 12/15] add: show the expected number of matches in PyxformTestCase xpath_count --- tests/pyxform_test_case.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pyxform_test_case.py b/tests/pyxform_test_case.py index 5c349128..0d9f3e63 100644 --- a/tests/pyxform_test_case.py +++ b/tests/pyxform_test_case.py @@ -430,7 +430,7 @@ def assert_xpath_count( xpath=xpath, ) msg = ( - f"XPath did not find the expected number of matches (test case {case_num}):\n{xpath}" + f"XPath did not find the expected number of matches ({expected}, test case {case_num}):\n{xpath}" f"\n\nXForm content:\n{matcher_context.content_str}" ) self.assertEqual(expected, len(observed), msg=msg) From ff992795c21f9fd8ceffd07c1aaa4fb16e52a894 Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Sat, 30 Nov 2024 19:48:54 +1100 Subject: [PATCH 13/15] add: add missing test cases, other minor tests improvements - test_xform_conversion.py: remove unnecessary warnings arg - test_builder.py: - remove self.maxDiff=None since this is done at the class level - swap AssertEqual arg order since first arg referred to as the "expected" value in test failure messages. - test_choices_sheet.py: remove unnecessary position() assertion - test_fieldlist_labels.py: add appearance assertion since this does not otherwise seem to be tested for groups - test_fields.py: remove debug setting - test_groups.py: add test for group relevance (no other tests for this) - test_image_app_parameter.py: add assertion for appearance, since this does not otherwise seem to be tested for questions - test_survey.py: add test for autoplay setting - test_translations.py: fix typo in label translation header --- tests/test_builder.py | 14 +++++-------- tests/test_choices_sheet.py | 8 +++---- tests/test_fieldlist_labels.py | 7 +++++++ tests/test_fields.py | 1 - tests/test_groups.py | 21 +++++++++++++++++++ tests/test_image_app_parameter.py | 4 ++-- tests/test_survey.py | 16 ++++++++++++++ tests/test_translations.py | 6 +++--- .../xform_test_case/test_xform_conversion.py | 4 ++-- 9 files changed, 60 insertions(+), 21 deletions(-) diff --git a/tests/test_builder.py b/tests/test_builder.py index 83e10372..ee27322a 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -113,8 +113,7 @@ def test_create_table_from_dict(self): }, ], } - - self.assertEqual(g.to_json_dict(), expected_dict) + self.assertEqual(expected_dict, g.to_json_dict()) def test_specify_other(self): survey = utils.create_survey_from_fixture( @@ -169,7 +168,6 @@ def test_specify_other(self): }, ], } - self.maxDiff = None self.assertEqual(survey.to_json_dict(), expected_dict) def test_select_one_question_with_identical_choice_name(self): @@ -211,8 +209,7 @@ def test_select_one_question_with_identical_choice_name(self): }, ], } - self.maxDiff = None - self.assertEqual(survey.to_json_dict(), expected_dict) + self.assertEqual(expected_dict, survey.to_json_dict()) def test_loop(self): survey = utils.create_survey_from_fixture("loop", filetype=FIXTURE_FILETYPE) @@ -351,8 +348,7 @@ def test_loop(self): }, ], } - self.maxDiff = None - self.assertEqual(survey.to_json_dict(), expected_dict) + self.assertEqual(expected_dict, survey.to_json_dict()) def test_sms_columns(self): survey = utils.create_survey_from_fixture("sms_info", filetype=FIXTURE_FILETYPE) @@ -502,7 +498,7 @@ def test_sms_columns(self): ], }, } - self.assertEqual(survey.to_json_dict(), expected_dict) + self.assertEqual(expected_dict, survey.to_json_dict()) def test_style_column(self): survey = utils.create_survey_from_fixture( @@ -541,7 +537,7 @@ def test_style_column(self): "title": "My Survey", "type": "survey", } - self.assertEqual(survey.to_json_dict(), expected_dict) + self.assertEqual(expected_dict, survey.to_json_dict()) STRIP_NS_FROM_TAG_RE = re.compile(r"\{.+\}") diff --git a/tests/test_choices_sheet.py b/tests/test_choices_sheet.py index 084b9b56..6ace4832 100644 --- a/tests/test_choices_sheet.py +++ b/tests/test_choices_sheet.py @@ -121,14 +121,14 @@ def test_choices_extra_columns_output_order_matches_xlsform(self): xml__xpath_match=[ """ /h:html/h:head/x:model/x:instance[@id='choices']/x:root/x:item[ - ./x:name = ./x:*[position() = 1 and text() = '1'] - and ./x:geometry = ./x:*[position() = 2 and text() = '46.5841618 7.0801379 0 0'] + ./x:name = ./x:*[text() = '1'] + and ./x:geometry = ./x:*[text() = '46.5841618 7.0801379 0 0'] ] """, """ /h:html/h:head/x:model/x:instance[@id='choices']/x:root/x:item[ - ./x:name = ./x:*[position() = 1 and text() = '2'] - and ./x:geometry = ./x:*[position() = 2 and text() = '35.8805082 76.515057 0 0'] + ./x:name = ./x:*[text() = '2'] + and ./x:geometry = ./x:*[text() = '35.8805082 76.515057 0 0'] ] """, ], diff --git a/tests/test_fieldlist_labels.py b/tests/test_fieldlist_labels.py index 469fd3a4..a824791d 100644 --- a/tests/test_fieldlist_labels.py +++ b/tests/test_fieldlist_labels.py @@ -44,6 +44,13 @@ def test_unlabeled_group_fieldlist(self): | | end_group | | | | """, warnings_count=0, + xml__xpath_match=[ + """ + /h:html/h:body/x:group[ + @ref = '/test_name/my-group' and @appearance='field-list' + ] + """ + ], ) def test_unlabeled_group_fieldlist_alternate_syntax(self): diff --git a/tests/test_fields.py b/tests/test_fields.py index 55591535..a3d679aa 100644 --- a/tests/test_fields.py +++ b/tests/test_fields.py @@ -80,7 +80,6 @@ def test_multiple_duplicate_choices_without_setting(self): vc.INVALID_DUPLICATE.format(row=3), vc.INVALID_DUPLICATE.format(row=5), ], - debug=True, ) def test_duplicate_choices_with_setting_not_set_to_yes(self): diff --git a/tests/test_groups.py b/tests/test_groups.py index 1d8d6efe..db44fb2b 100644 --- a/tests/test_groups.py +++ b/tests/test_groups.py @@ -50,3 +50,24 @@ def test_group_intent(self): '' # nopep8 ], ) + + def test_group_relevant_included_in_bind(self): + """Should find the group relevance expression in the group binding.""" + md = """ + | survey | + | | type | name | label | relevant | + | | integer | q1 | Q1 | | + | | begin group | g1 | G1 | ${q1} = 1 | + | | text | q2 | Q2 | | + | | end group | | | | + """ + self.assertPyxformXform( + md=md, + xml__xpath_match=[ + """ + /h:html/h:head/x:model/x:bind[ + @nodeset = '/test_name/g1' and @relevant=' /test_name/q1 = 1' + ] + """ + ], + ) diff --git a/tests/test_image_app_parameter.py b/tests/test_image_app_parameter.py index 8d65db32..3d8006e4 100644 --- a/tests/test_image_app_parameter.py +++ b/tests/test_image_app_parameter.py @@ -88,7 +88,7 @@ def test_ignoring_invalid_android_package_name_with_not_supported_appearances( name="data", md=md.format(case=case), xml__xpath_match=[ - "/h:html/h:body/x:upload[not(@intent) and @mediatype='image/*' and @ref='/data/my_image']" + f"/h:html/h:body/x:upload[not(@intent) and @mediatype='image/*' and @ref='/data/my_image' and @appearance='{case}']" ], ) @@ -105,7 +105,7 @@ def test_ignoring_android_package_name_in_image_with_not_supported_appearances(s name="data", md=md.format(case=case), xml__xpath_match=[ - "/h:html/h:body/x:upload[not(@intent) and @mediatype='image/*' and @ref='/data/my_image']" + f"/h:html/h:body/x:upload[not(@intent) and @mediatype='image/*' and @ref='/data/my_image' and @appearance='{case}']" ], ) diff --git a/tests/test_survey.py b/tests/test_survey.py index cd2bec18..d38f3f82 100644 --- a/tests/test_survey.py +++ b/tests/test_survey.py @@ -49,3 +49,19 @@ def test_many_xpath_references_do_not_hit_64_recursion_limit__many_to_many(self) n="\n".join(tmpl_n.format(i) for i in range(1, 250)), ), ) + + def test_autoplay_attribute_added_to_question_body_control(self): + """Should add the autoplay attribute when specified for a question.""" + md = """ + | survey | + | | type | name | label | audio | autoplay | + | | text | feel | Song feel? | amazing.mp3 | audio | + """ + self.assertPyxformXform( + md=md, + xml__xpath_match=[ + """ + /h:html/h:body/x:input[@ref='/test_name/feel' and @autoplay='audio'] + """ + ], + ) diff --git a/tests/test_translations.py b/tests/test_translations.py index 1f891ef3..b644dde6 100644 --- a/tests/test_translations.py +++ b/tests/test_translations.py @@ -1711,9 +1711,9 @@ def test_specify_other__with_translations_only__missing_first_translation(self): """Should add an "other" choice to the itemset instance and an itext label.""" # xls2json validation would raise an error if a choice has no label at all. md = """ - | survey | | | | | | - | | type | name | label | label::eng | label:fr | - | | select_one c1 or_other | q1 | Question 1 | Question A | QA fr | + | survey | | | | | | + | | type | name | label | label::eng | label::fr | + | | select_one c1 or_other | q1 | Question 1 | Question A | QA fr | | choices | | | | | | | | list name | name | label | label::eng | label::fr | | | c1 | na | la | la-e | la-f | diff --git a/tests/xform_test_case/test_xform_conversion.py b/tests/xform_test_case/test_xform_conversion.py index 95a0d9eb..8e67451e 100644 --- a/tests/xform_test_case/test_xform_conversion.py +++ b/tests/xform_test_case/test_xform_conversion.py @@ -35,8 +35,8 @@ def test_conversion_vs_expected(self): ) xlsform = Path(self.path_to_excel_file) if set_name: - result = convert(xlsform=xlsform, warnings=[], form_name=xlsform.stem) + result = convert(xlsform=xlsform, form_name=xlsform.stem) else: - result = convert(xlsform=xlsform, warnings=[]) + result = convert(xlsform=xlsform) with open(expected_output_path, encoding="utf-8") as expected: self.assertXFormEqual(expected.read(), result.xform) From 61c4df1e9f984e176637c251edb6c05fe122b1ae Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Sat, 30 Nov 2024 20:01:38 +1100 Subject: [PATCH 14/15] chg: expression parsing optimisation - usage of lru_cache on `parse_expression` helps performance but it seems to be a diminishing return for cache sizes > 128, and the memory used by the cached strings and ExpressionLexerTokens can become significant if there are lots of long strings being parsed - added option to get the parsed token type only, since calls through `is_single_token_expression` only care about the token type - for token type checks, ignore empty strings or strings that would be to short to be that token type --- pyxform/parsing/expression.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/pyxform/parsing/expression.py b/pyxform/parsing/expression.py index 32c6509b..335864e6 100644 --- a/pyxform/parsing/expression.py +++ b/pyxform/parsing/expression.py @@ -3,7 +3,7 @@ from functools import lru_cache -def get_expression_lexer() -> re.Scanner: +def get_expression_lexer(name_only: bool = False) -> re.Scanner: """ Get a expression lexer (scanner) for parsing. """ @@ -61,7 +61,9 @@ def get_expression_lexer() -> re.Scanner: } def get_tokenizer(name): - def tokenizer(scan, value): + def tokenizer(scan, value) -> ExpLexerToken | str: + if name_only: + return name return ExpLexerToken(name, value, scan.match.start(), scan.match.end()) return tokenizer @@ -84,9 +86,10 @@ def __init__(self, name: str, value: str, start: int, end: int) -> None: _EXPRESSION_LEXER = get_expression_lexer() +_TOKEN_NAME_LEXER = get_expression_lexer(name_only=True) -@lru_cache(maxsize=1024) +@lru_cache(maxsize=128) def parse_expression(text: str) -> tuple[list[ExpLexerToken], str]: """ Parse an expression. @@ -104,8 +107,10 @@ def is_single_token_expression(expression: str, token_types: Iterable[str]) -> b """ Does the expression contain single token of one of the provided token types? """ - tokens, _ = parse_expression(expression.strip()) - if 1 == len(tokens) and tokens[0].name in token_types: + if not expression: + return False + tokens, _ = _TOKEN_NAME_LEXER.scan(expression.strip()) + if 1 == len(tokens) and tokens[0] in token_types: return True else: return False @@ -115,6 +120,8 @@ def is_pyxform_reference(value: str) -> bool: """ Does the input string contain only a valid Pyxform reference? e.g. ${my_question} """ + if not value or len(value) <= 3: # Needs 3 characters for "${}", plus a name inside. + return False return is_single_token_expression(expression=value, token_types=("PYXFORM_REF",)) @@ -122,4 +129,6 @@ def is_xml_tag(value: str) -> bool: """ Does the input string contain only a valid XML tag / element name? """ + if not value: + return False return is_single_token_expression(expression=value, token_types=("NAME",)) From 6918b400d3cf6c9151db2104137afe2c52dd68e4 Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Sat, 30 Nov 2024 20:40:43 +1100 Subject: [PATCH 15/15] chg: performance and memory usage improvements - the main problem being addressed is that SurveyElement had a lot of fields which are not relevant to every subtype, and many of these fields are containers, which wastes time and memory. - For example an Option (which represents a choice) would get about 60 attributes (now ~8) - most of which empty strings but many are dicts or lists. For large forms this can really stack up and consume a lot of memory. A lot of code was not tolerant of None as a default value so all the fields were initialised. - The dynamic class approach would have made more sense in the earlier days of XLSForm when the columns and behaviour were changing rapidly and there were not many fields. But now there are more features, and specs, and extensive test suites that make it useful to use a more clearly typed class approach. - Having concrete types also makes development easier, by having missing attribute or type warnings. When looking at the git history, clearly it was also confusing about what is a field - for example "jr:count" (repeat_count) is part of the bind dict, not a top-level attribute of a Section. - The changes in this commit could be taken further, for example: - some SurveyElement funcs relate to certain types and could be moved to those types, or a mixin / intermediate subclass. i.e. a fair bit of awkward "hasattr" or "ininstance" remains. - the builder.py module externalises the initialisation of the SurveyElements but perhaps this could be done by SurveyElements - other issues addressed: - avoid copying dicts unnecessarily - use __slots__ on all SurveyElement types to reduce memory usage - do "or other" question/choice insertion in xls2xform instead of the builder module, so that row-specific errors can be shown, and the choice lists are finalised sooner - potential compatibility issues: - minimal tests fixes were required for these changes, so any issues would be more for projects using pyxform internal classes or APIs. - removed some SurveyElement fields that seem to not be used at all, and SurveyElement classes will not have all 60+ fields anymore. - any extra data passed in as kwargs that is not a declared attribute is put into a new `extra_data` dict, and is excluded from `to_json_dict` output. The `extra_data` gets used when specifying extra choices columns, to generate choices output for that data. --- pyxform/builder.py | 194 ++++------- pyxform/constants.py | 2 + pyxform/entities/entity_declaration.py | 18 +- pyxform/external_instance.py | 17 +- pyxform/question.py | 358 +++++++++++++++----- pyxform/section.py | 117 +++++-- pyxform/survey.py | 377 +++++++++++++++------- pyxform/survey_element.py | 303 ++++++++--------- pyxform/utils.py | 27 +- pyxform/xls2json.py | 46 ++- tests/test_builder.py | 8 +- tests/test_j2x_creation.py | 49 +-- tests/test_j2x_question.py | 6 +- tests/test_j2x_xform_build_preparation.py | 4 +- 14 files changed, 969 insertions(+), 557 deletions(-) diff --git a/pyxform/builder.py b/pyxform/builder.py index df0801fb..86675f14 100644 --- a/pyxform/builder.py +++ b/pyxform/builder.py @@ -2,10 +2,9 @@ Survey builder functionality. """ -import copy import os from collections import defaultdict -from typing import TYPE_CHECKING, Any, Union +from typing import Any from pyxform import constants as const from pyxform import file_utils, utils @@ -15,6 +14,7 @@ from pyxform.question import ( InputQuestion, MultipleChoiceQuestion, + Option, OsmUploadQuestion, Question, RangeQuestion, @@ -24,15 +24,9 @@ from pyxform.question_type_dictionary import QUESTION_TYPE_DICT from pyxform.section import GroupedSection, RepeatingSection from pyxform.survey import Survey +from pyxform.survey_element import SurveyElement from pyxform.xls2json import SurveyReader -if TYPE_CHECKING: - from pyxform.survey_element import SurveyElement - -OR_OTHER_CHOICE = { - const.NAME: "other", - const.LABEL: "Other", -} QUESTION_CLASSES = { "": Question, "action": Question, @@ -64,8 +58,6 @@ def __init__(self, **kwargs): self.setvalues_by_triggering_ref = defaultdict(list) # dictionary of setgeopoint target and value tuple indexed by triggering element self.setgeopoint_by_triggering_ref = defaultdict(list) - # For tracking survey-level choices while recursing through the survey. - self._choices: dict[str, Any] = {} def set_sections(self, sections): """ @@ -78,39 +70,28 @@ def set_sections(self, sections): self._sections = sections def create_survey_element_from_dict( - self, d: dict[str, Any], deep_copy: bool = True - ) -> Union["SurveyElement", list["SurveyElement"]]: + self, d: dict[str, Any], choices: dict[str, tuple[Option, ...]] | None = None + ) -> SurveyElement | list[SurveyElement]: """ Convert from a nested python dictionary/array structure (a json dict I call it because it corresponds directly with a json object) to a survey object :param d: data to use for constructing SurveyElements. - :param deep_copy: Take a copy.deepcopy() of the input data, to avoid changing the - original objects during processing. All methods private to this Builder class - should use "False" since the data is already copied by its public methods. """ - # TODO: ideally avoid copying entirely, but at least try to only copy once. - if deep_copy: - d = copy.deepcopy(d) - if "add_none_option" in d: self._add_none_option = d["add_none_option"] if d[const.TYPE] in SECTION_CLASSES: - if d[const.TYPE] == const.SURVEY: - self._choices = d.get(const.CHOICES, {}) - - section = self._create_section_from_dict(d) + section = self._create_section_from_dict(d=d, choices=choices) if d[const.TYPE] == const.SURVEY: section.setvalues_by_triggering_ref = self.setvalues_by_triggering_ref section.setgeopoint_by_triggering_ref = self.setgeopoint_by_triggering_ref - section.choices = self._choices return section elif d[const.TYPE] == const.LOOP: - return self._create_loop_from_dict(d) + return self._create_loop_from_dict(d=d, choices=choices) elif d[const.TYPE] == "include": section_name = d[const.NAME] if section_name not in self._sections: @@ -120,16 +101,19 @@ def create_survey_element_from_dict( self._sections.keys(), ) d = self._sections[section_name] - full_survey = self.create_survey_element_from_dict(d=d, deep_copy=False) + full_survey = self.create_survey_element_from_dict(d=d, choices=choices) return full_survey.children - elif d[const.TYPE] in ["xml-external", "csv-external"]: + elif d[const.TYPE] in {"xml-external", "csv-external"}: return ExternalInstance(**d) elif d[const.TYPE] == "entity": return EntityDeclaration(**d) else: self._save_trigger(d=d) return self._create_question_from_dict( - d, QUESTION_TYPE_DICT, self._add_none_option + d=d, + question_type_dictionary=QUESTION_TYPE_DICT, + add_none_option=self._add_none_option, + choices=choices, ) def _save_trigger(self, d: dict) -> None: @@ -149,69 +133,35 @@ def _create_question_from_dict( d: dict[str, Any], question_type_dictionary: dict[str, Any], add_none_option: bool = False, - ) -> Question | list[Question]: + choices: dict[str, tuple[Option, ...]] | None = None, + ) -> Question | tuple[Question, ...]: question_type_str = d[const.TYPE] # TODO: Keep add none option? if add_none_option and question_type_str.startswith(const.SELECT_ALL_THAT_APPLY): SurveyElementBuilder._add_none_option_to_select_all_that_apply(d) - # Handle or_other on select type questions - or_other_len = len(const.SELECT_OR_OTHER_SUFFIX) - if question_type_str.endswith(const.SELECT_OR_OTHER_SUFFIX): - question_type_str = question_type_str[: len(question_type_str) - or_other_len] - d[const.TYPE] = question_type_str - SurveyElementBuilder._add_other_option_to_multiple_choice_question(d) - return [ - SurveyElementBuilder._create_question_from_dict( - d, question_type_dictionary, add_none_option - ), - SurveyElementBuilder._create_specify_other_question_from_dict(d), - ] - question_class = SurveyElementBuilder._get_question_class( question_type_str, question_type_dictionary ) - # todo: clean up this spaghetti code - d["question_type_dictionary"] = question_type_dictionary if question_class: - return question_class(**d) - - return [] - - @staticmethod - def _add_other_option_to_multiple_choice_question(d: dict[str, Any]) -> None: - # ideally, we'd just be pulling from children - choice_list = d.get(const.CHOICES, d.get(const.CHILDREN, [])) - if len(choice_list) <= 0: - raise PyXFormError("There should be choices for this question.") - if not any(c[const.NAME] == OR_OTHER_CHOICE[const.NAME] for c in choice_list): - choice_list.append(SurveyElementBuilder._get_or_other_choice(choice_list)) + if const.CHOICES in d and choices: + return question_class( + question_type_dictionary=question_type_dictionary, + choices=choices.get(d[const.ITEMSET], d[const.CHOICES]), + **{k: v for k, v in d.items() if k != const.CHOICES}, + ) + else: + return question_class( + question_type_dictionary=question_type_dictionary, **d + ) - @staticmethod - def _get_or_other_choice( - choice_list: list[dict[str, Any]], - ) -> dict[str, str | dict]: - """ - If the choices have any translations, return an OR_OTHER choice for each lang. - """ - if any(isinstance(c.get(const.LABEL), dict) for c in choice_list): - langs = { - lang - for c in choice_list - for lang in c[const.LABEL] - if isinstance(c.get(const.LABEL), dict) - } - return { - const.NAME: OR_OTHER_CHOICE[const.NAME], - const.LABEL: {lang: OR_OTHER_CHOICE[const.LABEL] for lang in langs}, - } - return OR_OTHER_CHOICE + return () @staticmethod def _add_none_option_to_select_all_that_apply(d_copy): - choice_list = d_copy.get(const.CHOICES, d_copy.get(const.CHILDREN, [])) + choice_list = d_copy.get(const.CHOICES, d_copy.get(const.CHILDREN, ())) if len(choice_list) <= 0: raise PyXFormError("There should be choices for this question.") none_choice = {const.NAME: "none", const.LABEL: "None"} @@ -232,81 +182,65 @@ def _get_question_class(question_type_str, question_type_dictionary): and find what class it maps to going through type_dictionary -> QUESTION_CLASSES """ - question_type = question_type_dictionary.get(question_type_str, {}) - control_dict = question_type.get(const.CONTROL, {}) - control_tag = control_dict.get("tag", "") - if control_tag == "upload" and control_dict.get("mediatype") == "osm/*": - control_tag = "osm" + control_tag = "" + question_type = question_type_dictionary.get(question_type_str) + if question_type: + control_dict = question_type.get(const.CONTROL) + if control_dict: + control_tag = control_dict.get("tag") + if control_tag == "upload" and control_dict.get("mediatype") == "osm/*": + control_tag = "osm" return QUESTION_CLASSES[control_tag] - @staticmethod - def _create_specify_other_question_from_dict(d: dict[str, Any]) -> InputQuestion: - kwargs = { - const.TYPE: "text", - const.NAME: f"{d[const.NAME]}_other", - const.LABEL: "Specify other.", - const.BIND: {"relevant": f"selected(../{d[const.NAME]}, 'other')"}, - } - return InputQuestion(**kwargs) - - def _create_section_from_dict(self, d): - children = d.pop(const.CHILDREN, []) + def _create_section_from_dict( + self, d: dict[str, Any], choices: dict[str, tuple[Option, ...]] | None = None + ) -> Survey | GroupedSection | RepeatingSection: + children = d.get(const.CHILDREN) section_class = SECTION_CLASSES[d[const.TYPE]] if d[const.TYPE] == const.SURVEY and const.TITLE not in d: d[const.TITLE] = d[const.NAME] result = section_class(**d) - for child in children: - # Deep copying the child is a hacky solution to the or_other bug. - # I don't know why it works. - # And I hope it doesn't break something else. - # I think the good solution would be to rewrite this class. - survey_element = self.create_survey_element_from_dict( - d=child, deep_copy=False - ) - if child[const.TYPE].endswith(const.SELECT_OR_OTHER_SUFFIX): - select_question = survey_element[0] - itemset_choices = self._choices.get(select_question[const.ITEMSET], None) - if ( - itemset_choices is not None - and isinstance(itemset_choices, list) - and not any( - c[const.NAME] == OR_OTHER_CHOICE[const.NAME] - for c in itemset_choices + if children: + for child in children: + if isinstance(result, Survey): + survey_element = self.create_survey_element_from_dict( + d=child, choices=result.choices + ) + else: + survey_element = self.create_survey_element_from_dict( + d=child, choices=choices ) - ): - itemset_choices.append(self._get_or_other_choice(itemset_choices)) - # This is required for builder_tests.BuilderTests.test_loop to pass. - self._add_other_option_to_multiple_choice_question(d=child) - if survey_element: result.add_children(survey_element) return result - def _create_loop_from_dict(self, d): + def _create_loop_from_dict( + self, d: dict[str, Any], choices: dict[str, tuple[Option, ...]] | None = None + ): """ Takes a json_dict of "loop" type Returns a GroupedSection """ - children = d.pop(const.CHILDREN, []) - columns = d.pop(const.COLUMNS, []) + children = d.get(const.CHILDREN) result = GroupedSection(**d) # columns is a left over from when this was # create_table_from_dict, I will need to clean this up - for column_dict in columns: + for column_dict in d.get(const.COLUMNS, ()): # If this is a none option for a select all that apply # question then we should skip adding it to the result if column_dict[const.NAME] == "none": continue - column = GroupedSection(**column_dict) - for child in children: - question_dict = self._name_and_label_substitutions(child, column_dict) - question = self.create_survey_element_from_dict( - d=question_dict, deep_copy=False - ) - column.add_child(question) + column = GroupedSection(type=const.GROUP, **column_dict) + if children is not None: + for child in children: + question_dict = self._name_and_label_substitutions(child, column_dict) + question = self.create_survey_element_from_dict( + d=question_dict, choices=choices + ) + column.add_child(question) result.add_child(column) if result.name != "": return result @@ -318,7 +252,7 @@ def _create_loop_from_dict(self, d): def _name_and_label_substitutions(question_template, column_headers): # if the label in column_headers has multiple languages setup a # dictionary by language to do substitutions. - info_by_lang = {} + info_by_lang = None if isinstance(column_headers[const.LABEL], dict): info_by_lang = { lang: { @@ -335,7 +269,7 @@ def _name_and_label_substitutions(question_template, column_headers): elif isinstance(result[key], dict): result[key] = result[key].copy() for key2 in result[key].keys(): - if isinstance(column_headers[const.LABEL], dict): + if info_by_lang and isinstance(column_headers[const.LABEL], dict): result[key][key2] %= info_by_lang.get(key2, column_headers) else: result[key][key2] %= column_headers @@ -344,7 +278,7 @@ def _name_and_label_substitutions(question_template, column_headers): def create_survey_element_from_json(self, str_or_path): d = utils.get_pyobj_from_json(str_or_path) # Loading JSON creates a new dictionary structure so no need to re-copy. - return self.create_survey_element_from_dict(d=d, deep_copy=False) + return self.create_survey_element_from_dict(d=d) def create_survey_element_from_dict(d, sections=None): diff --git a/pyxform/constants.py b/pyxform/constants.py index 21d59bdb..c33323e9 100644 --- a/pyxform/constants.py +++ b/pyxform/constants.py @@ -169,3 +169,5 @@ class EntityColumns(StrEnum): "xmlns:orx": "http://openrosa.org/xforms", "xmlns:odk": "http://www.opendatakit.org/xforms", } +SUPPORTED_MEDIA_TYPES = {"image", "big-image", "audio", "video"} +OR_OTHER_CHOICE = {NAME: "other", LABEL: "Other"} diff --git a/pyxform/entities/entity_declaration.py b/pyxform/entities/entity_declaration.py index 18ae84c6..ed046003 100644 --- a/pyxform/entities/entity_declaration.py +++ b/pyxform/entities/entity_declaration.py @@ -1,7 +1,7 @@ from typing import TYPE_CHECKING from pyxform import constants as const -from pyxform.survey_element import SurveyElement +from pyxform.survey_element import SURVEY_ELEMENT_FIELDS, SurveyElement from pyxform.utils import node if TYPE_CHECKING: @@ -9,6 +9,11 @@ EC = const.EntityColumns +ENTITY_EXTRA_FIELDS = ( + const.TYPE, + const.PARAMETERS, +) +ENTITY_FIELDS = (*SURVEY_ELEMENT_FIELDS, *ENTITY_EXTRA_FIELDS) class EntityDeclaration(SurveyElement): @@ -29,6 +34,17 @@ class EntityDeclaration(SurveyElement): 0 1 1 error, need id to update """ + __slots__ = ENTITY_EXTRA_FIELDS + + @staticmethod + def get_slot_names() -> tuple[str, ...]: + return ENTITY_FIELDS + + def __init__(self, name: str, type: str, parameters: dict, **kwargs): + self.parameters: dict = parameters + self.type: str = type + super().__init__(name=name, **kwargs) + def xml_instance(self, **kwargs): parameters = self.get(const.PARAMETERS, {}) diff --git a/pyxform/external_instance.py b/pyxform/external_instance.py index 4688b4de..50301ddb 100644 --- a/pyxform/external_instance.py +++ b/pyxform/external_instance.py @@ -4,13 +4,28 @@ from typing import TYPE_CHECKING -from pyxform.survey_element import SurveyElement +from pyxform import constants +from pyxform.survey_element import SURVEY_ELEMENT_FIELDS, SurveyElement if TYPE_CHECKING: from pyxform.survey import Survey +EXTERNAL_INSTANCE_EXTRA_FIELDS = (constants.TYPE,) +EXTERNAL_INSTANCE_FIELDS = (*SURVEY_ELEMENT_FIELDS, *EXTERNAL_INSTANCE_EXTRA_FIELDS) + + class ExternalInstance(SurveyElement): + __slots__ = EXTERNAL_INSTANCE_EXTRA_FIELDS + + @staticmethod + def get_slot_names() -> tuple[str, ...]: + return EXTERNAL_INSTANCE_FIELDS + + def __init__(self, name: str, type: str, **kwargs): + self.type: str = type + super().__init__(name=name, **kwargs) + def xml_control(self, survey: "Survey"): """ No-op since there is no associated form control to place under . diff --git a/pyxform/question.py b/pyxform/question.py index 1b7c30b4..211626d8 100644 --- a/pyxform/question.py +++ b/pyxform/question.py @@ -3,8 +3,11 @@ """ import os.path +from collections.abc import Iterable +from itertools import chain from typing import TYPE_CHECKING +from pyxform import constants from pyxform.constants import ( EXTERNAL_CHOICES_ITEMSET_REF_LABEL, EXTERNAL_CHOICES_ITEMSET_REF_LABEL_GEOJSON, @@ -14,7 +17,7 @@ ) from pyxform.errors import PyXFormError from pyxform.question_type_dictionary import QUESTION_TYPE_DICT -from pyxform.survey_element import SurveyElement +from pyxform.survey_element import SURVEY_ELEMENT_FIELDS, SurveyElement from pyxform.utils import ( PYXFORM_REFERENCE_REGEX, DetachableElement, @@ -27,15 +30,115 @@ from pyxform.survey import Survey +QUESTION_EXTRA_FIELDS = ( + "_itemset_dyn_label", + "_itemset_has_media", + "_itemset_multi_language", + "_qtd_defaults", + "_qtd_kwargs", + "action", + "default", + "guidance_hint", + "instance", + "query", + "sms_field", + "trigger", + constants.BIND, + constants.CHOICE_FILTER, + constants.COMPACT_TAG, # used for compact (sms) representation + constants.CONTROL, + constants.HINT, + constants.MEDIA, + constants.PARAMETERS, + constants.TYPE, +) +QUESTION_FIELDS = (*SURVEY_ELEMENT_FIELDS, *QUESTION_EXTRA_FIELDS) + +SELECT_QUESTION_EXTRA_FIELDS = ( + constants.CHILDREN, + constants.ITEMSET, + constants.LIST_NAME_U, +) +SELECT_QUESTION_FIELDS = (*QUESTION_FIELDS, *SELECT_QUESTION_EXTRA_FIELDS) + +OSM_QUESTION_EXTRA_FIELDS = (constants.CHILDREN,) +OSM_QUESTION_FIELDS = (*QUESTION_FIELDS, *SELECT_QUESTION_EXTRA_FIELDS) + +OPTION_EXTRA_FIELDS = ( + "_choice_itext_id", + constants.MEDIA, + "sms_option", +) +OPTION_FIELDS = (*SURVEY_ELEMENT_FIELDS, *OPTION_EXTRA_FIELDS) + +TAG_EXTRA_FIELDS = (constants.CHILDREN,) +TAG_FIELDS = (*SURVEY_ELEMENT_FIELDS, *TAG_EXTRA_FIELDS) + + class Question(SurveyElement): - FIELDS = SurveyElement.FIELDS.copy() - FIELDS.update( - { - "_itemset_multi_language": bool, - "_itemset_has_media": bool, - "_itemset_dyn_label": bool, - } - ) + __slots__ = QUESTION_EXTRA_FIELDS + + @staticmethod + def get_slot_names() -> tuple[str, ...]: + return QUESTION_FIELDS + + def __init__(self, fields: tuple[str, ...] | None = None, **kwargs): + # Internals + self._qtd_defaults: dict | None = None + self._qtd_kwargs: dict | None = None + + # Structure + self.action: dict[str, str] | None = None + self.bind: dict | None = None + self.control: dict | None = None + self.instance: dict | None = None + self.media: dict | None = None + self.type: str | None = None + + # Common / template settings + self.choice_filter: str | None = None + self.default: str | None = None + self.guidance_hint: str | dict | None = None + self.hint: str | dict | None = None + # constraint_message, required_message are placed in bind dict. + self.parameters: dict | None = None + self.query: str | None = None + self.trigger: str | None = None + + # SMS / compact settings + self.compact_tag: str | None = None + self.sms_field: str | None = None + + qtd = kwargs.pop("question_type_dictionary", QUESTION_TYPE_DICT) + type_arg = kwargs.get("type") + default_type = qtd.get(type_arg) + if default_type is None: + raise PyXFormError(f"Unknown question type '{type_arg}'.") + + # Keeping original qtd_kwargs is only needed if output of QTD data is not + # acceptable in to_json_dict() i.e. to exclude default bind/control values. + self._qtd_defaults = qtd.get(type_arg) + qtd_kwargs = None + for k, v in self._qtd_defaults.items(): + if isinstance(v, dict): + template = v.copy() + if k in kwargs: + template.update(kwargs[k]) + if qtd_kwargs is None: + qtd_kwargs = {} + qtd_kwargs[k] = kwargs[k] + kwargs[k] = template + elif k not in kwargs: + kwargs[k] = v + + if qtd_kwargs: + self._qtd_kwargs = qtd_kwargs + + if fields is None: + fields = QUESTION_EXTRA_FIELDS + else: + fields = chain(QUESTION_EXTRA_FIELDS, fields) + super().__init__(fields=fields, **kwargs) def validate(self): SurveyElement.validate(self) @@ -46,10 +149,12 @@ def validate(self): raise PyXFormError(f"Unknown question type '{self.type}'.") def xml_instance(self, survey: "Survey", **kwargs): - attributes = {} - attributes.update(self.get("instance", {})) - for key, value in attributes.items(): - attributes[key] = survey.insert_xpaths(value, self) + attributes = self.get("instance") + if attributes is None: + attributes = {} + else: + for key, value in attributes.items(): + attributes[key] = survey.insert_xpaths(value, self) if self.get("default") and not default_is_dynamic(self.default, self.type): return node(self.name, str(self.get("default")), **attributes) @@ -57,7 +162,15 @@ def xml_instance(self, survey: "Survey", **kwargs): def xml_control(self, survey: "Survey"): if self.type == "calculate" or ( - ("calculate" in self.bind or self.trigger) and not (self.label or self.hint) + ( + ( + hasattr(self, "bind") + and self.bind is not None + and "calculate" in self.bind + ) + or self.trigger + ) + and not (self.label or self.hint) ): nested_setvalues = survey.get_trigger_values_for_question_name( self.name, "setvalue" @@ -93,6 +206,19 @@ def xml_control(self, survey: "Survey"): return xml_node + def xml_action(self): + """ + Return the action for this survey element. + """ + if self.action: + return node( + self.action["name"], + ref=self.get_xpath(), + **{k: v for k, v in self.action.items() if k != "name"}, + ) + + return None + def nest_set_nodes(self, survey, xml_node, tag, nested_items): for item in nested_items: node_attrs = { @@ -107,6 +233,19 @@ def nest_set_nodes(self, survey, xml_node, tag, nested_items): def build_xml(self, survey: "Survey") -> DetachableElement | None: return None + def to_json_dict(self, delete_keys: Iterable[str] | None = None) -> dict: + to_delete = (k for k in self.get_slot_names() if k.startswith("_")) + if self._qtd_defaults: + to_delete = chain(to_delete, self._qtd_defaults) + if delete_keys is not None: + to_delete = chain(to_delete, delete_keys) + result = super().to_json_dict(delete_keys=to_delete) + if self._qtd_kwargs: + for k, v in self._qtd_kwargs.items(): + if v: + result[k] = v + return result + class InputQuestion(Question): """ @@ -125,15 +264,17 @@ def build_xml(self, survey: "Survey"): result = node(**control_dict) if label_and_hint: for element in self.xml_label_and_hint(survey=survey): - result.appendChild(element) + if element: + result.appendChild(element) # Input types are used for selects with external choices sheets. if self["query"]: - choice_filter = self.get("choice_filter") - query = "instance('" + self["query"] + "')/root/item" - choice_filter = survey.insert_xpaths(choice_filter, self, True) - if choice_filter: - query += "[" + choice_filter + "]" + choice_filter = self.get(constants.CHOICE_FILTER) + if choice_filter is not None: + pred = survey.insert_xpaths(choice_filter, self, True) + query = f"""instance('{self["query"]}')/root/item[{pred}]""" + else: + query = f"""instance('{self["query"]}')/root/item""" result.setAttribute("query", query) return result @@ -163,6 +304,26 @@ def build_xml(self, survey: "Survey"): class Option(SurveyElement): + __slots__ = OPTION_EXTRA_FIELDS + + @staticmethod + def get_slot_names() -> tuple[str, ...]: + return OPTION_FIELDS + + def __init__( + self, + name: str, + label: str | dict | None = None, + media: dict | None = None, + sms_option: str | None = None, + **kwargs, + ): + self._choice_itext_id: str | None = None + self.media: dict | None = media + self.sms_option: str | None = sms_option + + super().__init__(name=name, label=label, **kwargs) + def xml_value(self): return node("value", self.name) @@ -180,65 +341,85 @@ def xml_control(self, survey: "Survey"): raise NotImplementedError() def _translation_path(self, display_element): - choice_itext_id = self.get("_choice_itext_id") - if choice_itext_id is not None: - return choice_itext_id + if self._choice_itext_id is not None: + return self._choice_itext_id return super()._translation_path(display_element=display_element) + def to_json_dict(self, delete_keys: Iterable[str] | None = None) -> dict: + to_delete = (k for k in self.get_slot_names() if k.startswith("_")) + if delete_keys is not None: + to_delete = chain(to_delete, delete_keys) + return super().to_json_dict(delete_keys=to_delete) + class MultipleChoiceQuestion(Question): - def __init__(self, **kwargs): + __slots__ = SELECT_QUESTION_EXTRA_FIELDS + + @staticmethod + def get_slot_names() -> tuple[str, ...]: + return SELECT_QUESTION_FIELDS + + def __init__( + self, itemset: str | None = None, list_name: str | None = None, **kwargs + ): + # Internals + self._itemset_dyn_label: bool = False + self._itemset_has_media: bool = False + self._itemset_multi_language: bool = False + + # Structure + self.children: tuple[Option, ...] | None = None + self.itemset: str | None = itemset + self.list_name: str | None = list_name + # Notice that choices can be specified under choices or children. # I'm going to try to stick to just choices. # Aliases in the json format will make it more difficult # to use going forward. - kwargs["children"] = [ - Option(**c) - for c in combine_lists( - a=kwargs.pop("choices", None), b=kwargs.pop("children", None) + choices = combine_lists( + a=kwargs.pop(constants.CHOICES, None), b=kwargs.pop(constants.CHILDREN, None) + ) + if choices: + self.children = tuple( + c if isinstance(c, Option) else Option(**c) for c in choices ) - ] super().__init__(**kwargs) def validate(self): Question.validate(self) - descendants = self.iter_descendants() - next(descendants) # iter_descendants includes self; we need to pop it - - for choice in descendants: - choice.validate() + if self.children: + for child in self.children: + child.validate() def build_xml(self, survey: "Survey"): - if self.bind["type"] not in ["string", "odk:rank"]: + if self.bind["type"] not in {"string", "odk:rank"}: raise PyXFormError("""Invalid value for `self.bind["type"]`.""") - control_dict = self.control.copy() + # Resolve field references in attributes - for key, value in control_dict.items(): - control_dict[key] = survey.insert_xpaths(value, self) + control_dict = { + key: survey.insert_xpaths(value, self) for key, value in self.control.items() + } control_dict["ref"] = self.get_xpath() result = node(**control_dict) for element in self.xml_label_and_hint(survey=survey): - result.appendChild(element) + if element: + result.appendChild(element) # itemset are only supposed to be strings, # check to prevent the rare dicts that show up if self["itemset"] and isinstance(self["itemset"], str): - choice_filter = self.get("choice_filter") - itemset, file_extension = os.path.splitext(self["itemset"]) - itemset_value_ref = self.parameters.get( - "value", - EXTERNAL_CHOICES_ITEMSET_REF_VALUE_GEOJSON - if file_extension == ".geojson" - else EXTERNAL_CHOICES_ITEMSET_REF_VALUE, - ) - itemset_label_ref = self.parameters.get( - "label", - EXTERNAL_CHOICES_ITEMSET_REF_LABEL_GEOJSON - if file_extension == ".geojson" - else EXTERNAL_CHOICES_ITEMSET_REF_LABEL, - ) + + if file_extension == ".geojson": + itemset_value_ref = EXTERNAL_CHOICES_ITEMSET_REF_VALUE_GEOJSON + itemset_label_ref = EXTERNAL_CHOICES_ITEMSET_REF_LABEL_GEOJSON + else: + itemset_value_ref = EXTERNAL_CHOICES_ITEMSET_REF_VALUE + itemset_label_ref = EXTERNAL_CHOICES_ITEMSET_REF_LABEL + if hasattr(self, "parameters") and self.parameters is not None: + itemset_value_ref = self.parameters.get("value", itemset_value_ref) + itemset_label_ref = self.parameters.get("label", itemset_label_ref) multi_language = self.get("_itemset_multi_language", False) has_media = self.get("_itemset_has_media", False) @@ -255,9 +436,11 @@ def build_xml(self, survey: "Survey"): itemset = self["itemset"] itemset_label_ref = "jr:itext(itextId)" - choice_filter = survey.insert_xpaths( - choice_filter, self, True, is_previous_question - ) + choice_filter = self.get(constants.CHOICE_FILTER) + if choice_filter is not None: + choice_filter = survey.insert_xpaths( + choice_filter, self, True, is_previous_question + ) if is_previous_question: path = ( survey.insert_xpaths(self["itemset"], self, reference_parent=True) @@ -277,10 +460,10 @@ def build_xml(self, survey: "Survey"): name = path[-1] choice_filter = f"./{name} != ''" else: - nodeset = "instance('" + itemset + "')/root/item" + nodeset = f"instance('{itemset}')/root/item" if choice_filter: - nodeset += "[" + choice_filter + "]" + nodeset += f"[{choice_filter}]" if self["parameters"]: params = self["parameters"] @@ -305,34 +488,38 @@ def build_xml(self, survey: "Survey"): node("label", ref=itemset_label_ref), ] result.appendChild(node("itemset", *itemset_children, nodeset=nodeset)) - else: + elif self.children: for child in self.children: result.appendChild(child.xml(survey=survey)) return result -class SelectOneQuestion(MultipleChoiceQuestion): - def __init__(self, **kwargs): - self._dict[self.TYPE] = "select one" - super().__init__(**kwargs) +class Tag(SurveyElement): + __slots__ = TAG_EXTRA_FIELDS + @staticmethod + def get_slot_names() -> tuple[str, ...]: + return TAG_FIELDS -class Tag(SurveyElement): - def __init__(self, **kwargs): - kwargs["children"] = [ - Option(**c) - for c in combine_lists( - a=kwargs.pop("choices", None), b=kwargs.pop("children", None) + def __init__(self, name: str, label: str | dict | None = None, **kwargs): + self.children: tuple[Option, ...] | None = None + + choices = combine_lists( + a=kwargs.pop(constants.CHOICES, None), b=kwargs.pop(constants.CHILDREN, None) + ) + if choices: + self.children = tuple( + c if isinstance(c, Option) else Option(**c) for c in choices ) - ] - super().__init__(**kwargs) + super().__init__(name=name, label=label, **kwargs) def xml(self, survey: "Survey"): result = node("tag", key=self.name) result.appendChild(self.xml_label(survey=survey)) - for choice in self.children: - result.appendChild(choice.xml(survey=survey)) + if self.children: + for choice in self.children: + result.appendChild(choice.xml(survey=survey)) return result @@ -344,13 +531,21 @@ def xml_control(self, survey: "Survey"): class OsmUploadQuestion(UploadQuestion): + __slots__ = OSM_QUESTION_EXTRA_FIELDS + + @staticmethod + def get_slot_names() -> tuple[str, ...]: + return OSM_QUESTION_FIELDS + def __init__(self, **kwargs): - kwargs["children"] = [ - Tag(**c) - for c in combine_lists( - a=kwargs.pop("tags", None), b=kwargs.pop("children", None) - ) - ] + self.children: tuple[Option, ...] | None = None + + choices = combine_lists( + a=kwargs.pop("tags", None), b=kwargs.pop(constants.CHILDREN, None) + ) + if choices: + self.children = tuple(Tag(**c) for c in choices) + super().__init__(**kwargs) def build_xml(self, survey: "Survey"): @@ -359,8 +554,9 @@ def build_xml(self, survey: "Survey"): control_dict["mediatype"] = self._get_media_type() result = node("upload", *self.xml_label_and_hint(survey=survey), **control_dict) - for osm_tag in self.children: - result.appendChild(osm_tag.xml(survey=survey)) + if self.children: + for osm_tag in self.children: + result.appendChild(osm_tag.xml(survey=survey)) return result diff --git a/pyxform/section.py b/pyxform/section.py index b91fea59..2111980c 100644 --- a/pyxform/section.py +++ b/pyxform/section.py @@ -2,19 +2,76 @@ Section survey element module. """ -from collections.abc import Generator +from collections.abc import Generator, Iterable +from itertools import chain from typing import TYPE_CHECKING +from pyxform import constants from pyxform.errors import PyXFormError from pyxform.external_instance import ExternalInstance -from pyxform.survey_element import SurveyElement +from pyxform.survey_element import SURVEY_ELEMENT_FIELDS, SurveyElement from pyxform.utils import DetachableElement, node if TYPE_CHECKING: + from pyxform.question import Question from pyxform.survey import Survey +SECTION_EXTRA_FIELDS = ( + constants.BIND, + constants.CHILDREN, + constants.CONTROL, + constants.HINT, + constants.MEDIA, + constants.TYPE, + "instance", + "flat", + "sms_field", +) +SECTION_FIELDS = (*SURVEY_ELEMENT_FIELDS, *SECTION_EXTRA_FIELDS) + + class Section(SurveyElement): + __slots__ = SECTION_EXTRA_FIELDS + + @staticmethod + def get_slot_names() -> tuple[str, ...]: + return SECTION_FIELDS + + def __init__( + self, + name: str, + type: str, + label: str | dict | None = None, + hint: str | dict | None = None, + bind: dict | None = None, + control: dict | None = None, + instance: dict | None = None, + media: dict | None = None, + flat: bool | None = None, + sms_field: str | None = None, + fields: tuple[str, ...] | None = None, + **kwargs, + ): + # Structure + self.bind: dict | None = bind + self.children: list[Section | Question] | None = None + self.control: dict | None = control + # instance is for custom instance attrs from survey e.g. instance::abc:xyz + self.instance: dict | None = instance + # TODO: is media valid for groups? No tests for it, but it behaves like Questions. + self.media: dict | None = media + self.type: str | None = type + + # Group settings are generally put in bind/control dicts. + self.hint: str | dict | None = hint + self.flat: bool | None = flat + self.sms_field: str | None = sms_field + + # Recursively creating child objects currently handled by the builder module. + kwargs.pop(constants.CHILDREN, None) + super().__init__(name=name, label=label, fields=fields, **kwargs) + def validate(self): super().validate() for element in self.children: @@ -42,7 +99,8 @@ def xml_instance(self, survey: "Survey", **kwargs): attributes = {} attributes.update(kwargs) - attributes.update(self.get("instance", {})) + if self.instance: + attributes.update(self.instance) # Resolve field references in attributes for key, value in attributes.items(): attributes[key] = survey.insert_xpaths(value, self) @@ -50,7 +108,7 @@ def xml_instance(self, survey: "Survey", **kwargs): for child in self.children: repeating_template = None - if child.get("flat"): + if hasattr(child, "flat") and child.get("flat"): for grandchild in child.xml_instance_array(survey=survey): result.appendChild(grandchild) elif isinstance(child, ExternalInstance): @@ -82,7 +140,7 @@ def xml_instance_array(self, survey: "Survey"): This method is used for generating flat instances. """ for child in self.children: - if child.get("flat"): + if hasattr(child, "flat") and child.get("flat"): yield from child.xml_instance_array(survey=survey) else: yield child.xml_instance(survey=survey) @@ -113,11 +171,15 @@ def xml_control(self, survey: "Survey"): """ - control_dict = self.control.copy() # Resolve field references in attributes - for key, value in control_dict.items(): - control_dict[key] = survey.insert_xpaths(value, self) - repeat_node = node("repeat", nodeset=self.get_xpath(), **control_dict) + if self.control: + control_dict = { + key: survey.insert_xpaths(value, self) + for key, value in self.control.items() + } + repeat_node = node("repeat", nodeset=self.get_xpath(), **control_dict) + else: + repeat_node = node("repeat", nodeset=self.get_xpath()) for n in Section.xml_control(self, survey=survey): repeat_node.appendChild(n) @@ -128,7 +190,10 @@ def xml_control(self, survey: "Survey"): label = self.xml_label(survey=survey) if label: return node("group", label, repeat_node, ref=self.get_xpath()) - return node("group", repeat_node, ref=self.get_xpath(), **self.control) + if self.control: + return node("group", repeat_node, ref=self.get_xpath(), **self.control) + else: + return node("group", repeat_node, ref=self.get_xpath()) # Get setvalue nodes for all descendants of this repeat that have dynamic defaults and aren't nested in other repeats. def _dynamic_defaults_helper( @@ -165,38 +230,38 @@ class GroupedSection(Section): # super(GroupedSection, self).__init__(kwargs) def xml_control(self, survey: "Survey"): - control_dict = self.control - - if control_dict.get("bodyless"): + if self.control and self.control.get("bodyless"): return None children = [] - attributes = {} - attributes.update(self.control) # Resolve field references in attributes - for key, value in attributes.items(): - attributes[key] = survey.insert_xpaths(value, self) + if self.control: + attributes = { + key: survey.insert_xpaths(value, self) + for key, value in self.control.items() + } + if "appearance" in self.control: + attributes["appearance"] = self.control["appearance"] + else: + attributes = {} if not self.get("flat"): attributes["ref"] = self.get_xpath() - if "label" in self and len(self["label"]) > 0: + if "label" in self and self.label is not None and len(self["label"]) > 0: children.append(self.xml_label(survey=survey)) for n in Section.xml_control(self, survey=survey): children.append(n) - if "appearance" in control_dict: - attributes["appearance"] = control_dict["appearance"] - - if "intent" in control_dict: - attributes["intent"] = survey.insert_xpaths(control_dict["intent"], self) - return node("group", *children, **attributes) - def to_json_dict(self): + def to_json_dict(self, delete_keys: Iterable[str] | None = None) -> dict: + to_delete = (constants.BIND,) + if delete_keys is not None: + to_delete = chain(to_delete, delete_keys) + result = super().to_json_dict(delete_keys=to_delete) # This is quite hacky, might want to think about a smart way # to approach this problem. - result = super().to_json_dict() result["type"] = "group" return result diff --git a/pyxform/survey.py b/pyxform/survey.py index ddcc5d28..a45ca14c 100644 --- a/pyxform/survey.py +++ b/pyxform/survey.py @@ -7,20 +7,22 @@ import tempfile import xml.etree.ElementTree as ETree from collections import defaultdict -from collections.abc import Generator +from collections.abc import Generator, Iterable from datetime import datetime from functools import lru_cache +from itertools import chain from pathlib import Path from pyxform import aliases, constants from pyxform.constants import EXTERNAL_INSTANCE_EXTENSIONS, NSMAP +from pyxform.entities.entity_declaration import EntityDeclaration from pyxform.errors import PyXFormError, ValidationError from pyxform.external_instance import ExternalInstance from pyxform.instance import SurveyInstance from pyxform.parsing import instance_expression -from pyxform.question import Option, Question -from pyxform.section import Section -from pyxform.survey_element import SurveyElement +from pyxform.question import MultipleChoiceQuestion, Option, Question, Tag +from pyxform.section import SECTION_EXTRA_FIELDS, Section +from pyxform.survey_element import SURVEY_ELEMENT_FIELDS, SurveyElement from pyxform.utils import ( BRACKETED_TAG_REGEX, LAST_SAVED_INSTANCE_NAME, @@ -74,7 +76,7 @@ def register_nsmap(): register_nsmap() -@lru_cache(maxsize=65536) # 2^16 +@lru_cache(maxsize=128) def is_parent_a_repeat(survey, xpath): """ Returns the XPATH of the first repeat of the given xpath in the survey, @@ -84,14 +86,14 @@ def is_parent_a_repeat(survey, xpath): if not parent_xpath: return False - for item in survey.iter_descendants(): + for item in survey.iter_descendants(condition=lambda i: isinstance(i, Section)): if item.type == constants.REPEAT and item.get_xpath() == parent_xpath: return parent_xpath return is_parent_a_repeat(survey, parent_xpath) -@lru_cache(maxsize=65536) # 2^16 +@lru_cache(maxsize=128) def share_same_repeat_parent(survey, xpath, context_xpath, reference_parent=False): """ Returns a tuple of the number of steps from the context xpath to the shared @@ -169,7 +171,7 @@ def _get_steps_and_target_xpath(context_parent, xpath_parent, include_parent=Fal return (None, None) -@lru_cache(maxsize=65536) # 2^16 +@lru_cache(maxsize=128) def is_label_dynamic(label: str) -> bool: return ( label is not None @@ -182,44 +184,117 @@ def recursive_dict(): return defaultdict(recursive_dict) +SURVEY_EXTRA_FIELDS = ( + "_created", + "_search_lists", + "_translations", + "_xpath", + "add_none_option", + "clean_text_values", + "attribute", + "auto_delete", + "auto_send", + "choices", + "default_language", + "file_name", + "id_string", + "instance_name", + "instance_xmlns", + "namespaces", + "omit_instanceID", + "public_key", + "setgeopoint_by_triggering_ref", + "setvalues_by_triggering_ref", + "sms_allow_media", + "sms_date_format", + "sms_datetime_format", + "sms_keyword", + "sms_response", + "sms_separator", + "style", + "submission_url", + "title", + "version", + constants.ALLOW_CHOICE_DUPLICATES, + constants.COMPACT_DELIMITER, + constants.COMPACT_PREFIX, + constants.ENTITY_FEATURES, +) +SURVEY_FIELDS = (*SURVEY_ELEMENT_FIELDS, *SECTION_EXTRA_FIELDS, *SURVEY_EXTRA_FIELDS) + + class Survey(Section): """ Survey class - represents the full XForm XML. """ - FIELDS = Section.FIELDS.copy() - FIELDS.update( - { - "_xpath": dict, - "_created": datetime.now, # This can't be dumped to json - "setvalues_by_triggering_ref": dict, - "setgeopoint_by_triggering_ref": dict, - "title": str, - "id_string": str, - "sms_keyword": str, - "sms_separator": str, - "sms_allow_media": bool, - "sms_date_format": str, - "sms_datetime_format": str, - "sms_response": str, - constants.COMPACT_PREFIX: str, - constants.COMPACT_DELIMITER: str, - "file_name": str, - "default_language": str, - "_translations": recursive_dict, - "submission_url": str, - "auto_send": str, - "auto_delete": str, - "public_key": str, - "instance_xmlns": str, - "version": str, - "choices": dict, - "style": str, - "attribute": dict, - "namespaces": str, - constants.ENTITY_FEATURES: list, - } - ) + __slots__ = SURVEY_EXTRA_FIELDS + + @staticmethod + def get_slot_names() -> tuple[str, ...]: + return SURVEY_FIELDS + + def __init__(self, **kwargs): + # Internals + self._created: datetime.now = datetime.now() + self._search_lists: set = set() + self._translations: recursive_dict = recursive_dict() + self._xpath: dict[str, SurveyElement | None] = {} + + # Structure + # attribute is for custom instance attrs from settings e.g. attribute::abc:xyz + self.attribute: dict | None = None + self.choices: dict[str, tuple[Option, ...]] | None = None + self.entity_features: list[str] | None = None + self.setgeopoint_by_triggering_ref: dict[str, list[str]] = {} + self.setvalues_by_triggering_ref: dict[str, list[str]] = {} + + # Common / template settings + self.default_language: str = "" + self.id_string: str = "" + self.instance_name: str = "" + self.style: str | None = None + self.title: str = "" + self.version: str = "" + + # Other settings + self.add_none_option: bool = False + self.allow_choice_duplicates: bool = False + self.auto_delete: str | None = None + self.auto_send: str | None = None + self.clean_text_values: bool = False + self.instance_xmlns: str | None = None + self.namespaces: str | None = None + self.omit_instanceID: bool = False + self.public_key: str | None = None + self.submission_url: str | None = None + + # SMS / compact settings + self.delimiter: str | None = None + self.prefix: str | None = None + self.sms_allow_media: bool | None = None + self.sms_date_format: str | None = None + self.sms_datetime_format: str | None = None + self.sms_keyword: str | None = None + self.sms_response: str | None = None + self.sms_separator: str | None = None + + choices = kwargs.pop("choices", None) + if choices is not None: + self.choices = { + list_name: tuple( + c if isinstance(c, Option) else Option(**c) for c in values + ) + for list_name, values in choices.items() + } + kwargs[constants.TYPE] = constants.SURVEY + super().__init__(fields=SURVEY_EXTRA_FIELDS, **kwargs) + + def to_json_dict(self, delete_keys: Iterable[str] | None = None) -> dict: + to_delete = (k for k in self.get_slot_names() if k.startswith("_")) + if delete_keys is not None: + to_delete = chain(to_delete, delete_keys) + return super().to_json_dict(delete_keys=to_delete) def validate(self): if self.id_string in [None, "None"]: @@ -247,14 +322,17 @@ def _validate_uniqueness_of_section_names(self): def get_nsmap(self): """Add additional namespaces""" - namespaces = getattr(self, constants.NAMESPACES, "") - if len(getattr(self, constants.ENTITY_FEATURES, [])) > 0: - namespaces += " entities=http://www.opendatakit.org/xforms/entities" + if self.entity_features: + entities_ns = " entities=http://www.opendatakit.org/xforms/entities" + if self.namespaces is None: + self.namespaces = entities_ns + else: + self.namespaces += entities_ns - if namespaces and isinstance(namespaces, str): + if self.namespaces: nslist = [ ns.split("=") - for ns in namespaces.split() + for ns in self.namespaces.split() if len(ns.split("=")) == 2 and ns.split("=")[0] != "" ] xmlns = "xmlns:" @@ -287,8 +365,8 @@ def xml(self): self.insert_xpaths(triggering_reference, self) body_kwargs = {} - if hasattr(self, constants.STYLE) and getattr(self, constants.STYLE): - body_kwargs["class"] = getattr(self, constants.STYLE) + if self.style: + body_kwargs["class"] = self.style nsmap = self.get_nsmap() return node( @@ -298,13 +376,11 @@ def xml(self): **nsmap, ) - def get_trigger_values_for_question_name(self, question_name, trigger_type): - trigger_map = { - "setvalue": self.setvalues_by_triggering_ref, - "setgeopoint": self.setgeopoint_by_triggering_ref, - } - - return trigger_map.get(trigger_type, {}).get(f"${{{question_name}}}") + def get_trigger_values_for_question_name(self, question_name: str, trigger_type: str): + if trigger_type == "setvalue": + return self.setvalues_by_triggering_ref.get(f"${{{question_name}}}") + elif trigger_type == "setgeopoint": + return self.setgeopoint_by_triggering_ref.get(f"${{{question_name}}}") def _generate_static_instances(self, list_name, choice_list) -> InstanceInfo: """ @@ -315,34 +391,42 @@ def _generate_static_instances(self, list_name, choice_list) -> InstanceInfo: has_dyn_label = has_dynamic_label(choice_list) multi_language = False if isinstance(self._translations, dict): - choices = tuple( - k + choices = ( + True for items in self._translations.values() for k, v in items.items() if v.get(constants.TYPE, "") == constants.CHOICE and "-".join(k.split("-")[:-1]) == list_name ) - if 0 < len(choices): - multi_language = True + try: + if next(choices): + multi_language = True + except StopIteration: + pass for idx, choice in enumerate(choice_list): choice_element_list = [] # Add a unique id to the choice element in case there are itext references if multi_language or has_media or has_dyn_label: - itext_id = "-".join([list_name, str(idx)]) + itext_id = f"{list_name}-{idx}" choice_element_list.append(node("itextId", itext_id)) for name, value in choice.items(): - if isinstance(value, str) and name != "label": - choice_element_list.append(node(name, str(value))) - if ( + if not value: + continue + elif name != "label" and isinstance(value, str): + choice_element_list.append(node(name, value)) + elif name == "extra_data" and isinstance(value, dict): + for k, v in value.items(): + choice_element_list.append(node(k, v)) + elif ( not multi_language and not has_media and not has_dyn_label and isinstance(value, str) and name == "label" ): - choice_element_list.append(node(name, str(value))) + choice_element_list.append(node(name, value)) instance_element_list.append(node("item", *choice_element_list)) @@ -355,7 +439,7 @@ def _generate_static_instances(self, list_name, choice_list) -> InstanceInfo: ) @staticmethod - def _generate_external_instances(element) -> InstanceInfo | None: + def _generate_external_instances(element: SurveyElement) -> InstanceInfo | None: if isinstance(element, ExternalInstance): name = element["name"] extension = element["type"].split("-")[0] @@ -401,7 +485,7 @@ def _validate_external_instances(instances) -> None: raise ValidationError("\n".join(errors)) @staticmethod - def _generate_pulldata_instances(element) -> list[InstanceInfo] | None: + def _generate_pulldata_instances(element: SurveyElement) -> list[InstanceInfo] | None: def get_pulldata_functions(element): """ Returns a list of different pulldata(... function strings if @@ -412,11 +496,23 @@ def get_pulldata_functions(element): """ functions_present = [] for formula_name in constants.EXTERNAL_INSTANCES: - if "pulldata(" in str(element["bind"].get(formula_name)): + if ( + hasattr(element, "bind") + and element.bind is not None + and "pulldata(" in str(element["bind"].get(formula_name)) + ): functions_present.append(element["bind"][formula_name]) - if "pulldata(" in str(element["choice_filter"]): - functions_present.append(element["choice_filter"]) - if "pulldata(" in str(element["default"]): + if ( + hasattr(element, constants.CHOICE_FILTER) + and element.choice_filter is not None + and "pulldata(" in str(element[constants.CHOICE_FILTER]) + ): + functions_present.append(element[constants.CHOICE_FILTER]) + if ( + hasattr(element, "default") + and element.default is not None + and "pulldata(" in str(element["default"]) + ): functions_present.append(element["default"]) return functions_present @@ -434,6 +530,8 @@ def get_instance_info(element, file_id): instance=node("instance", id=file_id, src=uri), ) + if isinstance(element, Option | ExternalInstance | Tag | Survey): + return None pulldata_usages = get_pulldata_functions(element) if len(pulldata_usages) > 0: pulldata_instances = [] @@ -451,7 +549,9 @@ def get_instance_info(element, file_id): return None @staticmethod - def _generate_from_file_instances(element) -> InstanceInfo | None: + def _generate_from_file_instances(element: SurveyElement) -> InstanceInfo | None: + if not isinstance(element, MultipleChoiceQuestion) or element.itemset is None: + return None itemset = element.get("itemset") file_id, ext = os.path.splitext(itemset) if itemset and ext in EXTERNAL_INSTANCE_EXTENSIONS: @@ -474,16 +574,21 @@ def _generate_last_saved_instance(element) -> bool: """ True if a last-saved instance should be generated, false otherwise. """ + if not hasattr(element, "bind") or element.bind is None: + return False for expression_type in constants.EXTERNAL_INSTANCES: last_saved_expression = re.search( LAST_SAVED_REGEX, str(element["bind"].get(expression_type)) ) if last_saved_expression: return True - return bool( - re.search(LAST_SAVED_REGEX, str(element["choice_filter"])) - or re.search(LAST_SAVED_REGEX, str(element["default"])) + hasattr(element, constants.CHOICE_FILTER) + and element.choice_filter is not None + and re.search(LAST_SAVED_REGEX, str(element.choice_filter)) + or hasattr(element, "default") + and element.default is not None + and re.search(LAST_SAVED_REGEX, str(element.default)) ) @staticmethod @@ -550,10 +655,12 @@ def _generate_instances(self) -> Generator[DetachableElement, None, None]: instances += [self._get_last_saved_instance()] # Append last so the choice instance is excluded on a name clash. - for name, value in self.choices.items(): - instances += [ - self._generate_static_instances(list_name=name, choice_list=value) - ] + if self.choices: + for name, value in self.choices.items(): + if name not in self._search_lists: + instances += [ + self._generate_static_instances(list_name=name, choice_list=value) + ] # Check that external instances have unique names. if instances: @@ -580,14 +687,14 @@ def _generate_instances(self) -> Generator[DetachableElement, None, None]: yield i.instance seen[i.name] = i - def xml_descendent_bindings(self) -> Generator["DetachableElement", None, None]: + def xml_descendent_bindings(self) -> Generator[DetachableElement | None, None, None]: """ Yield bindings for this node and all its descendants. """ - for e in self.iter_descendants(): - xml_bindings = e.xml_bindings(survey=self) - if xml_bindings is not None: - yield from xml_bindings + for e in self.iter_descendants( + condition=lambda i: not isinstance(i, Option | Tag) + ): + yield from e.xml_bindings(survey=self) # dynamic defaults for repeats go in the body. All other dynamic defaults (setvalue actions) go in the model if not next( @@ -616,13 +723,12 @@ def xml_model(self): model_kwargs = {"odk:xforms-version": constants.CURRENT_XFORMS_VERSION} - entity_features = getattr(self, constants.ENTITY_FEATURES, []) - if len(entity_features) > 0: - if "offline" in entity_features: + if self.entity_features: + if "offline" in self.entity_features: model_kwargs["entities:entities-version"] = ( constants.ENTITIES_OFFLINE_VERSION ) - elif "update" in entity_features: + elif "update" in self.entity_features: model_kwargs["entities:entities-version"] = ( constants.ENTITIES_UPDATE_VERSION ) @@ -664,8 +770,9 @@ def xml_instance(self, **kwargs): result = Section.xml_instance(self, survey=self, **kwargs) # set these first to prevent overwriting id and version - for key, value in self.attribute.items(): - result.setAttribute(str(key), value) + if self.attribute: + for key, value in self.attribute.items(): + result.setAttribute(str(key), value) result.setAttribute("id", self.id_string) @@ -696,7 +803,7 @@ def _add_to_nested_dict(self, dicty, path, value): dicty[path[0]] = {} self._add_to_nested_dict(dicty[path[0]], path[1:], value) - def _redirect_is_search_itext(self, element: SurveyElement) -> bool: + def _redirect_is_search_itext(self, element: Question) -> bool: """ For selects using the "search()" function, redirect itext for in-line items. @@ -728,11 +835,12 @@ def _redirect_is_search_itext(self, element: SurveyElement) -> bool: "Remove the 'search()' usage, or change the select type." ) raise PyXFormError(msg) - itemset = element[constants.ITEMSET] - self.choices.pop(itemset, None) - element[constants.ITEMSET] = "" - for i, opt in enumerate(element.get(constants.CHILDREN, [])): - opt["_choice_itext_id"] = f"{element[constants.LIST_NAME_U]}-{i}" + if self.choices: + element.children = self.choices.get(element[constants.ITEMSET], None) + element[constants.ITEMSET] = "" + if element.children is not None: + for i, opt in enumerate(element.children): + opt["_choice_itext_id"] = f"{element[constants.LIST_NAME_U]}-{i}" return is_search def _setup_translations(self): @@ -766,15 +874,19 @@ def get_choices(): for idx, choice in enumerate(choice_list): for col_name, choice_value in choice.items(): lang_choice = None + if not choice_value: + continue if col_name == constants.MEDIA: has_media = True - if isinstance(choice_value, dict): lang_choice = choice_value - multi_language = True elif col_name == constants.LABEL: - lang_choice = {self.default_language: choice_value} - if is_label_dynamic(choice_value): - dyn_label = True + if isinstance(choice_value, dict): + lang_choice = choice_value + multi_language = True + else: + lang_choice = {self.default_language: choice_value} + if is_label_dynamic(choice_value): + dyn_label = True if lang_choice is not None: # e.g. (label, {"default": "Yes"}, "consent", 0) choices.append((col_name, lang_choice, list_name, idx)) @@ -790,27 +902,33 @@ def get_choices(): c[0], c[1], f"{c[2]}-{c[3]}" ) - for path, value in get_choices(): - last_path = path.pop() - leaf_value = {last_path: value, constants.TYPE: constants.CHOICE} - self._add_to_nested_dict(self._translations, path, leaf_value) + if self.choices: + for path, value in get_choices(): + last_path = path.pop() + leaf_value = {last_path: value, constants.TYPE: constants.CHOICE} + self._add_to_nested_dict(self._translations, path, leaf_value) select_types = set(aliases.select.keys()) search_lists = set() non_search_lists = set() - for element in self.iter_descendants(): - itemset = element.get("itemset") - if itemset is not None: - element._itemset_multi_language = itemset in itemsets_multi_language - element._itemset_has_media = itemset in itemsets_has_media - element._itemset_dyn_label = itemset in itemsets_has_dyn_label - - if element[constants.TYPE] in select_types: - select_ref = (element[constants.NAME], element[constants.LIST_NAME_U]) - if self._redirect_is_search_itext(element=element): - search_lists.add(select_ref) - else: - non_search_lists.add(select_ref) + for element in self.iter_descendants( + condition=lambda i: isinstance(i, Question | Section) + ): + if isinstance(element, MultipleChoiceQuestion): + if element.itemset is not None: + element._itemset_multi_language = ( + element.itemset in itemsets_multi_language + ) + element._itemset_has_media = element.itemset in itemsets_has_media + element._itemset_dyn_label = element.itemset in itemsets_has_dyn_label + + if element.type in select_types: + select_ref = (element[constants.NAME], element[constants.LIST_NAME_U]) + if self._redirect_is_search_itext(element=element): + search_lists.add(select_ref) + self._search_lists.add(element[constants.LIST_NAME_U]) + else: + non_search_lists.add(select_ref) # Skip creation of translations for choices in selects. The creation of these # translations is done above in this function. @@ -891,7 +1009,7 @@ def _set_up_media_translations(media_dict, translation_key): media_dict = media_dict_default for media_type, possibly_localized_media in media_dict.items(): - if media_type not in SurveyElement.SUPPORTED_MEDIA: + if media_type not in constants.SUPPORTED_MEDIA_TYPES: raise PyXFormError("Media type: " + media_type + " not supported") if isinstance(possibly_localized_media, dict): @@ -921,7 +1039,9 @@ def _set_up_media_translations(media_dict, translation_key): translations_trans_key[media_type] = media for survey_element in self.iter_descendants( - condition=lambda i: not isinstance(i, Option) + condition=lambda i: not isinstance( + i, Survey | EntityDeclaration | ExternalInstance | Tag | Option + ) ): # Skip set up of media for choices in selects. Translations for their media # content should have been set up in _setup_translations, with one copy of @@ -1027,7 +1147,6 @@ def __unicode__(self): return f"" def _setup_xpath_dictionary(self): - self._xpath: dict[str, SurveyElement | None] = {} # pylint: disable=attribute-defined-outside-init for element in self.iter_descendants(lambda i: isinstance(i, Question | Section)): if element.name in self._xpath: self._xpath[element.name] = None @@ -1144,7 +1263,7 @@ def _is_return_relative_path() -> bool: raise PyXFormError(intro + " There is no survey element with this name.") if self._xpath[name] is None: raise PyXFormError( - intro + " There are multiple survey elements" " with this name." + intro + " There are multiple survey elements with this name." ) if _is_return_relative_path(): @@ -1159,7 +1278,13 @@ def _is_return_relative_path() -> bool: ) return " " + last_saved_prefix + self._xpath[name].get_xpath() + " " - def insert_xpaths(self, text, context, use_current=False, reference_parent=False): + def insert_xpaths( + self, + text: str, + context: SurveyElement, + use_current: bool = False, + reference_parent: bool = False, + ): """ Replace all instances of ${var} with the xpath to var. """ @@ -1169,6 +1294,7 @@ def _var_repl_function(matchobj): matchobj, context, use_current, reference_parent ) + # "text" may actually be a dict, e.g. for custom attributes. return re.sub(BRACKETED_TAG_REGEX, _var_repl_function, str(text)) def _var_repl_output_function(self, matchobj, context): @@ -1227,7 +1353,7 @@ def print_xform_to_file( if warnings is None: warnings = [] if not path: - path = self._print_name + ".xml" + path = self.id_string + ".xml" if pretty_print: xml = self._to_pretty_xml() else: @@ -1272,17 +1398,18 @@ def to_xml(self, validate=True, pretty_print=True, warnings=None, enketo=False): # So it must be explicitly created, opened, closed, and removed. tmp = tempfile.NamedTemporaryFile(delete=False) tmp.close() + tmp_path = Path(tmp.name) try: # this will throw an exception if the xml is not valid xml = self.print_xform_to_file( - path=tmp.name, + path=tmp_path, validate=validate, pretty_print=pretty_print, warnings=warnings, enketo=enketo, ) finally: - Path(tmp.name).unlink(missing_ok=True) + tmp_path.unlink(missing_ok=True) return xml def instantiate(self): diff --git a/pyxform/survey_element.py b/pyxform/survey_element.py index d455729a..f72d4f74 100644 --- a/pyxform/survey_element.py +++ b/pyxform/survey_element.py @@ -4,18 +4,18 @@ import json import re -from collections.abc import Callable, Generator +from collections.abc import Callable, Generator, Iterable, Mapping from itertools import chain -from typing import TYPE_CHECKING, Any, ClassVar, Optional +from typing import TYPE_CHECKING, Optional from pyxform import aliases as alias from pyxform import constants as const from pyxform.errors import PyXFormError from pyxform.parsing.expression import is_xml_tag -from pyxform.question_type_dictionary import QUESTION_TYPE_DICT from pyxform.utils import ( BRACKETED_TAG_REGEX, INVALID_XFORM_TAG_REGEXP, + DetachableElement, default_is_dynamic, node, ) @@ -23,56 +23,21 @@ if TYPE_CHECKING: from pyxform.survey import Survey - from pyxform.utils import DetachableElement + # The following are important keys for the underlying dict that describes SurveyElement -FIELDS = { - "name": str, - const.COMPACT_TAG: str, # used for compact (sms) representation - "sms_field": str, - "sms_option": str, - "label": str, - "hint": str, - "guidance_hint": str, - "default": str, - "type": str, - "appearance": str, - "parameters": dict, - "intent": str, - "jr:count": str, - "bind": dict, - "instance": dict, - "control": dict, - "media": dict, +SURVEY_ELEMENT_FIELDS = ( + "name", + "label", # this node will also have a parent and children, like a tree! - "parent": lambda: None, - "children": list, - "itemset": str, - "choice_filter": str, - "query": str, - "autoplay": str, - "flat": lambda: False, - "action": str, - const.LIST_NAME_U: str, - "trigger": str, -} - - -def _overlay(over, under): - if isinstance(under, dict): - result = under.copy() - result.update(over) - return result - return over if over else under - - -SURVEY_ELEMENT_LOCAL_KEYS = { - "_survey_element_default_type", - "_survey_element_xpath", -} + "parent", + "extra_data", +) +SURVEY_ELEMENT_EXTRA_FIELDS = ("_survey_element_xpath",) +SURVEY_ELEMENT_SLOTS = (*SURVEY_ELEMENT_FIELDS, *SURVEY_ELEMENT_EXTRA_FIELDS) -class SurveyElement(dict): +class SurveyElement(Mapping): """ SurveyElement is the base class we'll looks for the following keys in kwargs: name, label, hint, type, bind, control, parent, @@ -80,70 +45,95 @@ class SurveyElement(dict): """ __name__ = "SurveyElement" - FIELDS: ClassVar[dict[str, Any]] = FIELDS.copy() - - def _dict_get(self, key): - """Built-in dict.get to bypass __getattr__""" - return dict.get(self, key) - - def __getattr__(self, key): - """ - Get attributes from FIELDS rather than the class. - """ - if key in self.FIELDS: - under = None - default = self._dict_get("_survey_element_default_type") - if default is not None: - under = default.get(key, None) - if not under: - return self._dict_get(key) - return _overlay(self._dict_get(key), under) - raise AttributeError(key) + __slots__ = SURVEY_ELEMENT_SLOTS def __hash__(self): return hash(id(self)) + def __getitem__(self, key): + return self.__getattribute__(key) + + @staticmethod + def get_slot_names() -> tuple[str, ...]: + """Each subclass must provide a list of slots from itself and all parents.""" + return SURVEY_ELEMENT_SLOTS + + def __len__(self): + return len(self.get_slot_names()) + + def __iter__(self): + return iter(self.get_slot_names()) + + def __setitem__(self, key, value): + self.__setattr__(key, value) + def __setattr__(self, key, value): if key == "parent": # If object graph position changes then invalidate cached. self._survey_element_xpath = None - self[key] = value - - def __init__(self, **kwargs): - self._survey_element_default_type: dict = kwargs.get( - "question_type_dict", QUESTION_TYPE_DICT - ).get(kwargs.get("type"), {}) + super().__setattr__(key, value) + + def __init__( + self, + name: str, + label: str | dict | None = None, + fields: tuple[str, ...] | None = None, + **kwargs, + ): + # Internals self._survey_element_xpath: str | None = None - for key, default in self.FIELDS.items(): - self[key] = kwargs.get(key, default()) - self._link_children() + + # Structure + self.parent: SurveyElement | None = None + self.extra_data: dict | None = None + + # Settings + self.name: str = name + self.label: str | dict | None = label + + if fields is not None: + for key in fields: + if key not in SURVEY_ELEMENT_FIELDS: + value = kwargs.pop(key, None) + if value or not hasattr(self, key): + self[key] = value + if len(kwargs) > 0: + self.extra_data = kwargs + + if hasattr(self, const.CHILDREN): + self._link_children() # Create a space label for unlabeled elements with the label # appearance tag. # This is because such elements are used to label the # options for selects in a field-list and might want blank labels for # themselves. - if self.control.get("appearance") == "label" and not self.label: - self["label"] = " " + if ( + hasattr(self, "control") + and self.control + and self.control.get("appearance") == "label" + and not self.label + ): + self.label = " " super().__init__() def _link_children(self): - for child in self.children: - child.parent = self + if self.children is not None: + for child in self.children: + child.parent = self def add_child(self, child): + if self.children is None: + self.children = [] self.children.append(child) child.parent = self def add_children(self, children): - if isinstance(children, list): + if isinstance(children, list | tuple): for child in children: self.add_child(child) else: self.add_child(children) - # Supported media types for attaching to questions - SUPPORTED_MEDIA = ("image", "big-image", "audio", "video") - def validate(self): if not is_xml_tag(self.name): invalid_char = re.search(INVALID_XFORM_TAG_REGEXP, self.name) @@ -166,12 +156,13 @@ def iter_descendants( yield self else: yield self - for e in self.children: - yield from e.iter_descendants(condition=condition) + if hasattr(self, const.CHILDREN) and self.children is not None: + for e in self.children: + yield from e.iter_descendants(condition=condition) def iter_ancestors( self, condition: Callable[["SurveyElement"], bool] | None = None - ) -> Generator[tuple["SurvekyElement", int], None, None]: + ) -> Generator[tuple["SurveyElement", int], None, None]: """ Get each self.parent with their distance from self (starting at 1). @@ -250,10 +241,10 @@ def condition(e): # The "flat" setting was added in 2013 to support ODK Tables, and results in # a data instance with no element nesting. Not sure if still needed. return isinstance(e, Survey) or ( - not isinstance(e, Survey) and not e.get("flat") + not isinstance(e, Survey) and not (hasattr(e, "flat") and e.get("flat")) ) - current_value = self._dict_get("_survey_element_xpath") + current_value = self._survey_element_xpath if current_value is None: if condition(self): self_element = (self,) @@ -268,7 +259,7 @@ def condition(e): return new_value return current_value - def _delete_keys_from_dict(self, dictionary: dict, keys: list): + def _delete_keys_from_dict(self, dictionary: dict, keys: Iterable[str]): """ Deletes a list of keys from a dictionary. Credits: https://stackoverflow.com/a/49723101 @@ -281,27 +272,33 @@ def _delete_keys_from_dict(self, dictionary: dict, keys: list): if isinstance(value, dict): self._delete_keys_from_dict(value, keys) - def to_json_dict(self): + def copy(self): + return {k: self[k] for k in self} + + def to_json_dict(self, delete_keys: Iterable[str] | None = None) -> dict: """ Create a dict copy of this survey element by removing inappropriate attributes and converting its children to dicts """ self.validate() result = self.copy() - to_delete = [ - "parent", - "question_type_dictionary", - "_created", - "_xpath", - *SURVEY_ELEMENT_LOCAL_KEYS, - ] + to_delete = chain(SURVEY_ELEMENT_EXTRA_FIELDS, ("extra_data",)) + if delete_keys is not None: + to_delete = chain(to_delete, delete_keys) # Delete all keys that may cause a "Circular Reference" # error while converting the result to JSON self._delete_keys_from_dict(result, to_delete) - children = result.pop("children") - result["children"] = [] - for child in children: - result["children"].append(child.to_json_dict()) + children = result.pop("children", None) + if children: + result["children"] = [ + c.to_json_dict(delete_keys=("parent",)) for c in children + ] + choices = result.pop("choices", None) + if choices: + result["choices"] = { + list_name: [o.to_json_dict(delete_keys=("parent",)) for o in options] + for list_name, options in choices.items() + } # Translation items with "output_context" have circular references. if "_translations" in result: for lang in result["_translations"].values(): @@ -386,7 +383,9 @@ def get_translations(self, default_language): } for display_element in ["label", "hint", "guidance_hint"]: - label_or_hint = self[display_element] + label_or_hint = None + if hasattr(self, display_element): + label_or_hint = self[display_element] if ( display_element == "label" @@ -400,6 +399,7 @@ def get_translations(self, default_language): # how they're defined - https://opendatakit.github.io/xforms-spec/#languages if ( display_element == "guidance_hint" + and label_or_hint is not None and not isinstance(label_or_hint, dict) and len(label_or_hint) > 0 ): @@ -409,8 +409,11 @@ def get_translations(self, default_language): if ( display_element == "hint" and not isinstance(label_or_hint, dict) + and hasattr(self, "hint") + and self.get("hint") is not None and len(label_or_hint) > 0 - and "guidance_hint" in self.keys() + and hasattr(self, "guidance_hint") + and self.get("guidance_hint") is not None and len(self["guidance_hint"]) > 0 ): label_or_hint = {default_language: label_or_hint} @@ -428,11 +431,15 @@ def get_translations(self, default_language): def needs_itext_ref(self): return isinstance(self.label, dict) or ( - isinstance(self.media, dict) and len(self.media) > 0 + hasattr(self, const.MEDIA) and isinstance(self.media, dict) and self.media ) def get_setvalue_node_for_dynamic_default(self, survey: "Survey", in_repeat=False): - if not self.default or not default_is_dynamic(self.default, self.type): + if ( + not hasattr(self, "default") + or not self.default + or not default_is_dynamic(self.default, self.type) + ): return None default_with_xpath_paths = survey.insert_xpaths(self.default, self) @@ -455,17 +462,21 @@ def xml_label(self, survey: "Survey"): # then we need to make a label with an itext ref ref = f"""jr:itext('{self._translation_path("label")}')""" return node("label", ref=ref) - else: + elif self.label: label, output_inserted = survey.insert_output_values(self.label, self) return node("label", label, toParseString=output_inserted) + else: + return node("label") def xml_hint(self, survey: "Survey"): if isinstance(self.hint, dict) or self.guidance_hint: path = self._translation_path("hint") return node("hint", ref=f"jr:itext('{path}')") - else: + elif self.hint: hint, output_inserted = survey.insert_output_values(self.hint, self) return node("hint", hint, toParseString=output_inserted) + else: + return node("hint") def xml_label_and_hint(self, survey: "Survey") -> list["DetachableElement"]: """ @@ -492,48 +503,54 @@ def xml_label_and_hint(self, survey: "Survey") -> list["DetachableElement"]: raise PyXFormError(msg) # big-image must combine with image - if "image" not in self.media and "big-image" in self.media: + if ( + self.media is not None + and "image" not in self.media + and "big-image" in self.media + ): raise PyXFormError( "To use big-image, you must also specify an image for the survey element named {self.name}." ) return result - def xml_bindings(self, survey: "Survey"): + def xml_bindings( + self, survey: "Survey" + ) -> Generator[DetachableElement | None, None, None]: """ Return the binding(s) for this survey element. """ - bind_dict = self.bind.copy() - if self.get("flat"): + if not hasattr(self, "bind") or self.get("bind") is None: + return None + if hasattr(self, "flat") and self.get("flat"): # Don't generate bind element for flat groups. return None - if bind_dict: + + bind_dict = {} + for k, v in self.bind.items(): # the expression goes in a setvalue action - if self.trigger and "calculate" in self.bind: - del bind_dict["calculate"] - - for k, v in bind_dict.items(): - # I think all the binding conversions should be happening on - # the xls2json side. - if ( - hashable(v) - and v in alias.BINDING_CONVERSIONS - and k in const.CONVERTIBLE_BIND_ATTRIBUTES - ): - v = alias.BINDING_CONVERSIONS[v] - elif k == "jr:constraintMsg" and ( - isinstance(v, dict) or re.search(BRACKETED_TAG_REGEX, v) - ): - v = f"""jr:itext('{self._translation_path("jr:constraintMsg")}')""" - elif k == "jr:requiredMsg" and ( - isinstance(v, dict) or re.search(BRACKETED_TAG_REGEX, v) - ): - v = f"""jr:itext('{self._translation_path("jr:requiredMsg")}')""" - elif k == "jr:noAppErrorString" and isinstance(v, dict): - v = f"""jr:itext('{self._translation_path("jr:noAppErrorString")}')""" - bind_dict[k] = survey.insert_xpaths(v, context=self) - return [node("bind", nodeset=self.get_xpath(), **bind_dict)] - return None + if hasattr(self, "trigger") and self.trigger and k == "calculate": + continue + # I think all the binding conversions should be happening on + # the xls2json side. + if ( + hashable(v) + and v in alias.BINDING_CONVERSIONS + and k in const.CONVERTIBLE_BIND_ATTRIBUTES + ): + v = alias.BINDING_CONVERSIONS[v] + elif k == "jr:constraintMsg" and ( + isinstance(v, dict) or re.search(BRACKETED_TAG_REGEX, v) + ): + v = f"""jr:itext('{self._translation_path("jr:constraintMsg")}')""" + elif k == "jr:requiredMsg" and ( + isinstance(v, dict) or re.search(BRACKETED_TAG_REGEX, v) + ): + v = f"""jr:itext('{self._translation_path("jr:requiredMsg")}')""" + elif k == "jr:noAppErrorString" and isinstance(v, dict): + v = f"""jr:itext('{self._translation_path("jr:noAppErrorString")}')""" + bind_dict[k] = survey.insert_xpaths(v, context=self) + yield node("bind", nodeset=self.get_xpath(), **bind_dict) def xml_control(self, survey: "Survey"): """ @@ -542,16 +559,6 @@ def xml_control(self, survey: "Survey"): """ raise NotImplementedError("Control not implemented") - def xml_action(self): - """ - Return the action for this survey element. - """ - if self.action: - action_dict = self.action.copy() - return node(action_dict.pop("name"), ref=self.get_xpath(), **action_dict) - - return None - def hashable(v): """Determine whether `v` can be hashed.""" diff --git a/pyxform/utils.py b/pyxform/utils.py index dcf4414b..66eb771b 100644 --- a/pyxform/utils.py +++ b/pyxform/utils.py @@ -6,8 +6,9 @@ import csv import json import re -from collections.abc import Generator +from collections.abc import Generator, Iterable from io import StringIO +from itertools import chain from json.decoder import JSONDecodeError from typing import Any from xml.dom import Node @@ -116,9 +117,7 @@ def node(*args, **kwargs) -> DetachableElement: # Move node's children to the result Element # discarding node's root for child in parsed_node.childNodes: - # No tests seem to involve moving elements with children. - # Deep clone used anyway in case of unknown edge cases. - result.appendChild(child.cloneNode(deep=True)) + result.appendChild(child.cloneNode(deep=False)) else: result.setAttribute(k, v) @@ -313,16 +312,16 @@ def coalesce(*args): def combine_lists( - a: list[dict] | None = None, - b: list[dict] | None = None, -) -> list[dict]: + a: Iterable | None = None, + b: Iterable | None = None, +) -> Iterable | None: """Get the list that is not None, or both lists combined, or an empty list.""" - if a is None and b is None: - combined = [] + if b is None: + if a is None: + return None + else: + return a elif a is None: - combined = b - elif b is None: - combined = a + return b else: - combined = a + b - return combined + return chain(a, b) diff --git a/pyxform/xls2json.py b/pyxform/xls2json.py index 90699c6e..af281ef9 100644 --- a/pyxform/xls2json.py +++ b/pyxform/xls2json.py @@ -1132,13 +1132,57 @@ def workbook_to_json( specify_other_question = None if parse_dict.get("specify_other") is not None: sheet_translations.or_other_seen = True - select_type += constants.SELECT_OR_OTHER_SUFFIX if row.get(constants.CHOICE_FILTER): msg = ( ROW_FORMAT_STRING % row_number + " Choice filter not supported with or_other." ) raise PyXFormError(msg) + itemset_choices = choices.get(list_name, None) + if not itemset_choices: + msg = ( + ROW_FORMAT_STRING % row_number + + " Please specify choices for this 'or other' question." + ) + raise PyXFormError(msg) + if ( + itemset_choices is not None + and isinstance(itemset_choices, list) + and not any( + c[constants.NAME] == constants.OR_OTHER_CHOICE[constants.NAME] + for c in itemset_choices + ) + ): + if any( + isinstance(c.get(constants.LABEL), dict) + for c in itemset_choices + ): + itemset_choices.append( + { + constants.NAME: constants.OR_OTHER_CHOICE[ + constants.NAME + ], + constants.LABEL: { + lang: constants.OR_OTHER_CHOICE[constants.LABEL] + for lang in { + lang + for c in itemset_choices + for lang in c[constants.LABEL] + if isinstance(c.get(constants.LABEL), dict) + } + }, + } + ) + else: + itemset_choices.append(constants.OR_OTHER_CHOICE) + specify_other_question = { + constants.TYPE: "text", + constants.NAME: f"{row[constants.NAME]}_other", + constants.LABEL: "Specify other.", + constants.BIND: { + "relevant": f"selected(../{row[constants.NAME]}, 'other')" + }, + } new_json_dict = row.copy() new_json_dict[constants.TYPE] = select_type diff --git a/tests/test_builder.py b/tests/test_builder.py index ee27322a..2c5ded36 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -32,8 +32,8 @@ class BuilderTests(TestCase): # self.assertTrue(xml_compare(expected, result)) def test_unknown_question_type(self): - survey = utils.build_survey("unknown_question_type.xls") - self.assertRaises(PyXFormError, survey.to_xml) + with self.assertRaises(PyXFormError): + utils.build_survey("unknown_question_type.xls") def test_uniqueness_of_section_names(self): # Looking at the xls file, I think this test might be broken. @@ -43,9 +43,7 @@ def test_uniqueness_of_section_names(self): def setUp(self): self.this_directory = os.path.dirname(__file__) survey_out = Survey(name="age", sms_keyword="age", type="survey") - question = InputQuestion(name="age") - question.type = "integer" - question.label = "How old are you?" + question = InputQuestion(name="age", type="integer", label="How old are you?") survey_out.add_child(question) self.survey_out_dict = survey_out.to_json_dict() print_pyobj_to_json( diff --git a/tests/test_j2x_creation.py b/tests/test_j2x_creation.py index 72761275..f4b69989 100644 --- a/tests/test_j2x_creation.py +++ b/tests/test_j2x_creation.py @@ -16,12 +16,15 @@ def test_survey_can_be_created_in_a_slightly_less_verbose_manner(self): {"name": "blue", "label": "Blue"}, ] - q = MultipleChoiceQuestion(name="Favorite_Color", choices=option_dict_array) - q.type = "select one" - s = Survey(name="Roses_are_Red", children=[q]) + q = MultipleChoiceQuestion( + name="Favorite_Color", type="select one", choices=option_dict_array + ) + s = Survey(name="Roses_are_Red") + s.add_child(q) expected_dict = { "name": "Roses_are_Red", + "type": "survey", "children": [ { "name": "Favorite_Color", @@ -34,36 +37,38 @@ def test_survey_can_be_created_in_a_slightly_less_verbose_manner(self): ], } - self.assertEqual(s.to_json_dict(), expected_dict) + self.assertEqual(expected_dict, s.to_json_dict()) - def allow_surveys_with_comment_rows(self): + def test_allow_surveys_with_comment_rows(self): """assume that a survey with rows that don't have name, type, or label headings raise warning only""" path = utils.path_to_text_fixture("allow_comment_rows_test.xls") survey = create_survey_from_xls(path) expected_dict = { - "default_language": "default", - "id_string": "allow_comment_rows_test", "children": [ { - "name": "farmer_name", "label": {"English": "First and last name of farmer"}, + "name": "farmer_name", "type": "text", - } + }, + { + "children": [ + { + "bind": {"jr:preload": "uid", "readonly": "true()"}, + "name": "instanceID", + "type": "calculate", + } + ], + "control": {"bodyless": True}, + "name": "meta", + "type": "group", + }, ], - "name": "allow_comment_rows_test", - "_translations": { - "English": { - "/allow_comment_rows_test/farmer_name:label": { - "long": "First and last name of farmer" - } - } - }, + "default_language": "default", + "id_string": "allow_comment_rows_test", + "name": "data", + "sms_keyword": "allow_comment_rows_test", "title": "allow_comment_rows_test", - "_xpath": { - "allow_comment_rows_test": "/allow_comment_rows_test", - "farmer_name": "/allow_comment_rows_test/farmer_name", - }, "type": "survey", } - self.assertEqual(survey.to_json_dict(), expected_dict) + self.assertEqual(expected_dict, survey.to_json_dict()) diff --git a/tests/test_j2x_question.py b/tests/test_j2x_question.py index 14c65002..c2229d90 100644 --- a/tests/test_j2x_question.py +++ b/tests/test_j2x_question.py @@ -2,6 +2,7 @@ Testing creation of Surveys using verbose methods """ +from collections.abc import Generator from unittest import TestCase from pyxform import Survey @@ -19,6 +20,8 @@ def ctw(control): """ if isinstance(control, list) and len(control) == 1: control = control[0] + elif isinstance(control, Generator): + control = next(control) return control.toxml() @@ -188,7 +191,8 @@ def test_simple_phone_number_question_type_multilingual(self): "type": "string", "constraint": r"regex(., '^\d*$')", } - observed = {k: v for k, v in q.xml_bindings(survey=self.s)[0].attributes.items()} # noqa: C416 + binding = next(q.xml_bindings(survey=self.s)) + observed = {k: v for k, v in binding.attributes.items()} # noqa: C416 self.assertDictEqual(expected, observed) def test_simple_select_all_question_multilingual(self): diff --git a/tests/test_j2x_xform_build_preparation.py b/tests/test_j2x_xform_build_preparation.py index 062a2280..c83d7440 100644 --- a/tests/test_j2x_xform_build_preparation.py +++ b/tests/test_j2x_xform_build_preparation.py @@ -15,10 +15,10 @@ def test_dictionary_consolidates_duplicate_entries(self): ] first_yesno_question = MultipleChoiceQuestion( - name="yn_q1", options=yes_or_no_dict_array, type="select one" + name="yn_q1", choices=yes_or_no_dict_array, type="select one" ) second_yesno_question = MultipleChoiceQuestion( - name="yn_q2", options=yes_or_no_dict_array, type="select one" + name="yn_q2", choices=yes_or_no_dict_array, type="select one" ) s = Survey(name="yes_or_no_tests")