diff --git a/pyxform/builder.py b/pyxform/builder.py
index 4092028c..d7edad2c 100644
--- a/pyxform/builder.py
+++ b/pyxform/builder.py
@@ -83,12 +83,13 @@ def __init__(self, **kwargs):
         # I don't know why we would need an explicit none option for
         # select alls
         self._add_none_option = False
+        self._sections: Optional[Dict[str, Dict]] = None
         self.set_sections(kwargs.get("sections", {}))
 
         # dictionary of setvalue target and value tuple indexed by triggering element
         self.setvalues_by_triggering_ref = {}
         # For tracking survey-level choices while recursing through the survey.
-        self.choices: Dict[str, Any] = {}
+        self._choices: Dict[str, Any] = {}
 
     def set_sections(self, sections):
         """
@@ -112,13 +113,13 @@ def create_survey_element_from_dict(
 
         if d[const.TYPE] in self.SECTION_CLASSES:
             if d[const.TYPE] == const.SURVEY:
-                self.choices = copy.deepcopy(d.get(const.CHOICES, {}))
+                self._choices = copy.deepcopy(d.get(const.CHOICES, {}))
 
             section = self._create_section_from_dict(d)
 
             if d[const.TYPE] == const.SURVEY:
                 section.setvalues_by_triggering_ref = self.setvalues_by_triggering_ref
-                section.choices = self.choices
+                section.choices = self._choices
 
             return section
         elif d[const.TYPE] == const.LOOP:
@@ -263,7 +264,7 @@ def _create_section_from_dict(self, d):
             survey_element = self.create_survey_element_from_dict(copy.deepcopy(child))
             if child[const.TYPE].endswith(const.SELECT_OR_OTHER_SUFFIX):
                 select_question = survey_element[0]
-                itemset_choices = self.choices.get(select_question[const.ITEMSET], None)
+                itemset_choices = self._choices.get(select_question[const.ITEMSET], None)
                 if (
                     itemset_choices is not None
                     and isinstance(itemset_choices, list)
@@ -307,7 +308,8 @@ def _create_loop_from_dict(self, d):
         # TODO: Verify that nothing breaks if this returns a list
         return result.children
 
-    def _name_and_label_substitutions(self, question_template, column_headers):
+    @staticmethod
+    def _name_and_label_substitutions(question_template, column_headers):
         # if the label in column_headers has multiple languages setup a
         # dictionary by language to do substitutions.
         info_by_lang = {}
diff --git a/pyxform/constants.py b/pyxform/constants.py
index 73c0ed65..a1f469ce 100644
--- a/pyxform/constants.py
+++ b/pyxform/constants.py
@@ -64,6 +64,8 @@
 SELECT_ALL_THAT_APPLY = "select all that apply"
 SELECT_OR_OTHER_SUFFIX = " or specify other"
 RANK = "rank"
+QUESTION = "question"
+CHOICE = "choice"
 CHOICES = "choices"
 
 # XLS Specific constants
diff --git a/pyxform/entities/entities_parsing.py b/pyxform/entities/entities_parsing.py
index f4c4a620..30a60f0d 100644
--- a/pyxform/entities/entities_parsing.py
+++ b/pyxform/entities/entities_parsing.py
@@ -1,4 +1,4 @@
-from typing import Dict, List
+from typing import Any, Dict, List
 
 from pyxform import constants
 from pyxform.errors import PyXFormError
@@ -6,8 +6,8 @@
 
 
 def get_entity_declaration(
-    entities_sheet: Dict, workbook_dict: Dict, warnings: List
-) -> Dict:
+    entities_sheet: List[Dict], workbook_dict: Dict[str, List[Dict]], warnings: List[str]
+) -> Dict[str, Any]:
     if len(entities_sheet) == 0:
         similar = find_sheet_misspellings(
             key=constants.ENTITIES, keys=workbook_dict.keys()
@@ -81,7 +81,7 @@ def get_validated_dataset_name(entity):
 
 
 def validate_entity_saveto(
-    row: Dict, row_number: int, entity_declaration: Dict, in_repeat: bool
+    row: Dict, row_number: int, entity_declaration: Dict[str, Any], in_repeat: bool
 ):
     save_to = row.get("bind", {}).get("entities:saveto", "")
     if not save_to:
diff --git a/pyxform/instance_info.py b/pyxform/instance_info.py
deleted file mode 100644
index 049d8974..00000000
--- a/pyxform/instance_info.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-InstanceInfo class module.
-"""
-
-
-class InstanceInfo:
-    """Standardise Instance details relevant during XML generation."""
-
-    def __init__(self, type, context, name, src, instance):
-        self.type = type
-        self.context = context
-        self.name = name
-        self.src = src
-        self.instance = instance
diff --git a/pyxform/question.py b/pyxform/question.py
index 1b53a0e2..4421e630 100644
--- a/pyxform/question.py
+++ b/pyxform/question.py
@@ -14,15 +14,19 @@
 from pyxform.errors import PyXFormError
 from pyxform.question_type_dictionary import QUESTION_TYPE_DICT
 from pyxform.survey_element import SurveyElement
-from pyxform.utils import (
-    PYXFORM_REFERENCE_REGEX,
-    default_is_dynamic,
-    has_dynamic_label,
-    node,
-)
+from pyxform.utils import PYXFORM_REFERENCE_REGEX, default_is_dynamic, node
 
 
 class Question(SurveyElement):
+    FIELDS = SurveyElement.FIELDS.copy()
+    FIELDS.update(
+        {
+            "_itemset_multi_language": bool,
+            "_itemset_has_media": bool,
+            "_itemset_dyn_label": bool,
+        }
+    )
+
     def validate(self):
         SurveyElement.validate(self)
 
@@ -202,12 +206,6 @@ def build_xml(self):
         for element in self.xml_label_and_hint():
             result.appendChild(element)
 
-        choices = survey.get("choices")
-        multi_language = False
-        if choices is not None and len(choices) > 0:
-            first_choices = next(iter(choices.values()))
-            multi_language = isinstance(first_choices[0].get("label"), dict)
-
         # itemset are only supposed to be strings,
         # check to prevent the rare dicts that show up
         if self["itemset"] and isinstance(self["itemset"], str):
@@ -227,16 +225,13 @@ def build_xml(self):
                 else EXTERNAL_CHOICES_ITEMSET_REF_LABEL,
             )
 
-            has_media = False
-            has_dyn_label = False
+            multi_language = self.get("_itemset_multi_language", False)
+            has_media = self.get("_itemset_has_media", False)
+            has_dyn_label = self.get("_itemset_dyn_label", False)
             is_previous_question = bool(
                 PYXFORM_REFERENCE_REGEX.search(self.get("itemset"))
             )
 
-            if choices.get(itemset):
-                has_media = bool(choices[itemset][0].get("media"))
-                has_dyn_label = has_dynamic_label(choices[itemset], multi_language)
-
             if file_extension in EXTERNAL_INSTANCE_EXTENSIONS:
                 itemset = itemset
             else:
diff --git a/pyxform/survey.py b/pyxform/survey.py
index 242fbceb..9551413d 100644
--- a/pyxform/survey.py
+++ b/pyxform/survey.py
@@ -10,14 +10,13 @@
 from collections import defaultdict
 from datetime import datetime
 from functools import lru_cache
-from typing import Iterator, List, Optional, Tuple
+from typing import Generator, Iterator, List, Optional, Tuple
 
 from pyxform import aliases, constants
 from pyxform.constants import EXTERNAL_INSTANCE_EXTENSIONS
 from pyxform.errors import PyXFormError, ValidationError
 from pyxform.external_instance import ExternalInstance
 from pyxform.instance import SurveyInstance
-from pyxform.instance_info import InstanceInfo
 from pyxform.parsing import instance_expression
 from pyxform.question import Option, Question
 from pyxform.section import Section
@@ -47,6 +46,26 @@
 RE_XML_TEXT = re.compile(r"(>)\n\s*(\s[^<>\s].*?)\n\s*(\s</)", re.DOTALL)
 
 
+class InstanceInfo:
+    """Standardise Instance details relevant during XML generation."""
+
+    __slots__ = ("type", "context", "name", "src", "instance")
+
+    def __init__(
+        self,
+        type: str,
+        context: Optional[str],
+        name: str,
+        src: Optional[str],
+        instance: "DetachableElement",
+    ):
+        self.type: str = type
+        self.context: Optional[str] = context
+        self.name: str = name
+        self.src: Optional[str] = src
+        self.instance: "DetachableElement" = instance
+
+
 def register_nsmap():
     """Function to register NSMAP namespaces with ETree"""
     for prefix, uri in NSMAP.items():
@@ -57,7 +76,7 @@ def register_nsmap():
 register_nsmap()
 
 
-@lru_cache(maxsize=None)
+@lru_cache(maxsize=65536)  # 2^16
 def is_parent_a_repeat(survey, xpath):
     """
     Returns the XPATH of the first repeat of the given xpath in the survey,
@@ -73,7 +92,7 @@ def is_parent_a_repeat(survey, xpath):
     return is_parent_a_repeat(survey, parent_xpath)
 
 
-@lru_cache(maxsize=None)
+@lru_cache(maxsize=65536)  # 2^16
 def share_same_repeat_parent(survey, xpath, context_xpath, reference_parent=False):
     """
     Returns a tuple of the number of steps from the context xpath to the shared
@@ -151,6 +170,22 @@ def _get_steps_and_target_xpath(context_parent, xpath_parent, include_parent=Fal
     return (None, None)
 
 
+@lru_cache(maxsize=65536)  # 2^16
+def is_label_dynamic(label: str) -> bool:
+    if (
+        label is not None
+        and isinstance(label, str)
+        and re.search(BRACKETED_TAG_REGEX, label) is not None
+    ):
+        return True
+    else:
+        return False
+
+
+def recursive_dict():
+    return defaultdict(recursive_dict)
+
+
 class Survey(Section):
     """
     Survey class - represents the full XForm XML.
@@ -174,7 +209,7 @@ class Survey(Section):
             constants.COMPACT_DELIMITER: str,
             "file_name": str,
             "default_language": str,
-            "_translations": dict,
+            "_translations": recursive_dict,
             "submission_url": str,
             "auto_send": str,
             "auto_delete": str,
@@ -267,27 +302,30 @@ def xml(self):
             "h:html",
             node("h:head", node("h:title", self.title), self.xml_model()),
             node("h:body", *self.xml_control(), **body_kwargs),
-            **nsmap
+            **nsmap,
         )
 
     def get_setvalues_for_question_name(self, question_name):
         return self.setvalues_by_triggering_ref.get("${%s}" % question_name)
 
-    @staticmethod
-    def _generate_static_instances(list_name, choice_list) -> InstanceInfo:
+    def _generate_static_instances(self, list_name, choice_list) -> InstanceInfo:
         """
-        Generates <instance> elements for static data
-        (e.g. choices for select type questions)
-
-        Note that per commit message 0578242 and in xls2json.py R539, an
-        instance is only output for select items defined in the choices sheet
-        when the item has a choice_filter, and it is that way for backwards
-        compatibility.
+        Generate <instance> elements for static data (e.g. choices for selects)
         """
         instance_element_list = []
-        multi_language = isinstance(choice_list[0].get("label"), dict)
         has_media = bool(choice_list[0].get("media"))
-        has_dyn_label = has_dynamic_label(choice_list, multi_language)
+        has_dyn_label = has_dynamic_label(choice_list)
+        multi_language = False
+        if isinstance(self._translations, dict):
+            choices = tuple(
+                k
+                for items in self._translations.values()
+                for k, v in items.items()
+                if v.get(constants.TYPE, "") == constants.CHOICE
+                and k.split("-")[0] == list_name
+            )
+            if 0 < len(choices):
+                multi_language = True
 
         for idx, choice in enumerate(choice_list):
             choice_element_list = []
@@ -302,7 +340,7 @@ def _generate_static_instances(list_name, choice_list) -> InstanceInfo:
                 if (
                     not multi_language
                     and not has_media
-                    and not has_dynamic_label(choice_list, multi_language)
+                    and not has_dyn_label
                     and isinstance(value, str)
                     and name == "label"
                 ):
@@ -391,9 +429,9 @@ def get_instance_info(element, file_id):
             uri = "jr://file-csv/{}.csv".format(file_id)
 
             return InstanceInfo(
-                type=u"pulldata",
+                type="pulldata",
                 context="[type: {t}, name: {n}]".format(
-                    t=element[u"parent"][u"type"], n=element[u"parent"][u"name"]
+                    t=element["parent"]["type"], n=element["parent"]["name"]
                 ),
                 name=file_id,
                 src=uri,
@@ -626,7 +664,11 @@ def xml_instance(self, **kwargs):
 
     def _add_to_nested_dict(self, dicty, path, value):
         if len(path) == 1:
-            dicty[path[0]] = value
+            key = path[0]
+            if key in dicty and isinstance(dicty[key], dict) and isinstance(value, dict):
+                dicty[key].update(value)
+            else:
+                dicty[key] = value
             return
         if path[0] not in dicty:
             dicty[path[0]] = {}
@@ -638,32 +680,69 @@ def _setup_translations(self):
         setup media and itext functions
         """
 
-        def _setup_choice_translations(name, choice_value, itext_id):
-            for media_type_or_language, value in choice_value.items():  # noqa
+        def _setup_choice_translations(
+            name, choice_value, itext_id
+        ) -> Generator[Tuple[List[str], str], None, None]:
+            for media_or_lang, value in choice_value.items():  # noqa
                 if isinstance(value, dict):
                     for language, val in value.items():
-                        self._add_to_nested_dict(
-                            self._translations,
-                            [language, itext_id, media_type_or_language],
-                            val,
-                        )
+                        yield ([language, itext_id, media_or_lang], val)
                 else:
-                    if name == "media":
-                        self._add_to_nested_dict(
-                            self._translations,
-                            [self.default_language, itext_id, media_type_or_language],
-                            value,
-                        )
+                    if name == constants.MEDIA:
+                        yield ([self.default_language, itext_id, media_or_lang], value)
                     else:
-                        self._add_to_nested_dict(
-                            self._translations,
-                            [media_type_or_language, itext_id, "long"],
-                            value,
+                        yield ([media_or_lang, itext_id, "long"], value)
+
+        itemsets_multi_language = set()
+        itemsets_has_media = set()
+        itemsets_has_dyn_label = set()
+
+        def get_choices():
+            for list_name, choice_list in self.choices.items():
+                multi_language = False
+                has_media = False
+                dyn_label = False
+                choices = []
+                for idx, choice in enumerate(choice_list):
+                    for col_name, choice_value in choice.items():
+                        lang_choice = None
+                        if col_name == constants.MEDIA:
+                            has_media = True
+                        if isinstance(choice_value, dict):
+                            lang_choice = choice_value
+                            multi_language = True
+                        elif col_name == constants.LABEL:
+                            lang_choice = {self.default_language: choice_value}
+                            if is_label_dynamic(choice_value):
+                                dyn_label = True
+                        if lang_choice is not None:
+                            # e.g. (label, {"default": "Yes"}, "consent", 0)
+                            choices.append((col_name, lang_choice, list_name, idx))
+                if multi_language or has_media or dyn_label:
+                    if multi_language:
+                        itemsets_multi_language.add(list_name)
+                    if has_media:
+                        itemsets_has_media.add(list_name)
+                    if dyn_label:
+                        itemsets_has_dyn_label.add(list_name)
+                    for c in choices:
+                        yield from _setup_choice_translations(
+                            c[0], c[1], f"{c[2]}-{c[3]}"
                         )
 
-        self._translations = defaultdict(dict)  # pylint: disable=W0201
+        for path, value in get_choices():
+            last_path = path.pop()
+            leaf_value = {last_path: value, constants.TYPE: constants.CHOICE}
+            self._add_to_nested_dict(self._translations, path, leaf_value)
+
         select_types = set(aliases.select.keys())
         for element in self.iter_descendants():
+            itemset = element.get("itemset")
+            if itemset is not None:
+                element._itemset_multi_language = itemset in itemsets_multi_language
+                element._itemset_has_media = itemset in itemsets_has_media
+                element._itemset_dyn_label = itemset in itemsets_has_dyn_label
+
             # Skip creation of translations for choices in selects. The creation of these
             # translations is done futher below in this function.
             parent = element.get("parent")
@@ -685,37 +764,15 @@ def _setup_choice_translations(name, choice_value, itext_id):
                             form: {
                                 "text": d["text"],
                                 "output_context": d["output_context"],
-                            }
+                            },
+                            constants.TYPE: constants.QUESTION,
                         }
                     )
 
-        # This code sets up translations for choices in selects.
-        for list_name, choice_list in self.choices.items():
-            multi_language = isinstance(choice_list[0].get("label"), dict)
-            has_media = bool(choice_list[0].get("media"))
-            if (
-                not multi_language
-                and not has_media
-                and not has_dynamic_label(choice_list, multi_language)
-            ):
-                continue
-            for idx, choice in enumerate(choice_list):
-                for name, choice_value in choice.items():
-                    itext_id = "-".join([list_name, str(idx)])
-                    if isinstance(choice_value, dict):
-                        _setup_choice_translations(name, choice_value, itext_id)
-                    elif name == "label":
-                        self._add_to_nested_dict(
-                            self._translations,
-                            [self.default_language, itext_id, "long"],
-                            choice_value,
-                        )
-
     def _add_empty_translations(self):
         """
-        Adds translations so that every itext element has the same elements \
-        accross every language.
-        When translations are not provided "-" will be used.
+        Adds translations so that every itext element has the same elements across every
+        language. When translations are not provided "-" will be used.
         This disables any of the default_language fallback functionality.
         """
         paths = {}
@@ -767,7 +824,6 @@ def _set_up_media_translations(media_dict, translation_key):
 
                     # Create the required dictionaries in _translations,
                     # then add media as a leaf value:
-
                     if language not in self._translations:
                         self._translations[language] = {}
 
@@ -783,9 +839,6 @@ def _set_up_media_translations(media_dict, translation_key):
 
                     translations_trans_key[media_type] = media
 
-        if not self._translations:
-            self._translations = defaultdict(dict)  # pylint: disable=W0201
-
         for survey_element in self.iter_descendants():
             # Skip set up of media for choices in selects. Translations for their media
             # content should have been set up in _setup_translations, with one copy of
@@ -818,6 +871,9 @@ def itext(self):
                     raise Exception()
 
                 for media_type, media_value in content.items():
+                    # Ignore key indicating Question or Choice translation type.
+                    if media_type == constants.TYPE:
+                        continue
                     if isinstance(media_value, dict):
                         value, output_inserted = self.insert_output_values(
                             media_value["text"], context=media_value["output_context"]
diff --git a/pyxform/survey_element.py b/pyxform/survey_element.py
index 2831a82d..937b26ac 100644
--- a/pyxform/survey_element.py
+++ b/pyxform/survey_element.py
@@ -4,6 +4,7 @@
 """
 import json
 import re
+from collections import deque
 from functools import lru_cache
 from typing import TYPE_CHECKING
 
@@ -188,16 +189,14 @@ def get_lineage(self):
         """
         Return a the list [root, ..., self._parent, self]
         """
-        result = [self]
+        result = deque((self,))
         current_element = self
         while current_element.parent:
             current_element = current_element.parent
-            result = [current_element] + result
+            result.appendleft(current_element)
         # For some reason the root element has a True flat property...
-        output = [result[0]]
-        for item in result[1:]:
-            if not item.get("flat"):
-                output.append(item)
+        output = [result.popleft()]
+        output.extend([i for i in result if not i.get("flat")])
         return output
 
     def get_root(self):
diff --git a/pyxform/utils.py b/pyxform/utils.py
index 7786cb1c..2b7ddb48 100644
--- a/pyxform/utils.py
+++ b/pyxform/utils.py
@@ -10,7 +10,7 @@
 import re
 from collections import namedtuple
 from json.decoder import JSONDecodeError
-from typing import List, Tuple
+from typing import Dict, List, Tuple
 from xml.dom.minidom import Element, Text, parseString
 
 import openpyxl
@@ -277,17 +277,20 @@ def default_is_dynamic(element_default, element_type=None):
     return False
 
 
-# If the first or second choice label includes a reference, we must use itext.
-# Check the first two choices in case first is something like "Other".
-def has_dynamic_label(choice_list, multi_language):
-    if not multi_language:
-        for i in range(0, min(2, len(choice_list))):
-            if (
-                choice_list[i].get("label") is not None
-                and re.search(BRACKETED_TAG_REGEX, choice_list[i].get("label"))
-                is not None
-            ):
-                return True
+def has_dynamic_label(choice_list: "List[Dict[str, str]]") -> bool:
+    """
+    If the first or second choice label includes a reference, we must use itext.
+
+    Check the first two choices in case first is something like "Other".
+    """
+    for c in choice_list[:2]:
+        choice_label = c.get("label")
+        if (
+            choice_label is not None
+            and isinstance(choice_label, str)
+            and re.search(BRACKETED_TAG_REGEX, choice_label) is not None
+        ):
+            return True
     return False
 
 
diff --git a/pyxform/validators/pyxform/translations_checks.py b/pyxform/validators/pyxform/translations_checks.py
index 722017ba..01121d76 100644
--- a/pyxform/validators/pyxform/translations_checks.py
+++ b/pyxform/validators/pyxform/translations_checks.py
@@ -6,9 +6,9 @@
 from pyxform.errors import PyXFormError
 
 if TYPE_CHECKING:
-    from typing import Dict, List, Optional, Sequence, Set, Union
+    from typing import Dict, List, Optional, Sequence, Set, Tuple
 
-    SheetData = List[Dict[str, Union[str, Dict]]]
+    SheetData = Tuple[Tuple[str, ...]]
     Warnings = List[str]
 
 
@@ -93,31 +93,20 @@ def __init__(
     def _find_translations(
         self, sheet_data: "SheetData", translatable_columns: "Dict[str, str]"
     ):
-        def process_cell(typ, cell):
-            if cell is not None:
-                if typ in translatable_columns.keys():
-                    name = translatable_columns[typ]
-                    if isinstance(cell, str):
-                        self.seen[const.DEFAULT_LANGUAGE_VALUE].append(name)
-                        self.columns_seen.add(name)
-                    elif isinstance(cell, dict):
-                        for lng in cell:
-                            self.seen[lng].append(name)
-                            self.columns_seen.add(name)
-
-        if 0 < len(sheet_data):
-            # e.g. ("name", "q1"), ("label", {"en": "Hello", "fr": "Bonjour"})
-            for column_type, cell_content in sheet_data[0].items():
-                if column_type == const.MEDIA and isinstance(cell_content, dict):
-                    # e.g. ("audio", {"eng": "my.mp3"})
-                    for media_type, media_cell in cell_content.items():
-                        process_cell(typ=media_type, cell=media_cell)
-                if column_type == const.BIND:
-                    # e.g. ("jr:constraintMsg", "Try again")
-                    for bind_type, bind_cell in cell_content.items():
-                        process_cell(typ=bind_type, cell=bind_cell)
-                else:
-                    process_cell(typ=column_type, cell=cell_content)
+        def process_header(head):
+            if head[0] in translatable_columns.keys():
+                name = translatable_columns[head[0]]
+                if len(head) == 1:
+                    self.seen[const.DEFAULT_LANGUAGE_VALUE].append(name)
+                elif len(head) == 2:
+                    self.seen[head[1]].append(name)
+                self.columns_seen.add(name)
+
+        for header in sheet_data:
+            if 1 < len(header) and header[0] in (const.MEDIA, const.BIND):
+                process_header(head=header[1:])
+            else:
+                process_header(head=header)
 
     def seen_default_only(self) -> bool:
         return 0 == len(self.seen) or (
diff --git a/pyxform/xform2json.py b/pyxform/xform2json.py
index c8497733..4cf0ebcc 100644
--- a/pyxform/xform2json.py
+++ b/pyxform/xform2json.py
@@ -668,7 +668,8 @@ def _get_choices(self) -> Dict[str, Any]:
             choices[instance["id"]] = items
         return choices
 
-    def _get_name_from_ref(self, ref):
+    @staticmethod
+    def _get_name_from_ref(ref):
         """given /xlsform_spec_test/launch,
         return the string after the last occurance of the character '/'
         """
@@ -678,10 +679,12 @@ def _get_name_from_ref(self, ref):
         else:
             return ref[pos + 1 :].strip()
 
-    def _expand_child(self, obj_list):
+    @staticmethod
+    def _expand_child(obj_list):
         return obj_list
 
-    def _shorten_xpaths_in_string(self, text):
+    @staticmethod
+    def _shorten_xpaths_in_string(text):
         def get_last_item(xpath_str):
             last_item = xpath_str.split("/")
             return last_item[len(last_item) - 1].strip()
diff --git a/pyxform/xls2json.py b/pyxform/xls2json.py
index e1fe6cae..2fca218b 100644
--- a/pyxform/xls2json.py
+++ b/pyxform/xls2json.py
@@ -8,7 +8,7 @@
 import re
 import sys
 from collections import Counter
-from typing import IO, Any, Dict, List, Optional
+from typing import IO, Any, Dict, List, Optional, Tuple
 
 from pyxform import aliases, constants
 from pyxform.constants import (
@@ -97,13 +97,25 @@ def replace_smart_quotes_in_dict(_d):
             _d[key] = value
 
 
+class DealiasAndGroupHeadersResult:
+    __slots__ = ("headers", "data")
+
+    def __init__(self, headers: Tuple[Tuple[str, ...], ...], data: List[Dict]):
+        """
+        :param headers: Distinct headers seen in the sheet, parsed / split if applicable.
+        :param data: Sheet data rows, in grouped dict format.
+        """
+        self.headers: Tuple[Tuple[str, ...], ...] = headers
+        self.data: List[Dict] = data
+
+
 def dealias_and_group_headers(
-    dict_array: "List[Dict]",
-    header_aliases: "Dict",
+    dict_array: List[Dict],
+    header_aliases: Dict[str, str],
     use_double_colons: bool,
     default_language: str = constants.DEFAULT_LANGUAGE_VALUE,
     ignore_case: bool = False,
-):
+) -> DealiasAndGroupHeadersResult:
     """
     For each row in the worksheet, group all keys that contain a double colon.
     So
@@ -117,10 +129,10 @@ def dealias_and_group_headers(
     """
     group_delimiter = "::"
     out_dict_array = list()
+    seen_headers = {}
     for row in dict_array:
         out_row = dict()
         for header, val in row.items():
-
             if ignore_case:
                 header = header.lower()
 
@@ -150,9 +162,12 @@ def dealias_and_group_headers(
             new_key = tokens[0]
             new_value = list_to_nested_dict(tokens[1:] + [val])
             out_row = merge_dicts(out_row, {new_key: new_value}, default_language)
+            seen_headers[tuple(tokens)] = None
 
         out_dict_array.append(out_row)
-    return out_dict_array
+    return DealiasAndGroupHeadersResult(
+        headers=tuple(seen_headers.keys()), data=out_dict_array
+    )
 
 
 def dealias_types(dict_array):
@@ -474,9 +489,11 @@ def workbook_to_json(
         settings_sheet_headers = []
 
     settings_sheet = dealias_and_group_headers(
-        settings_sheet_headers, aliases.settings_header, use_double_colons
+        dict_array=settings_sheet_headers,
+        header_aliases=aliases.settings_header,
+        use_double_colons=use_double_colons,
     )
-    settings = settings_sheet[0] if len(settings_sheet) > 0 else {}
+    settings = settings_sheet.data[0] if len(settings_sheet.data) > 0 else {}
     replace_smart_quotes_in_dict(settings)
 
     default_language = settings.get(constants.DEFAULT_LANGUAGE_KEY, default_language)
@@ -514,10 +531,13 @@ def workbook_to_json(
         replace_smart_quotes_in_dict(choice_item)
 
     external_choices_sheet = dealias_and_group_headers(
-        external_choices_sheet, aliases.list_header, use_double_colons, default_language
+        dict_array=external_choices_sheet,
+        header_aliases=aliases.list_header,
+        use_double_colons=use_double_colons,
+        default_language=default_language,
     )
     external_choices = group_dictionaries_by_key(
-        external_choices_sheet, constants.LIST_NAME
+        list_of_dicts=external_choices_sheet.data, key=constants.LIST_NAME
     )
 
     # ########## Choices sheet ##########
@@ -525,9 +545,14 @@ def workbook_to_json(
     for choice_item in choices_sheet:
         replace_smart_quotes_in_dict(choice_item)
     choices_sheet = dealias_and_group_headers(
-        choices_sheet, aliases.list_header, use_double_colons, default_language
+        dict_array=choices_sheet,
+        header_aliases=aliases.list_header,
+        use_double_colons=use_double_colons,
+        default_language=default_language,
+    )
+    combined_lists = group_dictionaries_by_key(
+        list_of_dicts=choices_sheet.data, key=constants.LIST_NAME
     )
-    combined_lists = group_dictionaries_by_key(choices_sheet, constants.LIST_NAME)
     # To combine the warning into one message, the check for missing choices translation
     # columns is run with Survey sheet below.
 
@@ -601,9 +626,13 @@ def workbook_to_json(
     # ########## Entities sheet ###########
     entities_sheet = workbook_dict.get(constants.ENTITIES, [])
     entities_sheet = dealias_and_group_headers(
-        entities_sheet, aliases.entities_header, False
+        dict_array=entities_sheet,
+        header_aliases=aliases.entities_header,
+        use_double_colons=False,
+    )
+    entity_declaration = get_entity_declaration(
+        entities_sheet=entities_sheet.data, workbook_dict=workbook_dict, warnings=warnings
     )
-    entity_declaration = get_entity_declaration(entities_sheet, workbook_dict, warnings)
 
     # ########## Survey sheet ###########
     survey_sheet = workbook_dict[constants.SURVEY]
@@ -614,23 +643,30 @@ def workbook_to_json(
     if clean_text_values_enabled:
         survey_sheet = clean_text_values(survey_sheet)
     survey_sheet = dealias_and_group_headers(
-        survey_sheet, aliases.survey_header, use_double_colons, default_language
+        dict_array=survey_sheet,
+        header_aliases=aliases.survey_header,
+        use_double_colons=use_double_colons,
+        default_language=default_language,
     )
-    survey_sheet = dealias_types(survey_sheet)
+    survey_sheet.data = dealias_types(dict_array=survey_sheet.data)
 
     # Check for missing translations. The choices sheet is checked here so that the
     # warning can be combined into one message.
     sheet_translations = SheetTranslations(
-        survey_sheet=survey_sheet,
-        choices_sheet=choices_sheet,
+        survey_sheet=survey_sheet.headers,
+        choices_sheet=choices_sheet.headers,
     )
     sheet_translations.missing_check(warnings=warnings)
 
     # No spell check for OSM sheet (infrequently used, many spurious matches).
     osm_sheet = dealias_and_group_headers(
-        workbook_dict.get(constants.OSM, []), aliases.list_header, True
+        dict_array=workbook_dict.get(constants.OSM, []),
+        header_aliases=aliases.list_header,
+        use_double_colons=True,
+    )
+    osm_tags = group_dictionaries_by_key(
+        list_of_dicts=osm_sheet.data, key=constants.LIST_NAME
     )
-    osm_tags = group_dictionaries_by_key(osm_sheet, constants.LIST_NAME)
     # #################################
 
     # Parse the survey sheet while generating a survey in our json format:
@@ -673,7 +709,7 @@ def workbook_to_json(
 
     # row by row, validate questions, throwing errors and adding warnings
     # where needed.
-    for row in survey_sheet:
+    for row in survey_sheet.data:
         row_number += 1
         if stack[-1] is not None:
             prev_control_type = stack[-1]["control_type"]
@@ -1650,7 +1686,7 @@ def _setup_question_types_dictionary(self):
             header_aliases={},
             use_double_colons=use_double_colons,
             default_language=constants.DEFAULT_LANGUAGE_VALUE,
-        )
+        ).data
         self._dict = organize_by_values(self._dict, "name")
 
 
diff --git a/tests/test_translations.py b/tests/test_translations.py
index 8116a0bb..b6a85ba0 100644
--- a/tests/test_translations.py
+++ b/tests/test_translations.py
@@ -416,6 +416,146 @@ def run(name):
             ):
                 run(name=f"questions={count}, without check (seconds):")
 
+    def test_translation_detection__survey_and_choices_columns_present(self):
+        """Should identify that the survey is multi-language when first row(s) empty."""
+        md = """
+        | survey  |                |       |            |            |
+        |         | type           | name  | label      | label::en  |
+        |         | select_one c0  | f     | f          |            |
+        |         | select_one c1  | q1    | Question 1 | Question A |
+        | choices |           |      |        |            |           |
+        |         | list name | name | label  | label::en  | label::fr |
+        |         | c0        | n    | l      |            |           |
+        |         | c1        | na   | la     |            |           |
+        |         | c1        | nb   | lb     | lb-e       | lb-f      |
+        """
+        self.assertPyxformXform(
+            md=md,
+            xml__xpath_match=[
+                xpq.body_select1_itemset("f"),
+                xpq.body_label_inline("select1", "f", "f"),
+                xpq.body_select1_itemset("q1"),
+                xpq.body_label_itext("select1", "q1"),
+                xpq.model_itext_label("q1", DEFAULT_LANG, "Question 1"),
+                xpq.model_itext_label("q1", "en", "Question A"),
+                xpq.model_itext_label("q1", "fr", "-"),
+                xpc.model_instance_choices_label("c0", (("n", "l"),)),
+                xpc.model_instance_choices_itext("c1", ("na", "nb")),
+                xpc.model_itext_choice_text_label_by_pos(
+                    DEFAULT_LANG, "c1", ("la", "lb")
+                ),
+                xpc.model_itext_choice_text_label_by_pos("en", "c1", ("-", "lb-e")),
+                xpc.model_itext_choice_text_label_by_pos("fr", "c1", ("-", "lb-f")),
+            ],
+        )
+
+    def test_translation_detection__survey_columns_not_present(self):
+        """Should identify that the survey is multi-language when only choices translated."""
+        md = """
+        | survey  |                |       |            |
+        |         | type           | name  | label      |
+        |         | select_one c0  | f     | f          |
+        |         | select_one c1  | q1    | Question 1 |
+        | choices |           |      |        |            |           |
+        |         | list name | name | label  | label::en  | label::fr |
+        |         | c0        | n    | l      |            |           |
+        |         | c1        | na   | la     |            |           |
+        |         | c1        | nb   | lb     | lb-e       | lb-f      |
+        """
+        self.assertPyxformXform(
+            md=md,
+            xml__xpath_match=[
+                xpq.body_select1_itemset("f"),
+                xpq.body_label_inline("select1", "f", "f"),
+                xpq.body_select1_itemset("q1"),
+                xpq.body_label_inline("select1", "q1", "Question 1"),
+                xpc.model_instance_choices_label("c0", (("n", "l"),)),
+                xpc.model_instance_choices_itext("c1", ("na", "nb")),
+                xpc.model_itext_choice_text_label_by_pos(
+                    DEFAULT_LANG, "c1", ("la", "lb")
+                ),
+                xpc.model_itext_choice_text_label_by_pos("en", "c1", ("-", "lb-e")),
+                xpc.model_itext_choice_text_label_by_pos("fr", "c1", ("-", "lb-f")),
+            ],
+        )
+
+    def test_translation_detection__only_survey_columns_present(self):
+        """Should identify that the survey is multi-language when first row(s) empty."""
+        md = """
+        | survey  |                |       |            |            |
+        |         | type           | name  | label      | label::en  |
+        |         | select_one c0  | f     | f          |            |
+        |         | select_one c1  | q1    | Question 1 | Question A |
+        | choices |           |      |        |
+        |         | list name | name | label  |
+        |         | c0        | n    | l      |
+        |         | c1        | na   | la     |
+        |         | c1        | nb   | lb     |
+        """
+        self.assertPyxformXform(
+            md=md,
+            xml__xpath_match=[
+                xpq.body_select1_itemset("f"),
+                xpq.body_label_inline("select1", "f", "f"),
+                xpq.body_select1_itemset("q1"),
+                xpq.body_label_itext("select1", "q1"),
+                xpq.model_itext_label("q1", DEFAULT_LANG, "Question 1"),
+                xpq.model_itext_label("q1", "en", "Question A"),
+                xpc.model_instance_choices_label("c0", (("n", "l"),)),
+                xpc.model_instance_choices_label("c1", (("na", "la"), ("nb", "lb"))),
+            ],
+        )
+
+    def test_translation_detection__survey_columns_present_with_media(self):
+        """Should identify that the survey is multi-language when first row(s) empty."""
+        md = """
+        | survey  |                |       |            |            |           |
+        |         | type           | name  | label      | label::en  | image::en |
+        |         | select_one c0  | f     | f          |            |           |
+        |         | select_one c1  | q1    | Question 1 | Question A | c1.png    |
+        | choices |           |      |        |            |           |
+        |         | list name | name | label  | label::en  | label::fr | audio::de |
+        |         | c0        | n    | l      |            |           |           |
+        |         | c1        | na   | la     |            |           |           |
+        |         | c1        | nb   | lb     | lb-e       |           | c1_nb.mp3 |
+        |         | c1        | nc   | lc     | lc-e       | lc-f      | c1_nc.mp3 |
+        """
+        self.assertPyxformXform(
+            md=md,
+            debug=True,
+            xml__xpath_match=[
+                xpq.body_select1_itemset("f"),
+                xpq.body_label_inline("select1", "f", "f"),
+                xpq.body_select1_itemset("q1"),
+                xpq.body_label_itext("select1", "q1"),
+                xpq.model_itext_label("q1", DEFAULT_LANG, "Question 1"),
+                xpq.model_itext_label(
+                    "q1",
+                    "en",
+                    "Question A",
+                ),
+                xpq.model_itext_form("q1", "en", "image", "c1.png"),
+                xpc.model_instance_choices_label("c0", (("n", "l"),)),
+                xpc.model_itext_choice_text_label_by_pos(
+                    DEFAULT_LANG, "c1", ("la", "lb", "lc")
+                ),
+                xpc.model_itext_choice_text_label_by_pos(
+                    "en", "c1", ("-", "lb-e", "lc-e")
+                ),
+                xpc.model_itext_choice_text_label_by_pos("fr", "c1", ("-", "-", "lc-f")),
+                xpc.model_itext_choice_text_label_by_pos("de", "c1", ("-", "-", "-")),
+                xpc.model_itext_choice_media_by_pos(
+                    "de",
+                    "c1",
+                    (
+                        ((None, None),),
+                        (("audio", "c1_nb.mp3"),),
+                        (("audio", "c1_nc.mp3"),),
+                    ),
+                ),
+            ],
+        )
+
 
 class TestTranslationsSurvey(PyxformTestCase):
     """Translations behaviour of columns in the Survey sheet."""
@@ -1090,9 +1230,9 @@ def test_missing_translation__one_lang_simple__warn__default(self):
         |         | type          | name  | label      |
         |         | select_one c1 | q1    | Question 1 |
         | choices |           |      |                 |
-        |         | list name | name | label | label::eng | media::audio |
-        |         | c1        | na   | la-d  | la-e       | la-d.mp3     |
-        |         | c1        | nb   | lb-d  | lb-e       | lb-d.mp3     |
+        |         | list name | name | label::eng | media::audio |
+        |         | c1        | na   | la-e       | la-d.mp3     |
+        |         | c1        | nb   | lb-e       | lb-d.mp3     |
         """
         cols = {CHOICES: {"default": ("label",), "eng": ("media::audio",)}}
         warning = format_missing_translations_msg(_in=cols)
diff --git a/tests/test_xform2json.py b/tests/test_xform2json.py
index ba9a56ce..7d283fb0 100644
--- a/tests/test_xform2json.py
+++ b/tests/test_xform2json.py
@@ -35,9 +35,10 @@ def test_convert_toJSON_multi_language(self):
             kwargs={"id_string": "id", "name": "multi-language", "title": "some-title"},
             autoname=False,
         )
-        expected_xml = survey.to_xml()
+        expected = survey.to_xml()
         generated_json = survey.to_json()
 
-        survey = create_survey_element_from_dict(json.loads(generated_json))
+        survey_from_builder = create_survey_element_from_dict(json.loads(generated_json))
+        observed = survey_from_builder.to_xml()
 
-        self.assertEqual(expected_xml, survey.to_xml())
+        self.assertEqual(expected, observed)
diff --git a/tests/xpath_helpers/choices.py b/tests/xpath_helpers/choices.py
index dddcdb49..d546221b 100644
--- a/tests/xpath_helpers/choices.py
+++ b/tests/xpath_helpers/choices.py
@@ -106,6 +106,7 @@ def model_itext_choice_media_by_pos(
                 """
                 for idx, choice in enumerate(choices)
                 for form, media in choice
+                if form is not None
             )
         )
         return f"""
@@ -134,6 +135,7 @@ def model_no_itext_choice_media_by_pos(
                 """
                 for idx, choice in enumerate(choices)
                 for form, media in choice
+                if form is not None
             )
         )
         return f"""
diff --git a/tests/xpath_helpers/questions.py b/tests/xpath_helpers/questions.py
index c6af3b65..bf865ab3 100644
--- a/tests/xpath_helpers/questions.py
+++ b/tests/xpath_helpers/questions.py
@@ -1,6 +1,6 @@
 class XPathHelper:
     """
-    XPath expressions for choices assertions.
+    XPath expressions for questions assertions.
     """
 
     @staticmethod
@@ -20,6 +20,47 @@ def model_instance_bind(q_name: str, _type: str):
           ]
         """
 
+    @staticmethod
+    def model_itext_label(q_name: str, lang: str, q_label: str):
+        """Model itext contains the question label."""
+        return f"""
+        /h:html/h:head/x:model/x:itext/x:translation[@lang='{lang}']
+          /x:text[@id='/test_name/{q_name}:label']
+          /x:value[not(@form) and text()='{q_label}']
+        """
+
+    @staticmethod
+    def model_itext_form(q_name: str, lang: str, form: str, fname: str):
+        """Model itext contains an alternate form itext for the label or hint."""
+        prefix = {
+            "audio": ("label", "jr://audio/"),
+            "image": ("label", "jr://images/"),
+            "big-image": ("label", "jr://images/"),
+            "video": ("label", "jr://video/"),
+            "guidance": ("hint", ""),
+        }
+        return f"""
+        /h:html/h:head/x:model/x:itext/x:translation[@lang='{lang}']
+          /x:text[@id='/test_name/{q_name}:{prefix[form][0]}']
+          /x:value[@form='{form}' and text()='{prefix[form][1]}{fname}']
+        """
+
+    @staticmethod
+    def body_label_inline(q_type: str, q_name: str, q_label: str):
+        """Body element contains the question label."""
+        return f"""
+        /h:html/h:body/x:{q_type}[@ref='/test_name/{q_name}']
+          /x:label[not(@ref) and text()='{q_label}']
+        """
+
+    @staticmethod
+    def body_label_itext(q_type: str, q_name: str):
+        """Body element references itext for the question label."""
+        return f"""
+        /h:html/h:body/x:{q_type}[@ref='/test_name/{q_name}']
+          /x:label[@ref="jr:itext('/test_name/{q_name}:label')" and not(text())]
+        """
+
     @staticmethod
     def body_select1_itemset(q_name: str):
         """Body has a select1 with an itemset, and no inline items."""