diff --git a/pyxform/parsing/expression.py b/pyxform/parsing/expression.py
index de99b66b..af919859 100644
--- a/pyxform/parsing/expression.py
+++ b/pyxform/parsing/expression.py
@@ -1,13 +1,108 @@
+import re
 from collections.abc import Iterable
+from functools import lru_cache
+from typing import NamedTuple
 
-from pyxform.utils import parse_expression
+
+def get_expression_lexer() -> re.Scanner:
+    """
+    Get an expression lexer (scanner) for parsing.
+    """
+    # ncname regex adapted from eulxml https://github.com/emory-libraries/eulxml/blob/2e1a9f71ffd1fd455bd8326ec82125e333b352e0/eulxml/xpath/lexrules.py
+    # (C) 2010,2011 Emory University Libraries [Apache v2.0 License]
+    # They in turn adapted it from https://www.w3.org/TR/REC-xml/#NT-NameStartChar
+    # and https://www.w3.org/TR/REC-xml-names/#NT-NCName
+    namestartchar = (
+        r"([A-Z]|_|[a-z]|[\xc0-\xd6]|[\xd8-\xf6]|[\xf8-\u02ff]|"
+        + r"[\u0370-\u037d]|[\u037f-\u1fff]|[\u200c-\u200d]|[\u2070-\u218f]|"
+        + r"[\u2c00-\u2fef]|[\u3001-\uD7FF]|[\uF900-\uFDCF]|[\uFDF0-\uFFFD]"
+        + r"|[\U00010000-\U000EFFFF])"
+    )
+    # additional characters allowed in NCNames after the first character
+    namechar_extra = r"[-.0-9\xb7\u0300-\u036f\u203f-\u2040]"
+    ncname_regex = (
+        r"(" + namestartchar + r")(" + namestartchar + r"|" + namechar_extra + r")*"
+    )
+    ncname_regex = ncname_regex + r"(:" + ncname_regex + r")?"
+
+    date_regex = r"-?\d{4}-\d{2}-\d{2}"
+    time_regex = r"\d{2}:\d{2}:\d{2}(\.\d+)?(((\+|\-)\d{2}:\d{2})|Z)?"
+    date_time_regex = date_regex + "T" + time_regex
+
+    # Rule order is significant - match priority runs top to bottom.
+    lexer_rules = {
+        # https://www.w3.org/TR/xmlschema-2/#dateTime
+        "DATETIME": date_time_regex,
+        "DATE": date_regex,
+        "TIME": time_regex,
+        "NUMBER": r"-?\d+\.\d*|-?\.\d+|-?\d+",
+        # https://www.w3.org/TR/1999/REC-xpath-19991116/#exprlex
+        "OPS_MATH": r"[\*\+\-]|mod|div",
+        "OPS_COMP": r"\=|\!\=|\<|\>|\<=|>=",
+        "OPS_BOOL": r"and|or",
+        "OPS_UNION": r"\|",
+        "OPEN_PAREN": r"\(",
+        "CLOSE_PAREN": r"\)",
+        "BRACKET": r"\[\]\{\}",
+        "PARENT_REF": r"\.\.",
+        "SELF_REF": r"\.",
+        "PATH_SEP": r"\/",  # javarosa.xpath says "//" is an "unsupported construct".
+        "SYSTEM_LITERAL": r""""[^"]*"|'[^']*'""",
+        "COMMA": r",",
+        "WHITESPACE": r"\s+",
+        "PYXFORM_REF": r"\$\{" + ncname_regex + r"(#" + ncname_regex + r")?" + r"\}",
+        "FUNC_CALL": ncname_regex + r"\(",
+        "XPATH_PRED_START": ncname_regex + r"\[",
+        "XPATH_PRED_END": r"\]",
+        "URI_SCHEME": ncname_regex + r"://",
+        "NAME": ncname_regex,  # Must be after rules containing ncname_regex.
+        "PYXFORM_REF_START": r"\$\{",
+        "PYXFORM_REF_END": r"\}",
+        "OTHER": r".+?",  # Catch any other character so that parsing doesn't stop.
+    }
+
+    def get_tokenizer(name):
+        def tokenizer(scan, value):
+            return ExpLexerToken(name, value, scan.match.start(), scan.match.end())
+
+        return tokenizer
+
+    lexicon = [(v, get_tokenizer(k)) for k, v in lexer_rules.items()]
+    # re.Scanner is undocumented but has been around since at least 2003
+    # https://mail.python.org/pipermail/python-dev/2003-April/035075.html
+    return re.Scanner(lexicon)
+
+
+class ExpLexerToken(NamedTuple):
+    name: str
+    value: str
+    start: int
+    end: int
+
+
+# The Scanner takes a few hundred ms to compile, so use this shared instance.
+_EXPRESSION_LEXER = get_expression_lexer()
+
+
+@lru_cache(maxsize=1024)
+def parse_expression(text: str) -> tuple[list[ExpLexerToken], str]:
+    """
+    Parse an expression.
+
+    Use this function instead of _EXPRESSION_LEXER to take advantage of caching.
+
+    :param text: The expression.
+    :return: The parsed tokens, and any remaining unparsed text.
+    """
+    tokens, remainder = _EXPRESSION_LEXER.scan(text)
+    return tokens, remainder
 
 
 def is_single_token_expression(expression: str, token_types: Iterable[str]) -> bool:
     """
     Does the expression contain single token of one of the provided token types?
     """
-    tokens, _ = parse_expression(text=expression.strip())
+    tokens, _ = parse_expression(expression.strip())
     if 1 == len(tokens) and tokens[0].name in token_types:
         return True
     else:
diff --git a/pyxform/parsing/instance_expression.py b/pyxform/parsing/instance_expression.py
index 09ee91c8..4b3f82ed 100644
--- a/pyxform/parsing/instance_expression.py
+++ b/pyxform/parsing/instance_expression.py
@@ -1,7 +1,7 @@
-import re
 from typing import TYPE_CHECKING
 
-from pyxform.utils import BRACKETED_TAG_REGEX, EXPRESSION_LEXER, ExpLexerToken, node
+from pyxform.parsing.expression import ExpLexerToken, parse_expression
+from pyxform.utils import BRACKETED_TAG_REGEX, node
 
 if TYPE_CHECKING:
     from pyxform.survey import Survey
@@ -37,7 +37,7 @@ def find_boundaries(xml_text: str) -> list[tuple[int, int]]:
     path_enter = False
     pred_enter = False
     last_token = None
-    tokens, _ = EXPRESSION_LEXER.scan(xml_text)
+    tokens, _ = parse_expression(xml_text)
     boundaries = []
 
     for t in tokens:
@@ -111,8 +111,7 @@ def replace_with_output(xml_text: str, context: "SurveyElement", survey: "Survey
         old_str = xml_text[start:end]
         # Pass the new string through the pyxform reference replacer.
         # noinspection PyProtectedMember
-        new_str = re.sub(
-            BRACKETED_TAG_REGEX,
+        new_str = BRACKETED_TAG_REGEX.sub(
             lambda m: survey._var_repl_function(m, context),
             old_str,
         )
diff --git a/pyxform/utils.py b/pyxform/utils.py
index 5e362e8d..37e5a849 100644
--- a/pyxform/utils.py
+++ b/pyxform/utils.py
@@ -9,7 +9,7 @@
 import re
 from io import StringIO
 from json.decoder import JSONDecodeError
-from typing import Any, NamedTuple
+from typing import Any
 from xml.dom import Node
 from xml.dom.minidom import Element, Text, _write_data
 
@@ -17,11 +17,10 @@
 from pyxform import constants as const
 from pyxform.errors import PyXFormError
+from pyxform.parsing.expression import parse_expression
 
 SEP = "_"
-
-INVALID_XFORM_TAG_REGEXP = r"[^a-zA-Z:_][^a-zA-Z:_0-9\-.]*"
-
+INVALID_XFORM_TAG_REGEXP = re.compile(r"[^a-zA-Z:_][^a-zA-Z:_0-9\-.]*")
 LAST_SAVED_INSTANCE_NAME = "__last-saved"
 BRACKETED_TAG_REGEX = re.compile(r"\${(last-saved#)?(.*?)}")
 LAST_SAVED_REGEX = re.compile(r"\${last-saved#(.*?)}")
@@ -334,94 +333,5 @@ def levenshtein_distance(a: str, b: str) -> int:
     return v0[n]
 
 
-def get_expression_lexer() -> re.Scanner:
-    """
-    Get a expression lexer (scanner) for parsing.
- """ - # ncname regex adapted from eulxml https://github.com/emory-libraries/eulxml/blob/2e1a9f71ffd1fd455bd8326ec82125e333b352e0/eulxml/xpath/lexrules.py - # (C) 2010,2011 Emory University Libraries [Apache v2.0 License] - # They in turn adapted it from https://www.w3.org/TR/REC-xml/#NT-NameStartChar - # and https://www.w3.org/TR/REC-xml-names/#NT-NCName - namestartchar = ( - r"([A-Z]|_|[a-z]|\xc0-\xd6]|[\xd8-\xf6]|[\xf8-\u02ff]|" - + r"[\u0370-\u037d]|[\u037f-\u1fff]|[\u200c-\u200d]|[\u2070-\u218f]|" - + r"[\u2c00-\u2fef]|[\u3001-\uD7FF]|[\uF900-\uFDCF]|[\uFDF0-\uFFFD]" - + r"|[\U00010000-\U000EFFFF])" - ) - # additional characters allowed in NCNames after the first character - namechar_extra = r"[-.0-9\xb7\u0300-\u036f\u203f-\u2040]" - ncname_regex = ( - r"(" + namestartchar + r")(" + namestartchar + r"|" + namechar_extra + r")*" - ) - ncname_regex = ncname_regex + r"(:" + ncname_regex + r")?" - - date_regex = r"-?\d{4}-\d{2}-\d{2}" - time_regex = r"\d{2}:\d{2}:\d{2}(\.\s+)?(((\+|\-)\d{2}:\d{2})|Z)?" - date_time_regex = date_regex + "T" + time_regex - - # Rule order is significant - match priority runs top to bottom. - lexer_rules = { - # https://www.w3.org/TR/xmlschema-2/#dateTime - "DATETIME": date_time_regex, - "DATE": date_regex, - "TIME": time_regex, - "NUMBER": r"-?\d+\.\d*|-?\.\d+|-?\d+", - # https://www.w3.org/TR/1999/REC-xpath-19991116/#exprlex - "OPS_MATH": r"[\*\+\-]|mod|div", - "OPS_COMP": r"\=|\!\=|\<|\>|\<=|>=", - "OPS_BOOL": r"and|or", - "OPS_UNION": r"\|", - "OPEN_PAREN": r"\(", - "CLOSE_PAREN": r"\)", - "BRACKET": r"\[\]\{\}", - "PARENT_REF": r"\.\.", - "SELF_REF": r"\.", - "PATH_SEP": r"\/", # javarosa.xpath says "//" is an "unsupported construct". - "SYSTEM_LITERAL": r""""[^"]*"|'[^']*'""", - "COMMA": r",", - "WHITESPACE": r"\s+", - "PYXFORM_REF": r"\$\{" + ncname_regex + r"(#" + ncname_regex + r")?" + r"\}", - "FUNC_CALL": ncname_regex + r"\(", - "XPATH_PRED_START": ncname_regex + r"\[", - "XPATH_PRED_END": r"\]", - "URI_SCHEME": ncname_regex + r"://", - "NAME": ncname_regex, # Must be after rules containing ncname_regex. - "OTHER": r".+?", # Catch any other character so that parsing doesn't stop. - } - - def get_tokenizer(name): - def tokenizer(scan, value): - return ExpLexerToken(name, value, scan.match.start(), scan.match.end()) - - return tokenizer - - lexicon = [(v, get_tokenizer(k)) for k, v in lexer_rules.items()] - # re.Scanner is undocumented but has been around since at least 2003 - # https://mail.python.org/pipermail/python-dev/2003-April/035075.html - return re.Scanner(lexicon) - - -# Scanner takes a few 100ms to compile so use this shared instance. -class ExpLexerToken(NamedTuple): - name: str - value: str - start: int - end: int - - -EXPRESSION_LEXER = get_expression_lexer() - - -def parse_expression(text: str) -> tuple[list[ExpLexerToken], str]: - """ - Parse a "default" expression, well enough to identify dynamic defaults vs. not. - - :param text: The expression. - :return: The parsed tokens, and any remaining unparsed text. 
- """ - tokens, remainder = EXPRESSION_LEXER.scan(text) - return tokens, remainder - - def coalesce(*args): return next((a for a in args if a is not None), None) diff --git a/pyxform/validators/error_cleaner.py b/pyxform/validators/error_cleaner.py index 8305780c..642645e4 100644 --- a/pyxform/validators/error_cleaner.py +++ b/pyxform/validators/error_cleaner.py @@ -4,6 +4,8 @@ import re +ERROR_MESSAGE_REGEX = re.compile(r"(/[a-z0-9\-_]+(?:/[a-z0-9\-_]+)+)", flags=re.I) + class ErrorCleaner: """Cleans up raw error messages from XForm validators for end users.""" @@ -22,9 +24,9 @@ def _replace_xpath_with_tokens(match): @staticmethod def _cleanup_errors(error_message): - pattern = r"(/[a-z0-9\-_]+(?:/[a-z0-9\-_]+)+)" - error_message = re.sub( - pattern, ErrorCleaner._replace_xpath_with_tokens, error_message, flags=re.I + error_message = ERROR_MESSAGE_REGEX.sub( + ErrorCleaner._replace_xpath_with_tokens, + error_message, ) lines = str(error_message).strip().splitlines() no_dupes = [ diff --git a/pyxform/validators/pyxform/pyxform_reference.py b/pyxform/validators/pyxform/pyxform_reference.py new file mode 100644 index 00000000..e55a408a --- /dev/null +++ b/pyxform/validators/pyxform/pyxform_reference.py @@ -0,0 +1,53 @@ +from pyxform import constants as co +from pyxform.errors import PyXFormError +from pyxform.parsing.expression import parse_expression + +PYXFORM_REFERENCE_INVALID = ( + "[row : {row_number}] On the '{sheet}' sheet, the '{column}' value is invalid. " + "Reference expressions must only include question names, and end with '}}'." +) + + +def validate_pyxform_reference_syntax( + value: str, sheet_name: str, row_number: int, key: str +) -> None: + # Skip columns in potentially large sheets where references are not allowed. + if sheet_name == co.SURVEY: + if key in (co.TYPE, co.NAME): + return + elif sheet_name == co.CHOICES: + if key in (co.LIST_NAME_S, co.LIST_NAME_U, co.NAME): + return + elif sheet_name == co.ENTITIES: + if key == (co.LIST_NAME_S, co.LIST_NAME_U): + return + + tokens, _ = parse_expression(value) + start_token = None + + for t in tokens: + # The start of an expression. + if t is not None and t.name == "PYXFORM_REF_START" and start_token is None: + start_token = t + # Tokens that are part of an expression. + elif start_token is not None: + if t.name == "NAME": + continue + elif t.name == "PYXFORM_REF_END": + start_token = None + elif t.name in ("PYXFORM_REF_START", "PYXFORM_REF"): + msg = PYXFORM_REFERENCE_INVALID.format( + sheet=sheet_name, row_number=row_number, column=key + ) + raise PyXFormError(msg) + else: + msg = PYXFORM_REFERENCE_INVALID.format( + sheet=sheet_name, row_number=row_number, column=key + ) + raise PyXFormError(msg) + + if start_token is not None: + msg = PYXFORM_REFERENCE_INVALID.format( + sheet=sheet_name, row_number=row_number, column=key + ) + raise PyXFormError(msg) diff --git a/pyxform/validators/pyxform/question_types.py b/pyxform/validators/pyxform/question_types.py index 48f49db8..7ec18edd 100644 --- a/pyxform/validators/pyxform/question_types.py +++ b/pyxform/validators/pyxform/question_types.py @@ -2,8 +2,6 @@ Validations for question types. 
""" -import re - from pyxform.errors import PyXFormError from pyxform.parsing.expression import is_single_token_expression from pyxform.utils import PYXFORM_REFERENCE_REGEX @@ -37,7 +35,7 @@ def validate_background_geopoint_trigger(row: dict, row_num: int) -> bool: def validate_references(referrers: list[tuple[dict, int]], questions: set[str]) -> bool: """Triggers must refer to a question that exists.""" for row, row_num in referrers: - matches = re.match(PYXFORM_REFERENCE_REGEX, row["trigger"]) + matches = PYXFORM_REFERENCE_REGEX.match(row["trigger"]) if matches is not None: trigger = matches.groups()[0] if trigger not in questions: diff --git a/pyxform/validators/pyxform/translations_checks.py b/pyxform/validators/pyxform/translations_checks.py index 588814a6..b74d2b36 100644 --- a/pyxform/validators/pyxform/translations_checks.py +++ b/pyxform/validators/pyxform/translations_checks.py @@ -8,7 +8,7 @@ if TYPE_CHECKING: from collections.abc import Sequence - SheetData = tuple[tuple[str, ...]] + SheetData = tuple[tuple[str, ...], ...] Warnings = list[str] diff --git a/pyxform/xls2json.py b/pyxform/xls2json.py index 37c3ca1c..1e340b35 100644 --- a/pyxform/xls2json.py +++ b/pyxform/xls2json.py @@ -26,11 +26,14 @@ from pyxform.validators.pyxform import parameters_generic, select_from_file from pyxform.validators.pyxform import question_types as qt from pyxform.validators.pyxform.android_package_name import validate_android_package_name +from pyxform.validators.pyxform.pyxform_reference import validate_pyxform_reference_syntax from pyxform.validators.pyxform.translations_checks import SheetTranslations from pyxform.xls2json_backends import csv_to_dict, xls_to_dict, xlsx_to_dict from pyxform.xlsparseutils import find_sheet_misspellings, is_valid_xml_tag SMART_QUOTES = {"\u2018": "'", "\u2019": "'", "\u201c": '"', "\u201d": '"'} +RE_SMART_QUOTES = re.compile(r"|".join(re.escape(old) for old in SMART_QUOTES)) +RE_WHITESPACE = re.compile(r"( )+") def print_pyobj_to_json(pyobj, path=None): @@ -87,18 +90,6 @@ def list_to_nested_dict(lst): return lst[0] -def replace_smart_quotes_in_dict(_d): - for key, value in _d.items(): - _changed = False - for smart_quote, dumb_quote in SMART_QUOTES.items(): - if isinstance(value, str): - if smart_quote in value: - value = value.replace(smart_quote, dumb_quote) - _changed = True - if _changed: - _d[key] = value - - class DealiasAndGroupHeadersResult: __slots__ = ("headers", "data") @@ -184,42 +175,24 @@ def dealias_types(dict_array): return dict_array -def clean_text_values(dict_array): +def clean_text_values(sheet_name: str, data: list[dict], strip_whitespace: bool = False): """ Go though the dict array and strips all text values. Also replaces multiple spaces with single spaces. """ - for row in dict_array: - replace_smart_quotes_in_dict(row) + for row_number, row in enumerate(data, start=2): for key, value in row.items(): if isinstance(value, str): - row[key] = re.sub(r"( )+", " ", value.strip()) - return dict_array - - -# This is currently unused because name uniqueness is checked in json2xform. -def check_name_uniqueness(dict_array): - """ - Make sure all names are unique - Raises and exception if a duplicate is found - """ - # This set is used to validate the uniqueness of names. - name_set = set() - row_number = 0 # TODO: There might be a bug with row numbers... 
-    for row in dict_array:
-        row_number += 1
-        name = row.get(constants.NAME)
-        if name:
-            if name in name_set:
-                raise PyXFormError(
-                    "Question name is not unique: "
-                    + str(name)
-                    + " Row: "
-                    + str(row_number)
+                # Remove extraneous whitespace characters.
+                if strip_whitespace:
+                    value = RE_WHITESPACE.sub(" ", value.strip())
+                # Replace "smart" quotes with regular quotes.
+                row[key] = RE_SMART_QUOTES.sub(lambda m: SMART_QUOTES[m.group(0)], value)
+                # Check cross reference syntax.
+                validate_pyxform_reference_syntax(
+                    value=value, sheet_name=sheet_name, row_number=row_number, key=key
                 )
-            else:
-                name_set.add(name)
-    return dict_array
+    return data
 
 
 def group_dictionaries_by_key(list_of_dicts, key, remove_key=True):
@@ -487,7 +460,10 @@ def workbook_to_json(
         use_double_colons=use_double_colons,
     )
     settings = settings_sheet.data[0] if len(settings_sheet.data) > 0 else {}
-    replace_smart_quotes_in_dict(settings)
+    settings = clean_text_values(sheet_name=constants.SETTINGS, data=[settings])[0]
+    clean_text_values_enabled = aliases.yes_no.get(
+        settings.get("clean_text_values", "true()")
+    )
 
     default_language = settings.get(constants.DEFAULT_LANGUAGE_KEY, default_language)
 
@@ -522,9 +498,9 @@ def workbook_to_json(
 
     # ########## External Choices sheet ##########
     external_choices_sheet = workbook_dict.get(constants.EXTERNAL_CHOICES, [])
-    for choice_item in external_choices_sheet:
-        replace_smart_quotes_in_dict(choice_item)
-
+    external_choices_sheet = clean_text_values(
+        sheet_name=constants.EXTERNAL_CHOICES, data=external_choices_sheet
+    )
     external_choices_sheet = dealias_and_group_headers(
         dict_array=external_choices_sheet,
         header_aliases=aliases.list_header,
@@ -537,8 +513,7 @@ def workbook_to_json(
 
     # ########## Choices sheet ##########
     choices_sheet = workbook_dict.get(constants.CHOICES, [])
-    for choice_item in choices_sheet:
-        replace_smart_quotes_in_dict(choice_item)
+    choices_sheet = clean_text_values(sheet_name=constants.CHOICES, data=choices_sheet)
     choices_sheet = dealias_and_group_headers(
         dict_array=choices_sheet,
         header_aliases=aliases.list_header,
@@ -617,6 +592,7 @@ def workbook_to_json(
 
     # ########## Entities sheet ###########
     entities_sheet = workbook_dict.get(constants.ENTITIES, [])
+    entities_sheet = clean_text_values(sheet_name=constants.ENTITIES, data=entities_sheet)
    entities_sheet = dealias_and_group_headers(
         dict_array=entities_sheet,
         header_aliases=aliases.entities_header,
@@ -629,11 +605,10 @@ def workbook_to_json(
     # ########## Survey sheet ###########
     survey_sheet = workbook_dict[constants.SURVEY]
     # Process the headers:
-    clean_text_values_enabled = aliases.yes_no.get(
-        settings.get("clean_text_values", "true()")
-    )
     if clean_text_values_enabled:
-        survey_sheet = clean_text_values(survey_sheet)
+        survey_sheet = clean_text_values(
+            sheet_name=constants.SURVEY, data=survey_sheet, strip_whitespace=True
+        )
     survey_sheet = dealias_and_group_headers(
         dict_array=survey_sheet,
         header_aliases=aliases.survey_header,
@@ -662,8 +637,6 @@ def workbook_to_json(
     # #################################
 
     # Parse the survey sheet while generating a survey in our json format:
-    row_number = 1  # We start at 1 because the column header row is not
-    # included in the survey sheet (presumably).
     # A stack is used to keep track of begin/end expressions
     stack = [
         {
@@ -703,8 +676,7 @@ def workbook_to_json(
     trigger_references = []
 
     # row by row, validate questions, throwing errors and adding warnings where needed.
-    for row in survey_sheet.data:
-        row_number += 1
+    for row_number, row in enumerate(survey_sheet.data, start=2):
         if stack[-1] is not None:
             prev_control_type = stack[-1]["control_type"]
             parent_children_array = stack[-1]["parent_children"]
@@ -730,7 +702,6 @@ def workbook_to_json(
         # Get question type
         question_type = row.get(constants.TYPE)
         question_name = row.get(constants.NAME)
-        question_names.add(question_name)
 
         if not question_type:
             # if name and label are also missing,
@@ -1117,6 +1088,9 @@ def workbook_to_json(
             )
             continue
 
+        # Assuming a question is anything not processed above as a loop/repeat/group.
+        question_names.add(question_name)
+
         # Try to parse question as a select:
         select_parse = select_regexp.search(question_type)
         if select_parse:
diff --git a/pyxform/xlsparseutils.py b/pyxform/xlsparseutils.py
index 73d07823..280f706a 100644
--- a/pyxform/xlsparseutils.py
+++ b/pyxform/xlsparseutils.py
@@ -7,7 +7,7 @@
 # http://www.w3.org/TR/REC-xml/
 TAG_START_CHAR = r"[a-zA-Z:_]"
 TAG_CHAR = r"[a-zA-Z:_0-9\-.]"
-XFORM_TAG_REGEXP = f"{TAG_START_CHAR}{TAG_CHAR}*"
+XFORM_TAG_REGEXP = re.compile(rf"^{TAG_START_CHAR}{TAG_CHAR}*$")
 
 
 def find_sheet_misspellings(key: str, keys: "KeysView") -> "str | None":
@@ -42,4 +42,4 @@ def is_valid_xml_tag(tag):
     """
     Use a regex to see if there are any invalid characters (i.e. spaces).
     """
-    return re.search(r"^" + XFORM_TAG_REGEXP + r"$", tag)
+    return re.search(XFORM_TAG_REGEXP, tag)
diff --git a/tests/validators/pyxform/__init__.py b/tests/validators/pyxform/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/validators/pyxform/test_android_package_name.py b/tests/validators/pyxform/test_android_package_name.py
index e1f7f290..2f4dbe59 100644
--- a/tests/validators/pyxform/test_android_package_name.py
+++ b/tests/validators/pyxform/test_android_package_name.py
@@ -1,4 +1,5 @@
 from pyxform.validators.pyxform.android_package_name import validate_android_package_name
+
 from tests.pyxform_test_case import PyxformTestCase
 
 
diff --git a/tests/validators/pyxform/test_pyxform_reference.py b/tests/validators/pyxform/test_pyxform_reference.py
new file mode 100644
index 00000000..6a8d28a0
--- /dev/null
+++ b/tests/validators/pyxform/test_pyxform_reference.py
@@ -0,0 +1,107 @@
+from itertools import chain, product
+
+from pyxform.errors import PyXFormError
+from pyxform.validators.pyxform import pyxform_reference as pr
+
+from tests.pyxform_test_case import PyxformTestCase
+
+expression_contexts = [
+    ("{}", "Single reference"),
+    ("This: {}", "Single reference with prefix"),
+    ("{} (that)", "Single reference with suffix"),
+    ("This: {} (that)", "Single reference with prefix and suffix"),
+    ("This:{}", "Single reference with prefix, no space"),
+    ("{}(that)", "Single reference with suffix, no space"),
+    ("This:{} (that)", "Single reference with prefix and suffix, no space"),
+]
+ok_tokens = [
+    ("${a}", "OK"),
+    ("${abc123}", "OK"),
+    ("${last-saved#abc123}", "OK"),
+]
+error_tokens = [
+    ("${a }", "Invalid question name"),
+    ("${a\n}", "Invalid question name"),
+    ("${a", "No end character"),
+    ("${a${b}}", "Nested reference"),
+    ("${last-saved#a }", "Invalid question name"),
+    ("${last-saved#a \n}", "Invalid question name"),
+    ("${last-saved#a", "No end character"),
+    ("${last-saved#a${b}}", "Nested reference"),
+]
+
+
+class TestPyxformReference(PyxformTestCase):
+    def test_single_reference__ok(self):
+        """Should pass validation for all expected reference forms when used once."""
+        for context, ctx_desc in expression_contexts:
+            for token, tok_desc in ok_tokens:
+                with self.subTest(c=context, ctx=ctx_desc, t=token, tok=tok_desc):
+                    case = context.format(token)
+                    pr.validate_pyxform_reference_syntax(case, "test", 1, "test")
+
+    def test_single_reference__error(self):
+        """Should fail validation when the reference is malformed and used once."""
+        for context, ctx_desc in expression_contexts:
+            for token, tok_desc in error_tokens:
+                with (
+                    self.subTest(c=context, ctx=ctx_desc, t=token, tok=tok_desc),
+                    self.assertRaises(PyXFormError) as err,
+                ):
+                    case = context.format(token)
+                    pr.validate_pyxform_reference_syntax(case, "test", 1, "test")
+                self.assertEqual(
+                    err.exception.args[0],
+                    pr.PYXFORM_REFERENCE_INVALID.format(
+                        sheet="test", row_number=1, column="test"
+                    ),
+                    msg=case,
+                )
+
+    def test_multiple_reference__ok(self):
+        """Should pass validation for multiple (2x) expected reference form combinations."""
+        # Pairs of all OK + OK, in all contexts, both in any order (many tests!).
+        tokens = list(product(ok_tokens, repeat=2))
+        contexts = list(product(expression_contexts, repeat=2))
+        for (context1, ctx_desc1), (context2, ctx_desc2) in contexts:
+            context = context1 + context2
+            ctx_desc = (ctx_desc1, ctx_desc2)
+            for (token1, tok_desc1), (token2, tok_desc2) in tokens:
+                with self.subTest(
+                    context=context,
+                    contexts=ctx_desc,
+                    tokens=(token1, token2),
+                    tok_desc=(tok_desc1, tok_desc2),
+                ):
+                    case = context.format(token1, token2)
+                    pr.validate_pyxform_reference_syntax(case, "test", 1, "test")
+
+    def test_multiple_references__error(self):
+        """Should fail validation when one of multiple (2x) references is malformed."""
+        # Pairs of all OK + error, in all contexts, both in any order (tonnes of tests!).
+        tokens = list(
+            chain(product(ok_tokens, error_tokens), product(error_tokens, ok_tokens))
+        )
+        contexts = list(product(expression_contexts, repeat=2))
+        for (context1, ctx_desc1), (context2, ctx_desc2) in contexts:
+            context = context1 + context2
+            ctx_desc = (ctx_desc1, ctx_desc2)
+            for (token1, tok_desc1), (token2, tok_desc2) in tokens:
+                with (
+                    self.subTest(
+                        context=context,
+                        contexts=ctx_desc,
+                        tokens=(token1, token2),
+                        tok_desc=(tok_desc1, tok_desc2),
+                    ),
+                    self.assertRaises(PyXFormError) as err,
+                ):
+                    case = context.format(token1, token2)
+                    pr.validate_pyxform_reference_syntax(case, "test", 1, "test")
+                self.assertEqual(
+                    err.exception.args[0],
+                    pr.PYXFORM_REFERENCE_INVALID.format(
+                        sheet="test", row_number=1, column="test"
+                    ),
+                    msg=case,
+                )
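
For orientation, here is a minimal sketch (not part of the patch itself) of how the helpers introduced above might be exercised. It assumes a pyxform checkout that already includes these changes; the sample expression, sheet name ("survey"), row number, and column ("label") are made-up illustration values.

from pyxform.errors import PyXFormError
from pyxform.parsing.expression import parse_expression
from pyxform.validators.pyxform.pyxform_reference import validate_pyxform_reference_syntax

# parse_expression() is wrapped in lru_cache, so repeated scans of the same text
# reuse the previously computed token list instead of re-running the scanner.
tokens, remainder = parse_expression("if(${age} > 17, 'adult', 'minor')")
print([t.name for t in tokens])  # includes FUNC_CALL, PYXFORM_REF, NUMBER, ...

# A well-formed ${...} reference passes silently; a malformed one raises PyXFormError.
validate_pyxform_reference_syntax("Hello ${name}", "survey", 2, "label")
try:
    validate_pyxform_reference_syntax("Hello ${name", "survey", 2, "label")
except PyXFormError as err:
    print(err)  # "[row : 2] On the 'survey' sheet, the 'label' value is invalid. ..."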