diff --git a/cl/citations/annotate_citations.py b/cl/citations/annotate_citations.py
index 89056f2b34..ea7d965f9e 100644
--- a/cl/citations/annotate_citations.py
+++ b/cl/citations/annotate_citations.py
@@ -16,6 +16,8 @@ def get_and_clean_opinion_text(document: Opinion | RECAPDocument) -> None:
 
     :param document: The Opinion or RECAPDocument whose text should be parsed
     """
+
+    # Check CAP data (xml_harvard) first; it is the preferred source.
     for attr in [
         "xml_harvard",
         "html_anon_2020",
diff --git a/cl/citations/tests.py b/cl/citations/tests.py
index f4cb51d575..59b1e7a986 100644
--- a/cl/citations/tests.py
+++ b/cl/citations/tests.py
@@ -290,7 +290,7 @@ def test_make_html_from_harvard_xml(self) -> None:
                 s=s,
                 expected_html=expected_html,
             ):
-                opinion = Opinion(html=s)
+                opinion = Opinion(xml_harvard=s)
                 get_and_clean_opinion_text(opinion)
                 citations = get_citations(
                     opinion.cleaned_text, tokenizer=HYPERSCAN_TOKENIZER