Skip to content

Commit

Permalink
Merge pull request #4712 from freelawproject/4312-return-search-score…
Browse files Browse the repository at this point in the history
…s-api

4312 Introduced score field in the V4 Search API results
  • Loading branch information
mlissner authored Nov 21, 2024
2 parents 2426254 + d5655e7 commit fbea00d
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 27 deletions.
4 changes: 2 additions & 2 deletions cl/api/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from cl.corpus_importer.api_serializers import DocketEntrySerializer
from cl.lib.elasticsearch_utils import merge_highlights_into_result
from cl.search.api_serializers import (
RECAPESResultSerializer,
RECAPESWebhookResultSerializer,
V3OAESResultSerializer,
)
from cl.search.api_utils import ResultObject
Expand Down Expand Up @@ -180,7 +180,7 @@ def send_search_alert_webhook_es(
meta_hl,
result,
)
serialized_results = RECAPESResultSerializer(
serialized_results = RECAPESWebhookResultSerializer(
results, many=True
).data
case _:
Expand Down
9 changes: 8 additions & 1 deletion cl/api/templates/search-api-docs-vlatest.html
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,10 @@ <h2 id="usage">Basic Usage</h2>
"lexisCite": "",
"meta": {
"timestamp": "2024-06-22T10:26:35.320787Z",
"date_created": "2022-06-26T23:24:18.926040Z"
"date_created": "2022-06-26T23:24:18.926040Z",
"score": {
"bm25": 2.1369965
}
},
"neutralCite": "",
"non_participating_judge_ids": [],
Expand Down Expand Up @@ -249,6 +252,10 @@ <h3 id="notes">Special Notes</h3>
<p>This field only displays Opinion text content.
</p>
</li>
<li>
<p>The <code>meta</code> field in main documents contains the <code>score</code> field, which is currently a JSON object that includes the <code>bm25</code> score used by Elasticsearch to rank results. Additional scores may be introduced in the future.
</p>
</li>
</ol>


Expand Down
1 change: 1 addition & 0 deletions cl/lib/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,7 @@ def midnight_pt_test(d: datetime.date) -> datetime.datetime:
"timestamp": lambda x: x["result"]
.date_created.isoformat()
.replace("+00:00", "Z"),
"score": lambda x: {"bm25": None},
}

v4_recap_meta_keys = v4_meta_keys.copy()
Expand Down
78 changes: 59 additions & 19 deletions cl/search/api_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,11 @@ class Meta:
)


class MetaDataSerializer(serializers.Serializer):
class ScoreDataSerializer(serializers.Serializer):
    """Serialize the relevance scores reported for a search result.

    Currently exposes a single ``bm25`` float, read from the ``bm25_score``
    value of the object being serialized.
    """

    bm25 = serializers.FloatField(read_only=True, source="bm25_score")


class BaseMetaDataSerializer(serializers.Serializer):
"""The metadata serializer V4 Search API."""

timestamp = TimeStampField(read_only=True, default_timezone=timezone.utc)
Expand All @@ -444,29 +448,47 @@ class MetaDataSerializer(serializers.Serializer):
)


class RECAPMetaDataSerializer(MetaDataSerializer):
class MainDocumentMetaDataSerializer(BaseMetaDataSerializer):
    """The V4 Search API metadata serializer for main documents.

    Extends the base metadata with the nested ``score`` field.
    """

    # source="*" passes the whole result object to ScoreDataSerializer, which
    # then reads the score value(s) it needs from that object.
    score = ScoreDataSerializer(source="*", read_only=True)


class RECAPMetaDataSerializer(MainDocumentMetaDataSerializer):
"""The metadata serializer for the RECAP search type includes the
additional more_docs field.
"""

more_docs = serializers.BooleanField(
read_only=True, source="child_remaining"
read_only=True, source="child_remaining", default=False
)


class MetaMixin(serializers.Serializer):
"""Mixin to add nested metadata serializer."""
class RECAPWebhookMetaDataSerializer(BaseMetaDataSerializer):
    """The metadata serializer for RECAP search webhook payloads.

    Adds the ``more_docs`` field to the base metadata. It derives from
    BaseMetaDataSerializer rather than MainDocumentMetaDataSerializer, so the
    ``score`` field is not included.
    """

    more_docs = serializers.BooleanField(
        read_only=True, source="child_remaining", default=False
    )

meta = MetaDataSerializer(source="*", read_only=True)

class MainMetaMixin(serializers.Serializer):
"""Mixin to add nested metadata serializer for main documents."""

class RECAPMetaMixin(serializers.Serializer):
"""Mixin to add nested metadata serializer for the RECAP search type."""
meta = MainDocumentMetaDataSerializer(source="*", read_only=True)

meta = RECAPMetaDataSerializer(source="*", read_only=True)

class ChildMetaMixin(serializers.Serializer):
    """Mixin to add the nested metadata serializer for child documents.

    Child documents use BaseMetaDataSerializer instead of
    MainDocumentMetaDataSerializer, so no ``score`` field is added to their
    ``meta``.
    """

    meta = BaseMetaDataSerializer(source="*", read_only=True)

class BaseRECAPDocumentESResultSerializer(MetaMixin, DocumentSerializer):

class BaseRECAPDocumentESResultSerializer(DocumentSerializer):
"""The base serializer class for RECAP_DOCUMENT search type results."""

# Fields from the RECAPDocument
Expand Down Expand Up @@ -505,6 +527,12 @@ class Meta:
)


class NestedRECAPDocumentESResultSerializer(
    BaseRECAPDocumentESResultSerializer, ChildMetaMixin
):
    """The serializer for RECAP documents nested within a parent result.

    Combines the base RECAP document fields with the child-document ``meta``
    field provided by ChildMetaMixin.
    """


class BaseDocketESResultSerializer(DocumentSerializer):
"""The serializer class for DOCKETS Search type results."""

Expand Down Expand Up @@ -541,25 +569,37 @@ class Meta:
)


class RECAPDocumentESResultSerializer(BaseRECAPDocumentESResultSerializer):
class RECAPDocumentESResultSerializer(
BaseRECAPDocumentESResultSerializer, MainMetaMixin
):
"""The serializer for RECAP_DOCUMENT search type results."""

docket_id = serializers.IntegerField(read_only=True)


class DocketESResultSerializer(MetaMixin, BaseDocketESResultSerializer):
class DocketESResultSerializer(MainMetaMixin, BaseDocketESResultSerializer):
"""The serializer class for DOCKETS Search type results."""


class RECAPESResultSerializer(RECAPMetaMixin, BaseDocketESResultSerializer):
class RECAPESResultSerializer(BaseDocketESResultSerializer):
"""The serializer class for RECAP search type results."""

recap_documents = BaseRECAPDocumentESResultSerializer(
recap_documents = NestedRECAPDocumentESResultSerializer(
many=True, read_only=True, source="child_docs"
)
meta = RECAPMetaDataSerializer(source="*", read_only=True)


class RECAPESWebhookResultSerializer(BaseDocketESResultSerializer):
    """The serializer class for RECAP search webhook results.

    Nests the docket's RECAP documents under ``recap_documents`` and uses the
    webhook-specific metadata serializer for ``meta``.
    """

    recap_documents = NestedRECAPDocumentESResultSerializer(
        many=True, read_only=True, source="child_docs"
    )
    meta = RECAPWebhookMetaDataSerializer(source="*", read_only=True)


class OpinionDocumentESResultSerializer(MetaMixin, DocumentSerializer):
class OpinionDocumentESResultSerializer(ChildMetaMixin, DocumentSerializer):
"""The serializer for OpinionDocument results."""

snippet = HighlightedField(read_only=True, source="text")
Expand All @@ -579,7 +619,7 @@ class Meta:
)


class OpinionClusterESResultSerializer(MetaMixin, DocumentSerializer):
class OpinionClusterESResultSerializer(MainMetaMixin, DocumentSerializer):
"""The serializer for OpinionCluster Search results."""

opinions = OpinionDocumentESResultSerializer(
Expand Down Expand Up @@ -609,7 +649,7 @@ class Meta:
)


class PositionESResultSerializer(MetaMixin, DocumentSerializer):
class PositionESResultSerializer(ChildMetaMixin, DocumentSerializer):
"""The serializer for Positions Search results."""

class Meta:
Expand Down Expand Up @@ -644,7 +684,7 @@ class Meta:
)


class PersonESResultSerializer(MetaMixin, DocumentSerializer):
class PersonESResultSerializer(MainMetaMixin, DocumentSerializer):
"""The serializer for Person Search results."""

name = HighlightedField(read_only=True)
Expand Down Expand Up @@ -674,7 +714,7 @@ class Meta:
)


class OAESResultSerializer(MetaMixin, DocumentSerializer):
class OAESResultSerializer(MainMetaMixin, DocumentSerializer):
"""The serializer for V4 Oral argument results."""

snippet = HighlightedField(read_only=True, source="text")
Expand Down
2 changes: 2 additions & 0 deletions cl/search/api_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,8 @@ def process_results(self, results: Response) -> None:
)
)
result["child_docs"] = child_result_objects
# Include the ES main document score as bm25_score.
result["bm25_score"] = result.meta.score

if self.reverse:
# If doing backward pagination, reverse the results of the current
Expand Down
33 changes: 28 additions & 5 deletions cl/tests/cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,11 +262,20 @@ async def _compare_field(
meta_expected_value = await sync_to_async(get_meta_expected_value)(
content_to_compare
)
self.assertEqual(
meta_value,
meta_expected_value,
f"The field '{meta_field}' does not match.",
)
if meta_field == "score":
# Special case for the score field. Only confirm the presence of
# keys and avoid comparing values, as they differ in each response.
self.assertEqual(
set(meta_value.keys()),
set(meta_expected_value.keys()),
f"The keys in field '{meta_field}' do not match.",
)
else:
self.assertEqual(
meta_value,
meta_expected_value,
f"The field '{meta_field}' does not match.",
)

async def _test_api_fields_content(
self,
Expand Down Expand Up @@ -296,6 +305,10 @@ async def _test_api_fields_content(
meta_value,
) in child_value.items():
with self.subTest(meta_field=meta_field):
self.assertFalse(
meta_field == "score",
msg="score key should not be present in nested documents",
)
await self._compare_field(
meta_field,
meta_value,
Expand Down Expand Up @@ -644,6 +657,11 @@ def _assert_webhook_hit_hl(
if webhook["payload"]["alert"]["name"] == alert_title:
hit = webhook["payload"]["results"][0]
if child_field:
self.assertNotIn(
"score",
hit["recap_documents"][0]["meta"],
msg="score shouldn't be present on webhook nested documents",
)
child_field_content = hit["recap_documents"][0][field_name]
self.assertIn(
hl_expected,
Expand All @@ -652,6 +670,11 @@ def _assert_webhook_hit_hl(
% field_name,
)
else:
self.assertNotIn(
"score",
hit["meta"],
msg="score shouldn't be present on webhook main document",
)
parent_field_content = hit[field_name]
self.assertIn(
hl_expected,
Expand Down

0 comments on commit fbea00d

Please sign in to comment.