From 3d2762936fde68ee554733ccb57d1c22c277403e Mon Sep 17 00:00:00 2001 From: Marcel Kornblum Date: Tue, 15 Aug 2023 11:11:19 +0100 Subject: [PATCH] Search improvements: release to user-facing interface (#417) Merge this when you want to deploy the search improvements to prod ============================================== Makes the search improvements underpinning the explore page join up with and connect into the main user-facing search. Also updates the tracking version number for easier analytics segregation. NB: may need to re-index and/or run a page save operation for each page in order to re-populate the index. --------- Co-authored-by: Cameron Lamb --- src/content/models.py | 6 +- src/extended_search/index.py | 25 ++++++++ src/search/search.py | 74 +++++++---------------- src/search/templates/search/search.html | 2 +- src/search/templatetags/search_explore.py | 12 ++-- src/tools/models.py | 8 ++- 6 files changed, 65 insertions(+), 62 deletions(-) diff --git a/src/content/models.py b/src/content/models.py index 2c4d8f69a..7090d3198 100644 --- a/src/content/models.py +++ b/src/content/models.py @@ -162,7 +162,7 @@ class ContentPageIndexManager(ModelIndexManager): explicit=True, ), IndexedField("is_creatable", filter=True), - IndexedField("last_published_at", proximity=True), + IndexedField("published_date", proximity=True), ] @@ -258,6 +258,10 @@ class ContentPage(BasePage): search_fields = BasePage.search_fields + ContentPageIndexManager() + @property + def published_date(self): + return self.last_published_at + def _generate_search_field_content(self): self.search_title = self.title self.search_headings = "" diff --git a/src/extended_search/index.py b/src/extended_search/index.py index 87f1ebc3a..00aa4523d 100644 --- a/src/extended_search/index.py +++ b/src/extended_search/index.py @@ -25,6 +25,31 @@ def _check_search_fields(cls, **kwargs): id="wagtailsearch.W004", ) ) + + parent_fields = [] + for parent_cls in cls.__bases__: + parent_fields += getattr(parent_cls, 
"search_fields", []) + model_fields = [] + for field in cls.get_search_fields(): + model_field_name = getattr(field, "model_field_name", None) + if not model_field_name: + model_field_name = field.field_name + if field not in parent_fields and model_field_name not in model_fields: + message = "indexed field '{name}' is defined in {model} and {parent}" + definition_model = field.get_definition_model(cls) + if definition_model != cls: + errors.append( + checks.Warning( + message.format( + model=cls.__name__, + name=field.field_name, + parent=definition_model.__name__, + ), + obj=cls, + id="extended_search.E001", + ) + ) + model_fields.append(model_field_name) return errors search_fields = [] diff --git a/src/search/search.py b/src/search/search.py index 41b7b2d6b..9c2c54eb5 100644 --- a/src/search/search.py +++ b/src/search/search.py @@ -1,8 +1,8 @@ from content.models import ContentPage, ContentPageIndexManager from extended_search.managers import get_search_query -from news.models import NewsPage +from news.models import NewsPage, NewsPageIndexManager from peoplefinder.models import Person, PersonIndexManager, Team, TeamIndexManager -from tools.models import Tool +from tools.models import Tool, ToolIndexManager from working_at_dit.models import PoliciesAndGuidanceHome @@ -35,24 +35,35 @@ def pinned(self, query): class PagesSearchVector(SearchVector): page_model = None + page_index_manager = None def get_queryset(self): return self.page_model.objects.public_or_login().live() + def get_query(self, query_str): + return get_search_query( + self.page_index_manager, + query_str, + self.page_model, + ) + def pinned(self, query): return self.get_queryset().pinned(query) def search(self, query, *args, **kwargs): queryset = self.get_queryset().not_pinned(query) + query = self.get_query(query) return self._wagtail_search(queryset, query, *args, **kwargs) class AllPagesSearchVector(PagesSearchVector): page_model = ContentPage + page_index_manager = ContentPageIndexManager 
class GuidanceSearchVector(PagesSearchVector): page_model = ContentPage + page_index_manager = ContentPageIndexManager def get_queryset(self): policies_and_guidance_home = PoliciesAndGuidanceHome.objects.first() @@ -62,10 +73,12 @@ def get_queryset(self): class NewsSearchVector(PagesSearchVector): page_model = NewsPage + page_index_manager = NewsPageIndexManager class ToolsSearchVector(PagesSearchVector): page_model = Tool + page_index_manager = ToolIndexManager class PeopleSearchVector(SearchVector): @@ -81,6 +94,13 @@ def get_queryset(self): def search(self, query, *args, **kwargs): queryset = self.get_queryset() + query = get_search_query( + PersonIndexManager, + query, + Person, + *args, + **kwargs, + ) return self._wagtail_search(queryset, query, *args, **kwargs) @@ -88,56 +108,6 @@ class TeamsSearchVector(SearchVector): def get_queryset(self): return Team.objects.all().with_all_parents() - -# -# New vectors for complex search alongside v2 - should get rolled in at end of -# the indexing improvements workstream. Need to be alongside to run v2 and v2.5 -# queries side by side, e.g. for "explore" page -# - - -class NewAllPagesSearchVector(AllPagesSearchVector): - def _wagtail_search(self, queryset, query, *args, **kwargs): - return queryset.search(query, *args, **kwargs).annotate_score("_score") - - def search(self, query, *args, **kwargs): - query = get_search_query( - ContentPageIndexManager, query, ContentPage, *args, **kwargs - ) - return self._wagtail_search(self.get_queryset(), query, *args, **kwargs) - - -class NewGuidanceSearchVector(GuidanceSearchVector): - ... - - -class NewNewsSearchVector(NewsSearchVector): - ... - - -class NewToolsSearchVector(ToolsSearchVector): - ... 
- - -class NewPeopleSearchVector(PeopleSearchVector): - def _wagtail_search(self, queryset, query, *args, **kwargs): - return queryset.search(query, *args, **kwargs).annotate_score("_score") - - def search(self, query, *args, **kwargs): - queryset = self.get_queryset() - query_obj = get_search_query(PersonIndexManager, query, Person, *args, **kwargs) - results = set(self._wagtail_search(queryset, query_obj, *args, **kwargs)) - autocomplete_results = set( - self.get_queryset().autocomplete(query).annotate_score("_score") - ) - all_results = results | autocomplete_results - return sorted(all_results, key=lambda x: x._score, reverse=True) - - -class NewTeamsSearchVector(TeamsSearchVector): - def _wagtail_search(self, queryset, query, *args, **kwargs): - return queryset.search(query, *args, **kwargs).annotate_score("_score") - def search(self, query, *args, **kwargs): queryset = self.get_queryset() query = get_search_query(TeamIndexManager, query, Team, *args, **kwargs) diff --git a/src/search/templates/search/search.html b/src/search/templates/search/search.html index 2d134c288..6761ea988 100644 --- a/src/search/templates/search/search.html +++ b/src/search/templates/search/search.html @@ -17,7 +17,7 @@