Skip to content

Commit

Permalink
Update .keyword -> .raw for open search queries
Browse files Browse the repository at this point in the history
  • Loading branch information
amywieliczka committed Apr 15, 2024
1 parent 44c682a commit a1ccab5
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 36 deletions.
6 changes: 3 additions & 3 deletions calisphere/collection_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,10 @@ def _parse_custom_facets(self):
{
'form_name': custom_facet['facet_field'],
'facet_field': (
f"{custom_facet['facet_field'][:-3]}.keyword"),
f"{custom_facet['facet_field'][:-3]}.raw"),
'display_name': custom_facet['label'],
'filter_field': (
f"{custom_facet['facet_field'][:-3]}.keyword"),
f"{custom_facet['facet_field'][:-3]}.raw"),
'sort_by': custom_facet['sort_by'],
'faceting_allowed': True
}
Expand Down Expand Up @@ -310,7 +310,7 @@ def get_mosaic(self):
repositories.append(repository['name'])

if self.index == 'es':
sort = ("sort_title.keyword", "asc")
sort = ("sort_title.raw", "asc")
else:
sort = ("sort_title", "asc")

Expand Down
29 changes: 27 additions & 2 deletions calisphere/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,32 @@
FacetDisplay = namedtuple(
'FacetDisplay', 'facet, display')

# solr schema fields that have a `_ss` varient for facets
# Index schema fields that are of type=keyword, so we can get
# facets directly without needing an _ss or .raw suffix.
# Imported by calisphere/es_cache_retry.py, where query_encode() checks
# each requested facet against this list: a facet named here (or one
# that already ends in '.raw') is used as the field name unchanged,
# while any other facet has '.raw' appended.
UCLDC_SCHEMA_TERM_FIELDS = [
'calisphere-id',
'id',
'campus_name',
'campus_data',
'campus_url',
'campus_id',
'collection_name',
'collection_data',
'collection_url',
'collection_id',
'sort_collection_data',
'repository_name',
'repository_data',
'repository_url',
'repository_id',
'rights_uri',
'url_item',
'fetcher_type',
'mapper_type'
]

# index schema fields that are of type=text and thus need a
# solr _ss or opensearch .raw suffix to get facets
UCLDC_SCHEMA_FACETS = [
FacetDisplay("title", "title"),
FacetDisplay("alternative_title", "alternative title"),
Expand Down Expand Up @@ -70,7 +95,7 @@
for fd in UCLDC_SCHEMA_FACETS
]
UCLDC_ES_SCHEMA_FACETS = [
FacetDisplayField(fd.facet, fd.display, f"{fd.facet}.keyword")
FacetDisplayField(fd.facet, fd.display, f"{fd.facet}.raw")
for fd in UCLDC_SCHEMA_FACETS
]

Expand Down
30 changes: 5 additions & 25 deletions calisphere/es_cache_retry.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
""" logic for cache / retry for es (opensearch) and JSON from registry
"""

from calisphere.constants import UCLDC_SCHEMA_TERM_FIELDS
from future import standard_library
from django.core.cache import cache
from django.conf import settings
Expand Down Expand Up @@ -166,9 +167,9 @@ def es_mlt(item_id):
"query": {
"more_like_this": {
"fields": [
"title.keyword",
"title.raw",
"collection_data",
"subject.keyword",
"subject.raw",
],
"like": [
{"_id": item_id}
Expand Down Expand Up @@ -241,33 +242,12 @@ def query_encode(query_string: str = None,
es_params['query'] = es_filters[0]

if facets:
keyword_fields = [
'calisphere-id',
'id',
'campus_name',
'campus_data',
'campus_url',
'campus_id',
'collection_name',
'collection_data',
'collection_url',
'collection_id',
'sort_collection_data',
'repository_name',
'repository_data',
'repository_url',
'repository_id',
'rights_uri',
'url_item',
'fetcher_type',
'mapper_type'
]
aggs = {}
for facet in facets:
if facet in keyword_fields or facet[-8:] == '.keyword':
if facet in UCLDC_SCHEMA_TERM_FIELDS or facet[-4:] == '.raw':
field = facet
else:
field = f'{facet}.keyword'
field = f'{facet}.raw'

aggs[facet] = {
"terms": {
Expand Down
6 changes: 3 additions & 3 deletions calisphere/facet_filter_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ class ESRelationFF(ESFacetFilterType):
form_name = 'relation_ss'
facet_field = 'relation'
display_name = 'Relation'
filter_field = 'relation.keyword'
filter_field = 'relation.raw'
sort_by = 'value'
faceting_allowed = False

Expand All @@ -186,7 +186,7 @@ class ESTypeFF(ESFacetFilterType):
form_name = 'type_ss'
facet_field = 'type'
display_name = 'Type of Item'
filter_field = 'type.keyword'
filter_field = 'type.raw'


class DecadeFF(FacetFilterType):
Expand All @@ -201,7 +201,7 @@ class ESDecadeFF(ESFacetFilterType):
form_name = 'facet_decade'
facet_field = 'date'
display_name = 'Decade'
filter_field = 'date.keyword'
filter_field = 'date.raw'
sort_by = 'value'


Expand Down
6 changes: 3 additions & 3 deletions calisphere/search_form.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@ def solr_escape(text):

class SortField(object):
default = 'relevance'
no_keyword = 'a'
default_without_query_string = 'a'

def __init__(self, request):
if (request.get('q')
or request.getlist('rq')
or request.getlist('fq')):
self.sort = request.get('sort', self.default)
else:
self.sort = request.get('sort', self.no_keyword)
self.sort = request.get('sort', self.default_without_query_string)


class SearchForm(object):
Expand Down Expand Up @@ -389,7 +389,7 @@ class ESCampusCarouselForm(CampusCarouselForm):

class AltSortField(SortField):
default = 'oldest-end'
no_keyword = 'oldest-end'
default_without_query_string = 'oldest-end'


class CollectionFacetValueForm(CollectionForm):
Expand Down

0 comments on commit a1ccab5

Please sign in to comment.