Skip to content

Commit

Permalink
Use trigram similarity instead of FTS
Browse files Browse the repository at this point in the history
  • Loading branch information
Bastien Abadie committed Mar 25, 2024
1 parent 07c607b commit 4f86df2
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 7 deletions.
13 changes: 13 additions & 0 deletions treeherder/model/migrations/0031_trigram_extension.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Generated by Django 4.1.13 on 2024-03-25 16:15

from django.db import migrations
from django.contrib.postgres.operations import TrigramExtension


class Migration(migrations.Migration):
    """Schema migration whose only step is Django's ``TrigramExtension`` operation.

    ``TrigramExtension`` comes from ``django.contrib.postgres.operations``, so
    this migration targets PostgreSQL databases.
    """

    # Applied after migration 0030 of the "model" app.
    dependencies = [("model", "0030_group_durations")]

    # A single operation: set up the trigram extension.
    operations = [
        TrigramExtension(),
    ]
17 changes: 10 additions & 7 deletions treeherder/model/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@
import newrelic.agent
from django.conf import settings
from django.contrib.auth.models import User
from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector
from django.core.cache import cache
from django.core.exceptions import ObjectDoesNotExist
from django.core.validators import MinLengthValidator
from django.db import models, transaction
from django.db.models import Count, Max, Min, Q, Subquery
from django.contrib.postgres.search import TrigramSimilarity
from django.db.utils import ProgrammingError
from django.forms import model_to_dict
from django.utils import timezone
Expand Down Expand Up @@ -275,12 +275,15 @@ def search(cls, search_term):
[search_term_fulltext, search_term_like, max_size],
)
else:
# On PostgreSQL we can use the full text search features
vector = SearchVector("summary")
query = SearchQuery(search_term_fulltext)
recent_qs = Bugscache.objects.annotate(rank=SearchRank(vector, query)).order_by(
"-rank", "id"
)[0:max_size]
# On PostgreSQL we can use the ORM directly, but NOT the full text search,
# as its ranking algorithm expects English words, not paths.
# So we use standard pattern matching AND trigram similarity, which compare
# sequences of characters instead of words.
recent_qs = (
Bugscache.objects.filter(summary__icontains=search_term_fulltext)
.annotate(similarity=TrigramSimilarity("summary", search_term_fulltext))
.order_by("-similarity")[0:max_size]
)

exclude_fields = ["modified", "processed_update"]
try:
Expand Down

0 comments on commit 4f86df2

Please sign in to comment.