Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retake #591

Merged
merged 17 commits into from
Sep 12, 2023
10 changes: 10 additions & 0 deletions docker-compose.uffizzi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,16 @@ services:
- POSTGRES_DB=grai
ports:
- 5432:5432
command:
[
"postgres",
"-c",
"wal_level=logical",
"-c",
"max_replication_slots=10",
"-c",
"max_wal_senders=5",
]

frontend:
image: "${FRONTEND_IMAGE}"
Expand Down
3 changes: 1 addition & 2 deletions grai-frontend/src/components/graph/drawer/GraphSearch.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,6 @@ const GraphSearch: React.FC<GraphSearchProps> = ({
}
}, [onSearch, selected, tables, setTableCenter])

if (error) return <GraphError error={error} />

return (
<Box>
<Box sx={{ p: 1 }}>
Expand All @@ -142,6 +140,7 @@ const GraphSearch: React.FC<GraphSearchProps> = ({
}}
/>
</Box>
{error && <GraphError error={error} />}
{loading && <Loading />}
<List
disablePadding
Expand Down
49 changes: 15 additions & 34 deletions grai-server/app/api/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import time
from collections import defaultdict
from enum import Enum
from typing import List, Optional
from typing import List, Optional, Tuple, Union

import strawberry
import strawberry_django
Expand Down Expand Up @@ -35,6 +35,7 @@
from lineage.models import Node as NodeModel
from lineage.models import Source as SourceModel
from lineage.types import EdgeFilter, EdgeOrder, Filter, NodeFilter, NodeOrder
from search.search import SearchClient
from users.types import User, UserFilter
from workspaces.models import Membership as MembershipModel
from workspaces.models import Workspace as WorkspaceModel
Expand Down Expand Up @@ -877,8 +878,6 @@
NodeModel.objects.filter(workspace=self).values_list("namespace", flat=True).distinct()
)

print(namespaces)

return DataWrapper(namespaces)

# Graph
Expand Down Expand Up @@ -920,38 +919,14 @@
self,
search: Optional[str] = strawberry.UNSET,
) -> List[BaseTable]:
def get_tables(search: Optional[str]) -> List[Table]:
def get_words(word: str) -> List[str]:
from django.db import connection

with connection.cursor() as cursor:
cursor.execute(
"SELECT * FROM unique_lexeme WHERE levenshtein_less_equal(word, %s, 2) < 3",
[word],
)
rows = cursor.fetchall()

return list([item[0] for item in rows])
def get_tables(
search: Optional[str],
) -> List[Union[Table, BaseTable]]:
client = SearchClient()

result = []
tables = client.search(workspace=self, query=search)

for word in search.replace("_", " ").replace(".", " ").strip().split(" "):
result += get_words(word)

search_string = " ".join(result)

return list(
NodeModel.objects.raw(
"""
SELECT *
FROM lineage_node
WHERE workspace_id=%s
AND metadata->'grai'->>'node_type'='Table'
AND ts_rank(search, websearch_to_tsquery('simple', replace(replace(%s, ' ', ' or '), '.', ' or '))) > 0
ORDER BY ts_rank(search, websearch_to_tsquery('simple', replace(replace(%s, ' ', ' or '), '.', ' or '))) DESC""",
[self.id, search_string, search_string],
)
)
return tables

graph = GraphCache(workspace=self)

Expand All @@ -960,7 +935,13 @@
if search:
tables = await sync_to_async(get_tables)(search)

ids = [table.id for table in tables]
if len(tables) == 0:
return []

if isinstance(tables[0], BaseTable):
return tables

Check warning on line 942 in grai-server/app/api/types.py

View check run for this annotation

Codecov / codecov/patch

grai-server/app/api/types.py#L941-L942

Added lines #L941 - L942 were not covered by tests

ids = [table["id"] for table in tables]

Check warning on line 944 in grai-server/app/api/types.py

View check run for this annotation

Codecov / codecov/patch

grai-server/app/api/types.py#L944

Added line #L944 was not covered by tests

return graph.get_tables(ids=ids)

Expand Down
3 changes: 3 additions & 0 deletions grai-server/app/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ fi
if [ "$INSTANCE_TYPE" = "server" ]; then
echo "Initializing database with default data..."
bash /usr/src/app/initialize_db.sh

echo "Building retake index..."
python manage.py build_search
fi

exec "$@"
41 changes: 40 additions & 1 deletion grai-server/app/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions grai-server/app/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ django-postgres-extra = "^2.0.8"
grandalf = "^0.8"
drf-nested-routers = "^0.93.4"
django-redis = "^5.3.0"
retakesearch = "^0.1.32"

[tool.poetry.group.dev.dependencies]
isort = "^5.10.1"
Expand Down
Empty file.
3 changes: 3 additions & 0 deletions grai-server/app/search/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.contrib import admin

# Register your models here.
6 changes: 6 additions & 0 deletions grai-server/app/search/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class SearchConfig(AppConfig):
    """Django application configuration for the ``search`` app."""

    # Label under which the app is listed in INSTALLED_APPS.
    name = "search"
    # Primary-key field type used for models that do not declare one.
    default_auto_field = "django.db.models.BigAutoField"
13 changes: 13 additions & 0 deletions grai-server/app/search/basic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from typing import List, Optional

from lineage.graph_cache import GraphCache
from workspaces.models import Workspace

from .search import SearchInterface


class BasicSearch(SearchInterface):
    """Search backend that delegates to the workspace's ``GraphCache``."""

    def search(self, workspace: Workspace, query: Optional[str]) -> List:
        """Return the result of ``GraphCache.get_tables`` for ``query``.

        Args:
            workspace: Workspace whose graph cache is queried.
            query: Search text, forwarded unchanged as the ``search`` keyword.

        Returns:
            Whatever ``GraphCache.get_tables(search=query)`` returns.
        """
        return GraphCache(workspace=workspace).get_tables(search=query)
15 changes: 15 additions & 0 deletions grai-server/app/search/management/commands/build_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from django.core.management.base import BaseCommand

from search.search import SearchClient


class Command(BaseCommand):
    """Management command that builds the search index via ``SearchClient``."""

    help = "Build the search index"

    def add_arguments(self, parser):
        """Register no command-line arguments; this command takes none."""

    def handle(self, *args, **options):
        """Build the index using whichever backend ``SearchClient`` selects."""
        SearchClient().build()
Empty file.
3 changes: 3 additions & 0 deletions grai-server/app/search/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.db import models

# Create your models here.
74 changes: 74 additions & 0 deletions grai-server/app/search/retake.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from typing import List, Optional

from decouple import config
from retakesearch import Client, Database, Search, Table

from workspaces.models import Workspace

from .search import SearchInterface

# Shared Retake client, created once at import time. Neither setting is given
# a default here, so both RETAKE_API_KEY and RETAKE_API_URL must be configured
# before this module is imported.
client = Client(api_key=config("RETAKE_API_KEY"), url=config("RETAKE_API_URL"))


class RetakeSearch(SearchInterface):
    """Search backend that indexes ``lineage_node`` rows in Retake and queries them."""

    # Name of the Retake index used for both building and searching.
    index_name = "nodes"
    # Columns of ``lineage_node`` that are synced and vectorized.
    columns = [
        "id",
        "name",
        "namespace",
        "display_name",
        "workspace_id",
        "metadata->grai->node_type",
    ]

    def build(self):
        """Fetch (or create) the index, vectorize it and attach the database source.

        Connection settings are read from ``DB_NAME``, ``DB_HOST``, ``DB_PORT``,
        ``DB_USER`` and ``DB_PASSWORD``, each with a local-development default.

        Raises:
            ValueError: If neither fetching nor creating the index yields an index.
        """
        database = Database(
            dbname=config("DB_NAME", default="grai"),
            host=config("DB_HOST", default="db"),
            port=int(config("DB_PORT", default="5432")),
            user=config("DB_USER", default="grai"),
            password=config("DB_PASSWORD", default="grai"),
        )

        table = Table(
            name="lineage_node",
            columns=self.columns,
            transform={
                "mapping": {"workspace_id": "keyword"},
                # NOTE(review): presumably Retake flattens the
                # "metadata->grai->node_type" path to "metadata_grai_node_type";
                # it is renamed so searches can filter on "node_type" - confirm.
                "rename": {
                    "metadata_grai_node_type": "node_type",
                },
            },
        )

        try:
            index = client.get_index(index_name=self.index_name)
        except Exception:
            # Fall back to creating the index when it cannot be fetched.
            # Catch Exception rather than using a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            index = client.create_index(index_name=self.index_name)

        if not index:
            raise ValueError("Table failed to index due to an unexpected error")

        index.vectorize(self.columns)
        index.add_source(database=database, table=table)

    def search(self, workspace: Workspace, query: Optional[str]) -> List:
        """Search table nodes of ``workspace`` by ``display_name``.

        Args:
            workspace: Workspace whose ``id`` restricts the results.
            query: Search text passed to the neural query on ``display_name``.

        Returns:
            The ``_source`` payload of every hit; an empty list when the
            response carries no hits.
        """
        index = client.get_index(self.index_name)

        # Restrict results to this workspace via the keyword-mapped field.
        dsl = {"query": {"query_string": {"query": f"workspace_id.keyword:{workspace.id}"}}}
        bm25_search_query = (
            Search()
            .from_dict(dsl)
            .filter("term", node_type="table")
            .with_neural(query=query, fields=["display_name"])
        )

        result = index.search(bm25_search_query)

        # The response nests hits as {"hits": {"hits": [...]}}; tolerate either
        # level being absent.
        hits = result.get("hits", {"hits": []}).get("hits", [])

        return [hit["_source"] for hit in hits]
34 changes: 34 additions & 0 deletions grai-server/app/search/search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from abc import ABC, abstractmethod
from typing import List, Optional

from decouple import config

from workspaces.models import Workspace


class SearchInterface(ABC):
    """Common interface implemented by every search backend."""

    @abstractmethod
    def search(self, workspace: Workspace, query: Optional[str]) -> List:
        """Return the search results for ``query`` within ``workspace``.

        ``query`` is annotated ``Optional[str]`` to match the concrete
        implementations, which all accept ``None``.
        """
        pass  # pragma: no cover

    def build(self) -> None:
        """Build the backend's index; the default implementation does nothing."""
        return None


class SearchClient(SearchInterface):
    """Facade that forwards search and build calls to the configured backend."""

    @property
    def client(self):
        """Return a new backend instance, chosen from ``RETAKE_API_URL``.

        The backend modules are imported lazily so that only the selected
        one is loaded.
        """
        retake_url = config("RETAKE_API_URL", None)

        if not retake_url:
            from .basic import BasicSearch

            return BasicSearch()

        # NOTE: Codecov reported this Retake branch as not covered by tests.
        from .retake import RetakeSearch

        return RetakeSearch()

    def search(self, workspace: Workspace, query: Optional[str]) -> List:
        """Run ``query`` against ``workspace`` using the selected backend."""
        backend = self.client
        return backend.search(workspace, query)

    def build(self):
        """Build the selected backend's index and return its result."""
        backend = self.client
        return backend.build()
3 changes: 3 additions & 0 deletions grai-server/app/search/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.test import TestCase

# Create your tests here.
7 changes: 7 additions & 0 deletions grai-server/app/search/tests/test_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from search.search import SearchClient


async def test_build():
    """Smoke test: building through ``SearchClient`` must not raise."""
    SearchClient().build()
3 changes: 3 additions & 0 deletions grai-server/app/search/views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.shortcuts import render

# Create your views here.
1 change: 1 addition & 0 deletions grai-server/app/the_guide/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ def inner(value: str | bool) -> bool:
"notifications",
"workspaces",
"users",
"search",
"telemetry",
]

Expand Down
Loading
Loading