Skip to content

Commit

Permalink
DH-4693/ColumnEntityChecker returns empty for BigQuery
Browse files: browse the repository at this point in the history
  • Loading branch information
MohammadrezaPourreza committed Sep 19, 2023
1 parent 459fb7f commit 084976e
Showing 1 changed file with 9 additions and 2 deletions.
11 changes: 9 additions & 2 deletions dataherald/sql_generator/dataherald_sqlagent.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,6 +8,7 @@
import numpy as np
import openai
import pandas as pd
import sqlalchemy
from google.api_core.exceptions import GoogleAPIError
from langchain.agents.agent import AgentExecutor
from langchain.agents.agent_toolkits.base import BaseToolkit
Expand All @@ -23,7 +24,9 @@
from langchain.tools.base import BaseTool
from overrides import override
from pydantic import BaseModel, Extra, Field
from sqlalchemy import MetaData
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.sql import func

from dataherald.context_store import ContextStore
from dataherald.db import DB
Expand Down Expand Up @@ -283,8 +286,12 @@ def _run(
) -> str:
schema, entity = tool_input.split(",")
table_name, column_name = schema.split("->")
query = f"SELECT DISTINCT {column_name} FROM {table_name}" # noqa: S608
results = self.db.run_sql(query)[1]["result"]
meta = MetaData(bind=self.db.engine)
table = sqlalchemy.Table(table_name.strip(), meta, autoload=True)
distinct_query = sqlalchemy.select(
[func.distinct(table.c[column_name.strip()])]
)
results = self.db.engine.execute(distinct_query).fetchall()
results = self.find_similar_strings(results, entity)
similar_items = "Similar items:\n"
for item in results:
Expand Down

0 comments on commit 084976e

Please sign in to comment.