Add dbt-duckdb as a supported adapter and use it for local dev/test #710

Merged (1 commit) on Aug 4, 2023
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20230804-064652.yaml
@@ -0,0 +1,6 @@
kind: Features
body: Add dbt-duckdb as a supported adapter and remove legacy DuckDB sql client
time: 2023-08-04T06:46:52.989851-07:00
custom:
Author: jwills
Issue: "583"
4 changes: 2 additions & 2 deletions local-data-warehouses/README.md
@@ -3,9 +3,9 @@
This folder includes utilities to run data warehouses for local development. See the [Contributing guide](../CONTRIBUTING.md)
to ensure your environment is setup properly.

## SQLite
## DuckDB

We assume that you have SQLite installed in your environment. By default, tests will run with SQLite.
By default, tests will run with DuckDB.
Comment on lines +6 to +8

Contributor: Nice catch, thank you!

## PostgreSQL

6 changes: 6 additions & 0 deletions metricflow/cli/dbt_connectors/adapter_backed_client.py
@@ -16,6 +16,7 @@
from metricflow.random_id import random_id
from metricflow.sql.render.big_query import BigQuerySqlQueryPlanRenderer
from metricflow.sql.render.databricks import DatabricksSqlQueryPlanRenderer
from metricflow.sql.render.duckdb_renderer import DuckDbSqlQueryPlanRenderer
from metricflow.sql.render.postgres import PostgresSQLSqlQueryPlanRenderer
from metricflow.sql.render.redshift import RedshiftSqlQueryPlanRenderer
from metricflow.sql.render.snowflake import SnowflakeSqlQueryPlanRenderer
@@ -40,6 +41,7 @@ class SupportedAdapterTypes(enum.Enum):
SNOWFLAKE = "snowflake"
REDSHIFT = "redshift"
BIGQUERY = "bigquery"
DUCKDB = "duckdb"

@property
def sql_engine_type(self) -> SqlEngine:
@@ -54,6 +56,8 @@ def sql_engine_type(self) -> SqlEngine:
return SqlEngine.REDSHIFT
elif self is SupportedAdapterTypes.SNOWFLAKE:
return SqlEngine.SNOWFLAKE
elif self is SupportedAdapterTypes.DUCKDB:
return SqlEngine.DUCKDB
else:
assert_values_exhausted(self)

@@ -70,6 +74,8 @@ def sql_query_plan_renderer(self) -> SqlQueryPlanRenderer:
return RedshiftSqlQueryPlanRenderer()
elif self is SupportedAdapterTypes.SNOWFLAKE:
return SnowflakeSqlQueryPlanRenderer()
elif self is SupportedAdapterTypes.DUCKDB:
return DuckDbSqlQueryPlanRenderer()
else:
assert_values_exhausted(self)

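For illustration, a minimal sketch (not part of this PR) of how the two new `DUCKDB` branches above resolve; constructing the enum directly from the string `"duckdb"` is just for demonstration:

```python
# Sketch only: exercises the new DUCKDB branches shown in the diff above.
from metricflow.cli.dbt_connectors.adapter_backed_client import SupportedAdapterTypes
from metricflow.protocols.sql_client import SqlEngine

adapter_type = SupportedAdapterTypes("duckdb")
assert adapter_type.sql_engine_type is SqlEngine.DUCKDB
renderer = adapter_type.sql_query_plan_renderer  # a DuckDbSqlQueryPlanRenderer instance
```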
@@ -61,3 +61,9 @@ snowflake:
warehouse: "{{ env_var('DBT_ENV_SECRET_WAREHOUSE') }}"
database: "{{ env_var('DBT_ENV_SECRET_DATABASE') }}"
schema: "{{ env_var('DBT_ENV_SECRET_SCHEMA') }}"
duckdb:
target: dev
outputs:
dev:
type: duckdb
schema: "{{ env_var('DBT_ENV_SECRET_SCHEMA') }}"
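Note that the duckdb target above only reads `DBT_ENV_SECRET_SCHEMA`; no host, user, or password is required. A minimal sketch of the setup it relies on (the env var name comes from the profile above; the schema value is hypothetical):

```python
# Sketch only: dbt resolves env_var('DBT_ENV_SECRET_SCHEMA') when it parses the
# profile, so the variable must be set before the adapter is initialized.
import os

os.environ["DBT_ENV_SECRET_SCHEMA"] = "mf_test_schema"  # hypothetical schema name
```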
10 changes: 6 additions & 4 deletions metricflow/test/fixtures/sql_client_fixtures.py
@@ -15,7 +15,6 @@
from metricflow.test.fixtures.sql_clients.adapter_backed_ddl_client import AdapterBackedDDLSqlClient
from metricflow.test.fixtures.sql_clients.common_client import SqlDialect
from metricflow.test.fixtures.sql_clients.ddl_sql_client import SqlClientWithDDLMethods
from metricflow.test.fixtures.sql_clients.duckdb import DuckDbSqlClient

logger = logging.getLogger(__name__)

@@ -54,8 +53,9 @@ def __configure_test_env_from_url(url: str, password: str, schema: str) -> sqlal
"""
parsed_url = sqlalchemy.engine.make_url(url)

assert parsed_url.host, "Engine host is not set in engine connection URL!"
os.environ[DBT_ENV_SECRET_HOST] = parsed_url.host
if parsed_url.drivername != "duckdb":
assert parsed_url.host, "Engine host is not set in engine connection URL!"
os.environ[DBT_ENV_SECRET_HOST] = parsed_url.host

if parsed_url.username:
os.environ[DBT_ENV_SECRET_USER] = parsed_url.username
@@ -156,7 +156,9 @@ def make_test_sql_client(url: str, password: str, schema: str) -> SqlClientWithD
__initialize_dbt()
return AdapterBackedDDLSqlClient(adapter=get_adapter_by_type("postgres"))
elif dialect == SqlDialect.DUCKDB:
return DuckDbSqlClient.from_connection_details(url, password)
__configure_test_env_from_url(url, password=password, schema=schema)
__initialize_dbt()
return AdapterBackedDDLSqlClient(adapter=get_adapter_by_type("duckdb"))
elif dialect == SqlDialect.DATABRICKS:
__configure_databricks_env_from_url(url, password=password, schema=schema)
__initialize_dbt()
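The host check above is skipped for DuckDB because a local DuckDB URL has no network host. A small sketch of the parsing behavior this relies on (the DuckDB URL below is hypothetical; the PostgreSQL URL is the one used in `pyproject.toml`):

```python
# Sketch only: sqlalchemy.engine.make_url returns no host for a local DuckDB URL,
# so exporting DBT_ENV_SECRET_HOST is neither possible nor necessary.
import sqlalchemy

duckdb_url = sqlalchemy.engine.make_url("duckdb:///tmp/metricflow_test.duckdb")
assert duckdb_url.drivername == "duckdb"
assert duckdb_url.host is None

postgres_url = sqlalchemy.engine.make_url("postgresql://metricflow@localhost:5432/metricflow")
assert postgres_url.host == "localhost"  # other engines still require a host
```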
92 changes: 0 additions & 92 deletions metricflow/test/fixtures/sql_clients/duckdb.py

This file was deleted.

59 changes: 1 addition & 58 deletions metricflow/test/sql_clients/test_sql_client.py
@@ -1,8 +1,7 @@
from __future__ import annotations

import datetime
import logging
from typing import Sequence, Set, Union
from typing import Set, Union

import pandas as pd
import pytest
@@ -11,7 +10,6 @@
from metricflow.protocols.sql_client import SqlClient, SqlEngine
from metricflow.random_id import random_id
from metricflow.sql.sql_bind_parameters import SqlBindParameters
from metricflow.sql.sql_column_type import SqlColumnType
from metricflow.test.compare_df import assert_dataframes_equal
from metricflow.test.fixtures.setup_fixtures import MetricFlowTestSessionState
from metricflow.test.fixtures.sql_clients.ddl_sql_client import SqlClientWithDDLMethods
@@ -45,67 +43,12 @@ def test_query(sql_client: SqlClient) -> None: # noqa: D
_check_1col(df)


def _skip_execution_param_tests_for_unsupported_clients(sql_client: SqlClient) -> None:
if sql_client.sql_engine_type is not SqlEngine.DUCKDB:
pytest.skip(
reason=(
"The dbt Adapter-backed SqlClient implementation does not support bind parameters, so we restrict "
"this test to our DuckDB client, which retains an example implementation."
)
)


def test_query_with_execution_params(sql_client: SqlClient) -> None:
"""Test querying with execution parameters of all supported datatypes."""
_skip_execution_param_tests_for_unsupported_clients(sql_client)
params: Sequence[SqlColumnType] = [
2,
"hi",
3.5,
True,
False,
datetime.datetime(2022, 1, 1),
datetime.date(2020, 12, 31),
]
for param in params:
sql_execution_params = SqlBindParameters.create_from_dict(({"x": param}))
assert sql_execution_params.param_dict["x"] == param # check that pydantic did not coerce type unexpectedly

expr = f"SELECT {sql_client.render_bind_parameter_key('x')} as y"
df = sql_client.query(expr, sql_bind_parameters=sql_execution_params)
assert isinstance(df, pd.DataFrame)
assert df.shape == (1, 1)
assert df.columns.tolist() == ["y"]

# Some engines convert some types to str; convert everything to str for comparison
str_param = str(param)
str_result = str(df["y"][0])
# Some engines use JSON bool syntax (i.e., True -> 'true')
if isinstance(param, bool):
assert str_result in [str_param, str_param.lower()]
# Some engines add decimals to datetime milliseconds; trim here
elif isinstance(param, datetime.datetime):
assert str_result[: len(str_param)] == str_param
else:
assert str_result == str_param


def test_select_one_query(sql_client: SqlClient) -> None: # noqa: D
sql_client.query("SELECT 1")
with pytest.raises(Exception):
sql_client.query("this is garbage")


def test_failed_query_with_execution_params(sql_client: SqlClient) -> None: # noqa: D
_skip_execution_param_tests_for_unsupported_clients(sql_client)
expr = f"SELECT {sql_client.render_bind_parameter_key('x')}"
sql_execution_params = SqlBindParameters.create_from_dict({"x": 1})

sql_client.query(expr, sql_bind_parameters=sql_execution_params)
with pytest.raises(Exception):
sql_client.query("this is garbage")


def test_create_table_from_dataframe( # noqa: D
mf_test_session_state: MetricFlowTestSessionState, ddl_sql_client: SqlClientWithDDLMethods
) -> None:
10 changes: 8 additions & 2 deletions pyproject.toml
@@ -71,8 +71,6 @@ dev-packages = [
# handles import statements they are required in all test environments
sql-client-packages = [
"SQLAlchemy~=1.4.42",
"duckdb-engine~=0.9",
"duckdb~=0.8",
"sqlalchemy2-stubs~=0.0.2a21",
]

@@ -96,6 +94,10 @@ dbt-snowflake = [
"dbt-snowflake~=1.6.0",
]

dbt-duckdb = [
"dbt-duckdb~=1.6.0",
]

[tool.hatch.build.targets.sdist]
exclude = [
"/.github",
@@ -112,9 +114,13 @@ exclude = [
description = "Environment for development. Includes a DuckDB-backed client."
features = [
"dev-packages",
"dbt-duckdb",
"sql-client-packages",
]

[tool.hatch.envs.dev-env.env-vars]
MF_TEST_ADAPTER_TYPE="duckdb"

[tool.hatch.envs.postgres-env.env-vars]
MF_SQL_ENGINE_URL="postgresql://metricflow@localhost:5432/metricflow"
MF_SQL_ENGINE_PASSWORD="metricflowing"
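With `MF_TEST_ADAPTER_TYPE` set to `duckdb` in the dev environment, local test runs no longer need an external warehouse. A sketch of the mapping this variable drives (how the fixtures actually consume it is not shown in this diff):

```python
# Sketch only: the env var exported by the hatch dev-env maps directly onto the
# SupportedAdapterTypes member added in this PR.
import os

from metricflow.cli.dbt_connectors.adapter_backed_client import SupportedAdapterTypes

adapter = SupportedAdapterTypes(os.environ["MF_TEST_ADAPTER_TYPE"])
print(adapter.sql_engine_type)  # SqlEngine.DUCKDB in the dev environment
```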