-
Notifications
You must be signed in to change notification settings - Fork 996
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fixes 8428: make it possible to choose a sampling method type when we…
… create profile ingestion for the Snowflake (#17831) * Add test for existing code * Add sampling method at ingestion. * add samplingMethodType into UI * modify init method to use new parameter. * create descriptions * execute isort * fix an unintended change. * apply py_format * close section * specify init arguments * fix bug * apply py_format --------- Co-authored-by: Teddy <teddy.crepineau@gmail.com>
- Loading branch information
1 parent
08c13e5
commit 3d8e301
Showing
12 changed files
with
181 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
92 changes: 92 additions & 0 deletions
92
ingestion/tests/unit/profiler/sqlalchemy/snowflake/test_sampling_method.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
from unittest import TestCase | ||
from unittest.mock import patch | ||
from uuid import uuid4 | ||
|
||
from sqlalchemy import Column, Integer | ||
from sqlalchemy.orm import declarative_base | ||
from sqlalchemy.sql.selectable import CTE | ||
|
||
from metadata.generated.schema.entity.data.table import Column as EntityColumn | ||
from metadata.generated.schema.entity.data.table import ( | ||
ColumnName, | ||
DataType, | ||
ProfileSampleType, | ||
SamplingMethodType, | ||
Table, | ||
) | ||
from metadata.generated.schema.entity.services.connections.database.snowflakeConnection import ( | ||
SnowflakeConnection, | ||
) | ||
from metadata.profiler.api.models import ProfileSampleConfig | ||
from metadata.profiler.interface.sqlalchemy.profiler_interface import ( | ||
SQAProfilerInterface, | ||
) | ||
from metadata.profiler.processor.sampler.sqlalchemy.snowflake.sampler import ( | ||
SnowflakeSampler, | ||
) | ||
|
||
# Declarative base on which the ORM model used by these tests is declared.
Base = declarative_base()
|
||
|
||
class User(Base):
    """Minimal ORM model mapped to the ``users`` table, used as the sampled table."""

    __tablename__ = "users"

    # Single integer primary-key column.
    id = Column(Integer, primary_key=True)
|
||
|
||
class SampleTest(TestCase):
    """Check the ``SAMPLE`` clause rendered by ``SnowflakeSampler.get_sample_query``."""

    # Table entity handed to the profiler interface: one INT column named "id".
    table_entity = Table(
        id=uuid4(),
        name="user",
        columns=[
            EntityColumn(
                name=ColumnName("id"),
                dataType=DataType.INT,
            ),
        ],
    )

    snowflake_conn = SnowflakeConnection(
        username="myuser", account="myaccount", warehouse="mywarehouse"
    )

    # While the interface is constructed, the table-to-ORM conversion is
    # patched to return ``User`` directly.
    # NOTE(review): presumably this avoids deriving an ORM model from
    # ``table_entity`` -- confirm against SQAProfilerInterface.
    with patch.object(
        SQAProfilerInterface, "_convert_table_to_orm_object", return_value=User
    ):
        sqa_profiler_interface = SQAProfilerInterface(
            snowflake_conn, None, table_entity, None, None, None, None, None, 5, 43200
        )
    # Session shared by every sampler built in the tests below.
    session = sqa_profiler_interface.session

    def _sample_query_sql(self, **sample_config_overrides) -> str:
        """Build a 50% percentage sampler on ``User`` and return its query as text.

        Args:
            **sample_config_overrides: extra ``ProfileSampleConfig`` fields.
                Only the fields given here are forwarded, so leaving a field
                out exercises the config's own default for it.

        Returns:
            ``str()`` of the query returned by ``get_sample_query``.
        """
        sample_config = ProfileSampleConfig(
            profile_sample_type=ProfileSampleType.PERCENTAGE,
            profile_sample=50.0,
            **sample_config_overrides,
        )
        sampler = SnowflakeSampler(
            client=self.session,
            table=User,
            profile_sample_config=sample_config,
        )
        query: CTE = sampler.get_sample_query()
        return str(query)

    def test_omit_sampling_method_type(self):
        """
        use BERNOULLI if sampling method type is not specified.
        """
        self.assertIn("FROM users SAMPLE BERNOULLI", self._sample_query_sql())

    def test_specify_sampling_method_type(self):
        """
        use specified sampling method type.
        """
        for sampling_method_type in (
            SamplingMethodType.SYSTEM,
            SamplingMethodType.BERNOULLI,
        ):
            # subTest labels a failure with the method type that caused it and
            # lets the remaining method type still be checked.
            with self.subTest(sampling_method_type=sampling_method_type):
                self.assertIn(
                    f"FROM users SAMPLE {sampling_method_type.value}",
                    self._sample_query_sql(
                        sampling_method_type=sampling_method_type
                    ),
                )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters