Commit: Merge branch 'main' into issue-17869
harshsoni2024 committed Sep 19, 2024
2 parents 4081c69 + 760b8eb commit 620229e
Showing 140 changed files with 5,887 additions and 2,155 deletions.
1 change: 0 additions & 1 deletion .github/workflows/playwright-mysql-e2e-skip.yml
@@ -21,7 +21,6 @@ on:
- opened
- synchronize
- reopened
- ready_for_review
paths:
- openmetadata-docs/**
- .github/**
1 change: 0 additions & 1 deletion .github/workflows/playwright-mysql-e2e.yml
@@ -21,7 +21,6 @@ on:
- opened
- synchronize
- reopened
- ready_for_review
paths-ignore:
- openmetadata-docs/**
- .github/**
1 change: 0 additions & 1 deletion .github/workflows/playwright-postgresql-e2e-skip.yml
@@ -21,7 +21,6 @@ on:
- opened
- synchronize
- reopened
- ready_for_review
paths:
- openmetadata-docs/**
- .github/**
1 change: 0 additions & 1 deletion .github/workflows/playwright-postgresql-e2e.yml
@@ -21,7 +21,6 @@ on:
- opened
- synchronize
- reopened
- ready_for_review
paths-ignore:
- openmetadata-docs/**
- .github/**
@@ -13,4 +13,18 @@ ADD COLUMN id VARCHAR(36) GENERATED ALWAYS AS (json ->> '$.id') STORED NOT NULL,
ADD CONSTRAINT UNIQUE (id);

-- Create index on id column
CREATE INDEX data_quality_data_time_series_id_index ON data_quality_data_time_series (id);

-- Remove VIRTUAL status column from test_case table and remove
-- testCaseResult state from testCase; fetch from search repo.
ALTER TABLE test_case DROP COLUMN status;
UPDATE test_case SET json = JSON_SET(json, '$.testCaseStatus', JSON_EXTRACT(json, '$.testCaseResult.testCaseStatus'));
ALTER TABLE test_case ADD COLUMN status VARCHAR(56) GENERATED ALWAYS AS (JSON_UNQUOTE(JSON_EXTRACT(json, '$.testCaseStatus'))) STORED;


-- Remove test case result states
UPDATE test_suite
SET json = JSON_REMOVE(json, '$.testCaseResultSummary');

UPDATE test_case
SET json = JSON_REMOVE(json, '$.testCaseResult');
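
A quick sanity check for this MySQL migration (illustrative only, not part of the committed script) is to confirm that the regenerated `status` column agrees with the backfilled JSON:

```sql
-- Hypothetical post-migration check: expect 0 mismatches between the
-- regenerated STORED column and the testCaseStatus key it is derived from.
SELECT COUNT(*) AS mismatched_rows
FROM test_case
WHERE status <> JSON_UNQUOTE(JSON_EXTRACT(json, '$.testCaseStatus'));
```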
@@ -7,6 +7,7 @@ SET json = jsonb_set(
FROM test_case tc
WHERE dqdts.entityfqnHash = tc.fqnHash;


-- Add id column to data_quality_data_time_series table
-- after we have added the id values to the records
ALTER TABLE data_quality_data_time_series
@@ -15,3 +16,17 @@ ADD CONSTRAINT id_unique UNIQUE (id);

-- Create index on id column
CREATE INDEX IF NOT EXISTS data_quality_data_time_series_id_index ON data_quality_data_time_series (id);

-- Remove VIRTUAL status column from test_case table and remove
-- testCaseResult state from testCase; fetch from search repo.
ALTER TABLE test_case DROP COLUMN status;
UPDATE test_case SET json = jsonb_set(json, '{testCaseStatus}', json->'testCaseResult'->'testCaseStatus');
ALTER TABLE test_case ADD COLUMN status VARCHAR(56) GENERATED ALWAYS AS (json ->> 'testCaseStatus') STORED NULL;


-- Remove test case result states
UPDATE test_suite
SET json = json - 'testCaseResultSummary';

UPDATE test_case
SET json = json - 'testCaseResult';
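
On the PostgreSQL side, a similarly hedged check can confirm that the removed result states are really gone from the jsonb payloads (again an illustrative query, not part of the migration):

```sql
-- Hypothetical check: both counts should be 0 once the result state is
-- served from the search repository instead of the entity JSON.
SELECT
  (SELECT COUNT(*) FROM test_case WHERE json ? 'testCaseResult') AS leftover_results,
  (SELECT COUNT(*) FROM test_suite WHERE json ? 'testCaseResultSummary') AS leftover_summaries;
```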
315 changes: 114 additions & 201 deletions ingestion/LICENSE

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion ingestion/setup.py
@@ -211,7 +211,10 @@
"s3fs[boto3]",
*COMMONS["datalake"],
},
"deltalake": {"delta-spark<=2.3.0", "deltalake~=0.17"},
"deltalake": {
"delta-spark<=2.3.0",
"deltalake~=0.17,<0.20",
}, # TODO: remove pinning to under 0.20 after https://github.com/open-metadata/OpenMetadata/issues/17909
"deltalake-storage": {"deltalake~=0.17"},
"deltalake-spark": {"delta-spark<=2.3.0"},
"domo": {VERSIONS["pydomo"]},
19 changes: 19 additions & 0 deletions ingestion/tests/integration/data_quality/test_data_diff.py
@@ -1,4 +1,5 @@
import sys
from datetime import datetime

import pytest
from pydantic import BaseModel
@@ -67,6 +68,7 @@ def __init__(self, *args, **kwargs):
),
"POSTGRES_SERVICE.dvdrental.public.customer",
TestCaseResult(
timestamp=int(datetime.now().timestamp() * 1000),
testCaseStatus=TestCaseStatus.Success,
failedRows=0,
passedRows=599,
@@ -85,6 +87,7 @@ def __init__(self, *args, **kwargs):
),
"POSTGRES_SERVICE.dvdrental.public.changed_customer",
TestCaseResult(
timestamp=int(datetime.now().timestamp() * 1000),
testCaseStatus=TestCaseStatus.Failed,
failedRows=321,
passedRows=278,
@@ -99,6 +102,7 @@ def __init__(self, *args, **kwargs):
),
"POSTGRES_SERVICE.dvdrental.public.changed_customer",
TestCaseResult(
timestamp=int(datetime.now().timestamp() * 1000),
testCaseStatus=TestCaseStatus.Failed,
failedRows=321,
passedRows=278,
@@ -114,6 +118,7 @@ def __init__(self, *args, **kwargs):
),
"POSTGRES_SERVICE.dvdrental.public.changed_customer",
TestCaseResult(
timestamp=int(datetime.now().timestamp() * 1000),
testCaseStatus=TestCaseStatus.Success,
failedRows=321,
),
@@ -128,6 +133,7 @@ def __init__(self, *args, **kwargs):
),
"POSTGRES_SERVICE.dvdrental.public.changed_customer",
TestCaseResult(
timestamp=int(datetime.now().timestamp() * 1000),
testCaseStatus=TestCaseStatus.Failed,
failedRows=321,
),
@@ -146,6 +152,7 @@ def __init__(self, *args, **kwargs):
),
"POSTGRES_SERVICE.dvdrental.public.changed_customer",
TestCaseResult(
timestamp=int(datetime.now().timestamp() * 1000),
testCaseStatus=TestCaseStatus.Success,
),
),
@@ -158,6 +165,7 @@ def __init__(self, *args, **kwargs):
),
"POSTGRES_SERVICE.dvdrental.public.customer_without_first_name",
TestCaseResult(
timestamp=int(datetime.now().timestamp() * 1000),
testCaseStatus=TestCaseStatus.Failed,
testResultValue=[
TestResultValue(name="removedColumns", value="1"),
@@ -179,6 +187,7 @@ def __init__(self, *args, **kwargs):
),
"POSTGRES_SERVICE.dvdrental.public.customer_without_first_name",
TestCaseResult(
timestamp=int(datetime.now().timestamp() * 1000),
testCaseStatus=TestCaseStatus.Success,
),
),
@@ -208,6 +217,7 @@ def __init__(self, *args, **kwargs):
),
"MYSQL_SERVICE.default.test.customer",
TestCaseResult(
timestamp=int(datetime.now().timestamp() * 1000),
testCaseStatus=TestCaseStatus.Success,
),
),
@@ -220,6 +230,7 @@ def __init__(self, *args, **kwargs):
),
"MYSQL_SERVICE.default.test.changed_customer",
TestCaseResult(
timestamp=int(datetime.now().timestamp() * 1000),
testCaseStatus=TestCaseStatus.Failed,
),
),
@@ -290,6 +301,9 @@ def test_happy_paths(
fields=["*"],
)
assert "ERROR: Unexpected error" not in test_case_entity.testCaseResult.result
parameters.expected.timestamp = (
test_case_entity.testCaseResult.timestamp
) # timestamp is not deterministic
assert_equal_pydantic_objects(parameters.expected, test_case_entity.testCaseResult)


@@ -313,6 +327,7 @@ def test_happy_paths(
],
),
TestCaseResult(
timestamp=int(datetime.now().timestamp() * 1000),
testCaseStatus=TestCaseStatus.Aborted,
result="Unsupported dialect in param table2.serviceUrl: mongodb",
),
@@ -331,6 +346,7 @@ def test_happy_paths(
],
),
TestCaseResult(
timestamp=int(datetime.now().timestamp() * 1000),
testCaseStatus=TestCaseStatus.Failed,
result="Tables have 1 different columns:"
"\n Changed columns:"
@@ -405,6 +421,9 @@ def test_error_paths(
test_case_entity: TestCase = metadata.get_or_create_test_case(
f"{table1.fullyQualifiedName.root}.{parameters.name}"
)
expected.timestamp = (
test_case_entity.testCaseResult.timestamp
) # timestamp is not deterministic
assert_equal_pydantic_objects(expected, test_case_entity.testCaseResult)


11 changes: 11 additions & 0 deletions ingestion/tests/integration/mysql/test_data_quality.py
@@ -70,6 +70,7 @@ class TestColumnParameter:
],
),
expected_result=TestCaseResult(
timestamp=int(datetime.now().timestamp() * 1000),
testCaseStatus=TestCaseStatus.Failed,
),
),
@@ -86,6 +87,7 @@ class TestColumnParameter:
],
),
expected_result=TestCaseResult(
timestamp=int(datetime.now().timestamp() * 1000),
testCaseStatus=TestCaseStatus.Failed,
),
),
@@ -101,6 +103,7 @@ class TestColumnParameter:
],
),
expected_result=TestCaseResult(
timestamp=int(datetime.now().timestamp() * 1000),
testCaseStatus=TestCaseStatus.Failed,
),
),
@@ -116,6 +119,7 @@ class TestColumnParameter:
],
),
expected_result=TestCaseResult(
timestamp=int(datetime.now().timestamp() * 1000),
testCaseStatus=TestCaseStatus.Success,
),
),
@@ -131,6 +135,7 @@ class TestColumnParameter:
],
),
expected_result=TestCaseResult(
timestamp=int(datetime.now().timestamp() * 1000),
testCaseStatus=TestCaseStatus.Success,
),
),
@@ -144,6 +149,7 @@ class TestColumnParameter:
parameterValues=[],
),
expected_result=TestCaseResult(
timestamp=int(datetime.now().timestamp() * 1000),
testCaseStatus=TestCaseStatus.Success,
),
),
@@ -162,6 +168,7 @@ class TestColumnParameter:
],
),
expected_result=TestCaseResult(
timestamp=int(datetime.now().timestamp() * 1000),
testCaseStatus=TestCaseStatus.Success,
),
),
@@ -180,6 +187,7 @@ class TestColumnParameter:
],
),
expected_result=TestCaseResult(
timestamp=int(datetime.now().timestamp() * 1000),
testCaseStatus=TestCaseStatus.Failed,
),
),
@@ -216,6 +224,9 @@ def test_column_test_cases(
nullable=False,
)
cleanup_fqns(TestCase, test_case.fullyQualifiedName.root)
parameters.expected_result.timestamp = (
test_case.testCaseResult.timestamp
) # timestamp is not deterministic
assert_equal_pydantic_objects(
parameters.expected_result,
test_case.testCaseResult,
@@ -136,6 +136,7 @@ def ingest(metadata, service, create_data):

ingestion = MetadataWorkflow.create(workflow_config)
ingestion.execute()
ingestion.raise_from_status()
return


@@ -13,23 +13,20 @@ You can easily set up a database service for metadata extraction from Collate Sa
{% image
src="/images/v1.5/getting-started/add-service.png"
alt="Adding Database Service"
height="500px"
caption="Adding Database Service" /%}

2. **Select the database type** you want to use. Enter details such as the name and description to identify the database. In this case, we are selecting `Snowflake`.

{% image
src="/images/v1.5/getting-started/select-service.png"
alt="Selecting Database Service"
height="850px"
caption="Selecting Database Service" /%}

4. **Enter the Connection Details.** You can view the available documentation in the side panel for guidance. Also, refer to the connector [documentation](/connectors).

{% image
src="/images/v1.5/getting-started/configure-connector.png"
alt="Updating Connection Details"
height="950px"
caption="Updating Connection Details" /%}

5. **Allow the Collate SaaS IP.** In the Connection Details, you will see the IP address unique to your cluster. You need to allow this `IP` to access the data source (for Snowflake, see the sketch below this step).
@@ -42,13 +39,19 @@ This step is required only for Collate SaaS. If you are using Hybrid SaaS, you w
{% image
src="/images/v1.5/getting-started/collate-saas-ip.png"
alt="Collate SaaS IP"
height="200px"
caption="Collate SaaS IP" /%}

6. **Test the connection** to verify the status. The test connection checks whether the service is reachable from Collate.

{% image
src="/images/v1.5/getting-started/test-connection.png"
alt="Verifying the Test Connection"
height="350px"
caption="Verifying the Test Connection" /%}
caption="Verifying the Test Connection" /%}

{%inlineCallout
color="violet-70"
bold="Explore Hybrid SaaS"
icon="MdArrowForward"
href="/getting-started/day-1/hybrid-saas"%}
You can read more about Hybrid SaaS.
{%/inlineCallout%}