Skip to content

Commit

Permalink
Merge branch 'open-metadata:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
Anuj359 authored May 8, 2023
2 parents 929d6ea + e20f75f commit 9b5b4e6
Show file tree
Hide file tree
Showing 290 changed files with 4,846 additions and 3,163 deletions.
28 changes: 28 additions & 0 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Label when source files are for UI team
ui:
- "openmetadata-ui/**/*"
- "package.json"

# Label for Devops when source files is in docker or github
devops:
- "docker/**/*"
- ".github/**/*"

# Label for backend team
backend:
- "openmetadata-service/**/*"
- "openmetadata-clients/**/*"
- "openmetadata-dist/**/*"
- "openmetadata-service/**/*"
- "openmetadata-spec/**/*"

#Label for ingestion, when source file is in ingestion or airflow
ingestion:
- "openmetadata-airflow-apis/**/*"
- "ingestion/**/*"
- "ingestion-core/**/*"

#Label for documentation , when source file is in docs
documentation :
- "openmetadata-docs/**/*"
- "openmetadata-docs-v1/**/*"
40 changes: 40 additions & 0 deletions .github/workflows/cypress-integration-tests-mysql-skip.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: MySQL Cypress Integration Tests

on:
push:
branches:
- main
- '0.[0-9]+.[0-9]+'
- '1.[0-9]+[0-9]+'
paths:
- 'openmetadata-docs/**'
- 'openmetadata-docs-v1/**'
pull_request_target:
types: [labeled, opened, synchronize, reopened]
paths:
- 'openmetadata-docs/**'
- 'openmetadata-docs-v1/**'


jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
os:
- ubuntu-latest
node-version:
- 16.x
steps:
- run: 'echo "Step is not required"'
39 changes: 39 additions & 0 deletions .github/workflows/cypress-integration-tests-postgresql-skip.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: PostgreSQL Cypress Integration Tests

on:
push:
branches:
- main
- '0.[0-9]+.[0-9]+'
- '1.[0-9]+[0-9]+'
paths:
- 'openmetadata-docs/**'
- 'openmetadata-docs-v1/**'
pull_request_target:
types: [labeled, opened, synchronize, reopened]
paths:
- 'openmetadata-docs/**'
- 'openmetadata-docs-v1/**'

jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
os:
- ubuntu-latest
node-version:
- 16.x
steps:
- run: 'echo "Step is not required"'
25 changes: 25 additions & 0 deletions .github/workflows/labeler.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
name: Labeler

on:
pull_request_target:
types:
- opened
- reopened
- edited
- synchronize

jobs:
pr-labels:
# Job to assign PR labels to teams
name: pr-labels
permissions:
contents: read
pull-requests: write
runs-on: ubuntu-latest
steps:
- name: Assign labels
uses: actions/labeler@v4
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
configuration-path: .github/labeler.yml
sync-labels: true
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
- [Features](#features)
- [Try our Sandbox](#try-our-sandbox)
- [Install & Run](#install-and-run-openmetadata)
- [Roadmap](https://docs.open-metadata.org/overview/roadmap)
- [Roadmap](https://docs.open-metadata.org/releases/roadmap)
- [Documentation and support](#documentation-and-support)
- [Contributors](#contributors)
- [License](#license)
Expand Down Expand Up @@ -59,7 +59,7 @@ Here are some of the supported features in a nutshell:

- **And lots more...**

Check out for more [here](https://docs.open-metadata.org/overview/features)
Check out for more [here](https://docs.open-metadata.org/features)

![](./openmetadata-docs/images/readme/lineage.gif)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- Updating the value of SASL Mechanism for Kafka and Redpanda connections
UPDATE messaging_service_entity
SET json = JSON_REPLACE(json, '$.connection.config.saslMechanism', 'PLAIN')
WHERE (serviceType = 'Kafka' OR serviceType = 'Redpanda')
AND JSON_EXTRACT(json, '$.connection.config.saslMechanism') IS NOT NULL
AND JSON_EXTRACT(json, '$.connection.config.saslMechanism') NOT IN ('GSSAPI', 'PLAIN', 'SCRAM-SHA-256', 'SCRAM-SHA-512', 'OAUTHBEARER');
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- Updating the value of SASL Mechanism for Kafka and Redpanda connections
UPDATE messaging_service_entity
SET json = JSONB_SET(json::jsonb, '{connection,config,saslMechanism}', '"PLAIN"')
WHERE (servicetype = 'Kafka' OR serviceType = 'Redpanda')
AND json#>'{connection,config,saslMechanism}' IS NOT NULL
AND json#>'{connection,config,saslMechanism}' NOT IN ('"GSSAPI"', '"PLAIN"', '"SCRAM-SHA-256"', '"SCRAM-SHA-512"', '"OAUTHBEARER"');
2 changes: 2 additions & 0 deletions ingestion/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ ARG INGESTION_DEPENDENCY="all"
RUN pip install --upgrade pip
RUN pip install "openmetadata-managed-apis==1.0.0.0.dev0" --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.3.3/constraints-3.9.txt"
RUN pip install "openmetadata-ingestion[${INGESTION_DEPENDENCY}]==1.0.0.0.dev0"
# bump python-daemon for https://github.com/apache/airflow/pull/29916
RUN pip install "python-daemon>=3.0.0"
# remove all airflow providers except for docker
RUN pip freeze | grep "apache-airflow-providers" | grep -v "docker\|http" | xargs pip uninstall -y
# Uninstalling psycopg2-binary and installing psycopg2 instead
Expand Down
3 changes: 3 additions & 0 deletions ingestion/Dockerfile.ci
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ WORKDIR /home/airflow/ingestion
ARG INGESTION_DEPENDENCY="all"
RUN pip install ".[${INGESTION_DEPENDENCY}]"

# bump python-daemon for https://github.com/apache/airflow/pull/29916
RUN pip install "python-daemon>=3.0.0"

# remove all airflow providers except for docker
RUN pip freeze | grep "apache-airflow-providers" | grep -v "docker\|http" | xargs pip uninstall -y

Expand Down
98 changes: 71 additions & 27 deletions ingestion/src/metadata/great_expectations/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import traceback
import warnings
from datetime import datetime, timezone
from typing import Dict, Optional, Union
from typing import Dict, List, Optional, Union

from great_expectations.checkpoint.actions import ValidationAction
from great_expectations.core.batch import Batch
Expand Down Expand Up @@ -89,7 +89,7 @@ def __init__(
config_file_path: str = None,
database_service_name: Optional[str] = None,
ometa_service_name: Optional[str] = None,
test_suite_name: Optional[str] = None,
test_suite_name: str = "great_expectation_default",
):
super().__init__(data_context)
self._ometa_service_name = (
Expand Down Expand Up @@ -134,7 +134,6 @@ def _run( # pylint: disable=arguments-renamed,unused-argument
expectation_suite_identifier: type of expectation suite
checkpoint_identifier: identifier for the checkpoint
"""

check_point_spec = self._get_checkpoint_batch_spec(data_asset)
execution_engine_url = self._get_execution_engine_url(data_asset)
table_entity = self._get_table_entity(
Expand Down Expand Up @@ -261,7 +260,7 @@ def _build_test_case_fqn(self, table_fqn: str, result: Dict) -> str:
schema_name=split_table_fqn[2],
table_name=split_table_fqn[3],
column_name=result["expectation_config"]["kwargs"].get("column"),
test_case_name=result["expectation_config"]["expectation_type"],
test_case_name=f"{self.test_suite_name}-{result['expectation_config']['expectation_type']}",
)

def _build_entity_link_from_fqn(
Expand All @@ -282,6 +281,67 @@ def _build_entity_link_from_fqn(
else f"<#E::table::{table_fqn}>"
)

def _get_test_case_params_value(self, result: dict) -> List[TestCaseParameterValue]:
"""Build test case parameter value from GE test result"""
if "observed_value" not in result["result"]:
return [
TestCaseParameterValue(
name="unexpected_percentage_total",
value=str(0.0),
)
]

return [
TestCaseParameterValue(
name=key,
value=str(value),
)
for key, value in result["expectation_config"]["kwargs"].items()
if key not in {"column", "batch_id"}
]

def _get_test_case_params_definition(
self, result: dict
) -> List[TestCaseParameterDefinition]:
"""Build test case parameter definition from GE test result"""
if "observed_value" not in result["result"]:
return [
TestCaseParameterDefinition(
name="unexpected_percentage_total",
) # type: ignore
]

return [
TestCaseParameterDefinition(
name=key,
) # type: ignore
for key, _ in result["expectation_config"]["kwargs"].items()
if key not in {"column", "batch_id"}
]

def _get_test_result_value(self, result: dict) -> List[TestResultValue]:
"""Get test result value from GE test result
Args:
result (dict): result
Returns:
TestCaseResult: a test case result object
"""
try:
test_result_value = TestResultValue(
name="observed_value",
value=str(result["result"]["observed_value"]),
)
except KeyError:
unexpected_percent_total = result["result"].get("unexpected_percent_total")
test_result_value = TestResultValue(
name="unexpected_percentage_total",
value=str(unexpected_percent_total),
)

return [test_result_value]

def _handle_test_case(self, result: Dict, table_entity: Table):
"""Handle adding test to table entity based on the test case.
Test Definitions will be created on the fly from the results of the
Expand All @@ -295,7 +355,7 @@ def _handle_test_case(self, result: Dict, table_entity: Table):

try:
test_suite = self.ometa_conn.get_or_create_test_suite(
test_suite_name=self.test_suite_name or "great_expectation_default",
test_suite_name=self.test_suite_name,
test_suite_description="Test Suite Created from Great Expectation checkpoint run",
)
test_definition = self.ometa_conn.get_or_create_test_definition(
Expand All @@ -307,13 +367,9 @@ def _handle_test_case(self, result: Dict, table_entity: Table):
if "column" in result["expectation_config"]["kwargs"]
else EntityType.TABLE,
test_platforms=[TestPlatform.GreatExpectations],
test_case_parameter_definition=[
TestCaseParameterDefinition(
name=key,
)
for key, _ in result["expectation_config"]["kwargs"].items()
if key not in {"column", "batch_id"}
],
test_case_parameter_definition=self._get_test_case_params_definition(
result
),
)

test_case_fqn = self._build_test_case_fqn(
Expand All @@ -328,14 +384,7 @@ def _handle_test_case(self, result: Dict, table_entity: Table):
),
test_suite_fqn=test_suite.fullyQualifiedName.__root__,
test_definition_fqn=test_definition.fullyQualifiedName.__root__,
test_case_parameter_values=[
TestCaseParameterValue(
name=key,
value=str(value),
)
for key, value in result["expectation_config"]["kwargs"].items()
if key not in {"column", "batch_id"}
],
test_case_parameter_values=self._get_test_case_params_value(result),
)

self.ometa_conn.add_test_case_results(
Expand All @@ -344,13 +393,8 @@ def _handle_test_case(self, result: Dict, table_entity: Table):
testCaseStatus=TestCaseStatus.Success
if result["success"]
else TestCaseStatus.Failed,
testResultValue=[
TestResultValue(
name="observed_value",
value=str(result["result"].get("observed_value")),
)
],
),
testResultValue=self._get_test_result_value(result),
), # type: ignore
test_case_fqn=test_case.fullyQualifiedName.__root__,
)

Expand Down
12 changes: 5 additions & 7 deletions ingestion/src/metadata/ingestion/lineage/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,15 +345,13 @@ def clean_raw_query(cls, raw_query: str) -> Optional[str]:
replace_by=" ", # remove it as it does not add any value to lineage
)

query_no_linebreaks = insensitive_replace(
raw_str=clean_query.strip(),
to_replace="\n", # remove line breaks
replace_by=" ",
)
clean_query = clean_query.replace("\\n", "\n")

if insensitive_match(query_no_linebreaks, ".*merge into .*when matched.*"):
if insensitive_match(
clean_query, r"\s*/\*.*?\*/\s*merge.*into.*?when matched.*?"
):
clean_query = insensitive_replace(
raw_str=query_no_linebreaks,
raw_str=clean_query,
to_replace="when matched.*", # merge into queries specific
replace_by="", # remove it as LineageRunner is not able to perform the lineage
)
Expand Down
2 changes: 1 addition & 1 deletion ingestion/src/metadata/ingestion/ometa/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,8 @@ def _one_request(self, method: str, url: URL, opts: dict, retry: int):
Returns the body json in the 200 status.
"""
retry_codes = self._retry_codes
resp = self._session.request(method, url, **opts)
try:
resp = self._session.request(method, url, **opts)
resp.raise_for_status()
except HTTPError as http_error:
# retry if we hit Rate Limit
Expand Down
Loading

0 comments on commit 9b5b4e6

Please sign in to comment.