Commit: Merge branch 'main' into data_insight_refactor

IceS2 committed Jul 29, 2024
2 parents 769c16e + 72fd62a commit c8b2937
Showing 219 changed files with 6,819 additions and 3,528 deletions.
@@ -71,5 +71,5 @@ runs:
${{ inputs.image }}
sep-tags: ','
tags: |
type=raw,value=${{ inputs.release_version }},enable=${{ inputs.is_ingestion }}
type=raw,value=${{ inputs.release_version }},enable=${{ inputs.is_ingestion == 'true' }}
type=raw,${{ inputs.tag }}
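A rough Python analogy for the string-versus-boolean pitfall this hunk guards against: composite-action inputs are passed as strings, so the explicit `== 'true'` comparison yields a real boolean for the tag's `enable` flag. The values below are invented purely for illustration.

```python
# Illustration only: a string "false" is still truthy, so a naive check misfires.
is_ingestion = "false"            # what a composite action might receive as input
print(bool(is_ingestion))         # True  -> a bare truthiness check would enable the tag
print(is_ingestion == "true")     # False -> the explicit comparison behaves as intended
```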
8 changes: 4 additions & 4 deletions .github/workflows/docker-openmetadata-ingestion-base-slim.yml
@@ -14,10 +14,10 @@ on:
workflow_dispatch:
inputs:
docker_release_tag:
description: "Ingestion Base Slim Docker Image Tag"
description: "Ingestion Base Slim Docker Image Tag (3 digit docker image tag)"
required: true
release_version:
description: "Provide the Release Version"
pypi_release_version:
description: "Provide the Release Version (4 digit docker image tag)"
required: true
push_latest_tag_to_release:
description: "Do you want to update docker image latest tag as well ?"
@@ -39,7 +39,7 @@ jobs:
tag: ${{ inputs.docker_release_tag }}
push_latest: ${{ inputs.push_latest_tag_to_release }}
is_ingestion: true
release_version: ${{ inputs.release_version }}
release_version: ${{ inputs.pypi_release_version }}
dockerhub_username: ${{ secrets.DOCKERHUB_OPENMETADATA_USERNAME }}
dockerhub_token: ${{ secrets.DOCKERHUB_OPENMETADATA_TOKEN }}

8 changes: 4 additions & 4 deletions .github/workflows/docker-openmetadata-ingestion-base.yml
@@ -14,10 +14,10 @@ on:
workflow_dispatch:
inputs:
docker_release_tag:
description: "Ingestion Base Docker Image Tag"
description: "Ingestion Base Docker Image Tag (3 digit docker image tag)"
required: true
release_version:
description: "Provide the Release Version"
pypi_release_version:
description: "Provide the Release Version (4 digit docker image tag)"
required: true
push_latest_tag_to_release:
description: "Do you want to update docker image latest tag as well ?"
@@ -39,7 +39,7 @@ jobs:
tag: ${{ inputs.docker_release_tag }}
push_latest: ${{ inputs.push_latest_tag_to_release }}
is_ingestion: true
release_version: ${{ inputs.release_version }}
release_version: ${{ inputs.pypi_release_version }}
dockerhub_username: ${{ secrets.DOCKERHUB_OPENMETADATA_USERNAME }}
dockerhub_token: ${{ secrets.DOCKERHUB_OPENMETADATA_TOKEN }}

8 changes: 4 additions & 4 deletions .github/workflows/docker-openmetadata-ingestion.yml
@@ -14,10 +14,10 @@ on:
workflow_dispatch:
inputs:
docker_release_tag:
description: "Ingestion Docker Image Tag"
description: "Ingestion Docker Image Tag (3 digit docker image tag)"
required: true
release_version:
description: "Provide the Release Version"
pypi_release_version:
description: "Provide the Release Version (4 digit docker image tag)"
required: true
push_latest_tag_to_release:
description: "Mark this as latest tag as well ?"
@@ -39,7 +39,7 @@ jobs:
tag: ${{ inputs.docker_release_tag }}
push_latest: ${{ inputs.push_latest_tag_to_release }}
is_ingestion: true
release_version: ${{ inputs.release_version }}
release_version: ${{ inputs.pypi_release_version }}
dockerhub_username: ${{ secrets.DOCKERHUB_OPENMETADATA_USERNAME }}
dockerhub_token: ${{ secrets.DOCKERHUB_OPENMETADATA_TOKEN }}

@@ -86,3 +86,24 @@ WHERE JSON_UNQUOTE(json->'$.scheduleType') = 'Scheduled';
-- recreate all scheduled apps
DELETE FROM apps_marketplace
WHERE JSON_UNQUOTE(json->'$.scheduleType') = 'Scheduled';

ALTER table thread_entity DROP COLUMN entityId;

-- Add entityRef column to thread_entity table
UPDATE thread_entity
SET json = JSON_SET(
JSON_REMOVE(
JSON_REMOVE(json, '$.entityId'),
'$.entityType'
),
'$.entityRef',
JSON_OBJECT(
'id', JSON_UNQUOTE(JSON_EXTRACT(json, '$.entityId')),
'type', JSON_UNQUOTE(JSON_EXTRACT(json, '$.entityType'))
)
)
WHERE JSON_CONTAINS_PATH(json, 'one', '$.entityId') OR JSON_CONTAINS_PATH(json, 'one', '$.entityType');

-- Add entityId and type column to thread_entity table
ALTER table thread_entity ADD COLUMN entityId VARCHAR(36) GENERATED ALWAYS AS (json ->> '$.entityRef.id');
ALTER table thread_entity ADD COLUMN entityType VARCHAR(36) GENERATED ALWAYS AS (json ->> '$.entityRef.type');
@@ -85,3 +85,22 @@ WHERE json->>'scheduleType' = 'Scheduled';
-- recreate all scheduled apps
DELETE FROM apps_marketplace
WHERE json->>'scheduleType' = 'Scheduled';

ALTER TABLE thread_entity DROP COLUMN entityId;

-- Add entityRef column to thread_entity table
UPDATE thread_entity
SET json = jsonb_set(
json - 'entityId' - 'entityType',
'{entityRef}',
jsonb_build_object(
'id', json->>'entityId',
'type', json->>'entityType'
),
true
)
WHERE jsonb_exists(json, 'entityId') OR jsonb_exists(json, 'entityType');

-- Add entityId and type column to thread_entity table
ALTER TABLE thread_entity ADD COLUMN entityId VARCHAR(36) GENERATED ALWAYS AS (json->'entityRef'->>'id') STORED;
ALTER TABLE thread_entity ADD COLUMN entityType VARCHAR(36) GENERATED ALWAYS AS (json->'entityRef'->>'type') STORED;
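Both migrations above (MySQL and PostgreSQL) perform the same reshaping: the top-level `entityId`/`entityType` keys are folded into a nested `entityRef` object, and the dropped columns are re-created as generated columns over the new path. A rough Python sketch of the document change, with invented sample values:

```python
# Illustration only: how a thread_entity JSON document changes shape in this migration.
before = {"entityId": "0b1f...", "entityType": "table", "message": "Looks good to me"}

after = dict(before)
after["entityRef"] = {"id": after.pop("entityId"), "type": after.pop("entityType")}
# after == {"message": "Looks good to me",
#           "entityRef": {"id": "0b1f...", "type": "table"}}
```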
2 changes: 2 additions & 0 deletions conf/openmetadata.yaml
@@ -19,9 +19,11 @@ server:
rootPath: '/api/*'
applicationConnectors:
- type: http
bindHost: ${SERVER_HOST:-0.0.0.0}
port: ${SERVER_PORT:-8585}
adminConnectors:
- type: http
bindHost: ${SERVER_HOST:-0.0.0.0}
port: ${SERVER_ADMIN_PORT:-8586}

# Above configuration for running http is fine for dev and testing.
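The `${SERVER_HOST:-0.0.0.0}` syntax appears to be the same environment-variable substitution with a default that the port settings already use. A rough Python equivalent of how the new `bindHost` values resolve, for illustration only:

```python
import os

# Use the environment variable when set, otherwise fall back to the default.
bind_host = os.environ.get("SERVER_HOST", "0.0.0.0")
port = int(os.environ.get("SERVER_PORT", "8585"))
admin_port = int(os.environ.get("SERVER_ADMIN_PORT", "8586"))
```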
4 changes: 2 additions & 2 deletions ingestion/Dockerfile.ci
@@ -76,13 +76,13 @@ ENV PIP_QUIET=1
RUN pip install --upgrade pip

WORKDIR /home/airflow/openmetadata-airflow-apis
RUN pip install "."
RUN PIP_CONSTRAINT=/home/airflow/openmetadata-airflow-apis/constraints.txt pip install "."

WORKDIR /home/airflow/ingestion

# Argument to provide for Ingestion Dependencies to install. Defaults to all
ARG INGESTION_DEPENDENCY="all"
RUN pip install ".[${INGESTION_DEPENDENCY}]"
RUN PIP_CONSTRAINT=/home/airflow/ingestion/constraints.txt pip install ".[${INGESTION_DEPENDENCY}]"

# Temporary workaround for https://github.com/open-metadata/OpenMetadata/issues/9593
RUN echo "Image built for $(uname -m)"
10 changes: 5 additions & 5 deletions ingestion/Makefile
@@ -11,23 +11,23 @@ endif

.PHONY: install
install: ## Install the ingestion module to the current environment
python -m pip install $(INGESTION_DIR)/
PIP_CONSTRAINT=$(INGESTION_DIR)/constraints.txt python -m pip install $(INGESTION_DIR)/

.PHONY: install_dev
install_dev: ## Install the ingestion module with dev dependencies
python -m pip install "$(INGESTION_DIR)[dev]/"
PIP_CONSTRAINT=$(INGESTION_DIR)/constraints.txt python -m pip install "$(INGESTION_DIR)[dev]/"

.PHONY: install_test
install_test: ## Install the ingestion module with test dependencies
python -m pip install "$(INGESTION_DIR)[test]/"
PIP_CONSTRAINT=$(INGESTION_DIR)/constraints.txt python -m pip install "$(INGESTION_DIR)[test]/"

.PHONY: install_all
install_all: ## Install the ingestion module with all dependencies
python -m pip install "$(INGESTION_DIR)[all]/"
PIP_CONSTRAINT=$(INGESTION_DIR)/constraints.txt python -m pip install "$(INGESTION_DIR)[all]/"

.PHONY: install_apis
install_apis: ## Install the REST APIs module to the current environment
python -m pip install $(ROOT_DIR)/openmetadata-airflow-apis/ setuptools==69.0.2
PIP_CONSTRAINT=$(ROOT_DIR)/openmetadata-airflow-apis/constraints.txt python -m pip install $(ROOT_DIR)/openmetadata-airflow-apis/ setuptools==69.0.2

.PHONY: lint
lint: ## Run pylint on the Python sources to analyze the codebase
1 change: 1 addition & 0 deletions ingestion/constraints.txt
@@ -0,0 +1 @@
setuptools<72
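The new single-line `constraints.txt` works together with the `PIP_CONSTRAINT` prefixes added to the Makefile and `Dockerfile.ci`: a constraints file caps versions (here, `setuptools<72`) without turning them into requirements. A hedged Python sketch of the equivalent invocation; the paths assume the repository layout shown above:

```python
import os
import subprocess
import sys

# Install the ingestion package with the constraints file applied via PIP_CONSTRAINT,
# mirroring the Makefile/Dockerfile change. Adjust paths to your checkout.
env = {**os.environ, "PIP_CONSTRAINT": "ingestion/constraints.txt"}
subprocess.run([sys.executable, "-m", "pip", "install", "ingestion/"], env=env, check=True)
```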
@@ -14,8 +14,6 @@
import yaml
from airflow import DAG

from metadata.workflow.workflow_output_handler import print_status

try:
from airflow.operators.python import PythonOperator
except ModuleNotFoundError:
@@ -63,7 +61,7 @@ def metadata_ingestion_workflow():
workflow = MetadataWorkflow.create(workflow_config)
workflow.execute()
workflow.raise_from_status()
print_status(workflow)
workflow.print_status()
workflow.stop()


@@ -20,8 +20,6 @@
import yaml
from airflow import DAG

from metadata.workflow.workflow_output_handler import print_status

try:
from airflow.operators.python import PythonOperator
except ModuleNotFoundError:
@@ -72,7 +70,7 @@ def metadata_ingestion_workflow():
workflow = MetadataWorkflow.create(workflow_config)
workflow.execute()
workflow.raise_from_status()
print_status(workflow)
workflow.print_status()
workflow.stop()


4 changes: 1 addition & 3 deletions ingestion/examples/airflow/dags/airflow_sample_data.py
@@ -14,8 +14,6 @@
import yaml
from airflow import DAG

from metadata.workflow.workflow_output_handler import print_status

try:
from airflow.operators.python import PythonOperator
except ModuleNotFoundError:
@@ -62,7 +60,7 @@ def metadata_ingestion_workflow():
workflow = MetadataWorkflow.create(workflow_config)
workflow.execute()
workflow.raise_from_status()
print_status(workflow)
workflow.print_status()
workflow.stop()


4 changes: 1 addition & 3 deletions ingestion/examples/airflow/dags/airflow_sample_usage.py
@@ -14,8 +14,6 @@

from airflow import DAG

from metadata.workflow.workflow_output_handler import print_status

try:
from airflow.operators.python import PythonOperator
except ModuleNotFoundError:
@@ -88,7 +86,7 @@ def metadata_ingestion_workflow():
workflow = UsageWorkflow.create(workflow_config)
workflow.execute()
workflow.raise_from_status()
print_status(workflow)
workflow.print_status()
workflow.stop()


3 changes: 1 addition & 2 deletions ingestion/operators/docker/main.py
@@ -24,7 +24,6 @@
from metadata.workflow.metadata import MetadataWorkflow
from metadata.workflow.profiler import ProfilerWorkflow
from metadata.workflow.usage import UsageWorkflow
from metadata.workflow.workflow_output_handler import print_status

WORKFLOW_MAP = {
PipelineType.metadata.value: MetadataWorkflow,
@@ -107,7 +106,7 @@ def main():
workflow = workflow_class.create(workflow_config)
workflow.execute()
workflow.raise_from_status()
print_status(workflow)
workflow.print_status()
workflow.stop()


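The example DAGs and the Docker operator entrypoint above all receive the same edit: the standalone `print_status(workflow)` helper from `metadata.workflow.workflow_output_handler` is removed, and status printing becomes a method on the workflow object itself. A condensed sketch of the resulting call sequence, built only from the lines visible in this diff (the function name and config parameter are placeholders):

```python
from metadata.workflow.metadata import MetadataWorkflow


def run_metadata_ingestion(workflow_config: dict) -> None:
    # Same sequence the DAGs use; only the status call changed in this commit.
    workflow = MetadataWorkflow.create(workflow_config)
    workflow.execute()
    workflow.raise_from_status()
    workflow.print_status()  # previously: print_status(workflow)
    workflow.stop()
```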
11 changes: 10 additions & 1 deletion ingestion/pyproject.toml
@@ -171,8 +171,17 @@ ignore = [
"src/metadata/readers/*",
"src/metadata/timer/*",
"src/metadata/utils/*",
"src/metadata/workflow/*",
"src/metadata/workflow/base.py",
"src/metadata/workflow/application.py",
"src/metadata/workflow/data_insight.py",
"src/metadata/workflow/data_quality.py",
"src/metadata/workflow/ingestion.py",
"src/metadata/workflow/metadata.py",
"src/metadata/workflow/profiler.py",
"src/metadata/workflow/usage.py",
"src/metadata/workflow/workflow_status_mixin.py",
]

reportDeprecated = false
reportMissingTypeStubs = false
reportAny = false
1 change: 1 addition & 0 deletions ingestion/setup.py
@@ -168,6 +168,7 @@
DATA_DIFF["clickhouse"],
},
"dagster": {
"croniter<3",
VERSIONS["pymysql"],
"psycopg2-binary",
VERSIONS["geoalchemy2"],
5 changes: 1 addition & 4 deletions ingestion/src/_openmetadata_testutils/postgres/conftest.py
@@ -8,10 +8,7 @@
import pytest
from testcontainers.postgres import PostgresContainer

from ingestion.src._openmetadata_testutils.helpers.docker import (
copy_dir_to_container,
try_bind,
)
from _openmetadata_testutils.helpers.docker import copy_dir_to_container, try_bind


@pytest.fixture(autouse=True, scope="session")
3 changes: 1 addition & 2 deletions ingestion/src/metadata/cli/app.py
@@ -19,7 +19,6 @@
from metadata.config.common import load_config_file
from metadata.utils.logger import cli_logger
from metadata.workflow.application import ApplicationWorkflow
from metadata.workflow.application_output_handler import print_status

logger = cli_logger()

@@ -42,5 +41,5 @@ def run_app(config_path: Path) -> None:

workflow.execute()
workflow.stop()
print_status(workflow)
workflow.print_status()
workflow.raise_from_status()
15 changes: 8 additions & 7 deletions ingestion/src/metadata/cli/dataquality.py
@@ -17,13 +17,12 @@
from pathlib import Path

from metadata.config.common import load_config_file
from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipeline import (
PipelineType,
)
from metadata.utils.logger import cli_logger
from metadata.workflow.data_quality import TestSuiteWorkflow
from metadata.workflow.workflow_output_handler import (
WorkflowType,
print_init_error,
print_status,
)
from metadata.workflow.workflow_init_error_handler import WorkflowInitErrorHandler

logger = cli_logger()

@@ -42,10 +41,12 @@ def run_test(config_path: Path) -> None:
workflow = TestSuiteWorkflow.create(workflow_config_dict)
except Exception as exc:
logger.debug(traceback.format_exc())
print_init_error(exc, workflow_config_dict, WorkflowType.TEST)
WorkflowInitErrorHandler.print_init_error(
exc, workflow_config_dict, PipelineType.TestSuite
)
sys.exit(1)

workflow.execute()
workflow.stop()
print_status(workflow)
workflow.print_status()
workflow.raise_from_status()
15 changes: 8 additions & 7 deletions ingestion/src/metadata/cli/ingest.py
@@ -17,13 +17,12 @@
from pathlib import Path

from metadata.config.common import load_config_file
from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipeline import (
PipelineType,
)
from metadata.utils.logger import cli_logger
from metadata.workflow.metadata import MetadataWorkflow
from metadata.workflow.workflow_output_handler import (
WorkflowType,
print_init_error,
print_status,
)
from metadata.workflow.workflow_init_error_handler import WorkflowInitErrorHandler

logger = cli_logger()

@@ -42,10 +41,12 @@ def run_ingest(config_path: Path) -> None:
logger.debug(f"Using config: {workflow.config}")
except Exception as exc:
logger.debug(traceback.format_exc())
print_init_error(exc, config_dict, WorkflowType.INGEST)
WorkflowInitErrorHandler.print_init_error(
exc, config_dict, PipelineType.metadata
)
sys.exit(1)

workflow.execute()
workflow.stop()
print_status(workflow)
workflow.print_status()
workflow.raise_from_status()
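The two CLI modules above swap the old `print_init_error`/`WorkflowType` pair for `WorkflowInitErrorHandler.print_init_error` keyed by the generated `PipelineType` enum. A condensed sketch of the new failure path, assembled only from the calls shown in these hunks (the helper name is a placeholder):

```python
import sys
import traceback

from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipeline import (
    PipelineType,
)
from metadata.utils.logger import cli_logger
from metadata.workflow.metadata import MetadataWorkflow
from metadata.workflow.workflow_init_error_handler import WorkflowInitErrorHandler

logger = cli_logger()


def create_workflow_or_exit(config_dict: dict) -> MetadataWorkflow:
    # On a bad configuration, log the traceback, print a readable init error, and exit.
    try:
        return MetadataWorkflow.create(config_dict)
    except Exception as exc:
        logger.debug(traceback.format_exc())
        WorkflowInitErrorHandler.print_init_error(exc, config_dict, PipelineType.metadata)
        sys.exit(1)
```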