From 63bd57c083a584edf3d72be662c5a5371bb9f5da Mon Sep 17 00:00:00 2001
From: Prajwal214 <167504578+Prajwal214@users.noreply.github.com>
Date: Thu, 26 Sep 2024 17:46:08 +0530
Subject: [PATCH] Docs: Removing Data Insights Airflow SDK Docs (#18013)

Co-authored-by: Prajwal Pandit
---
 .../data-insights/airflow-sdk.md         | 117 ------------------
 openmetadata-docs/content/v1.5.x/menu.md |   2 -
 .../data-insights/airflow-sdk.md         | 117 ------------------
 .../content/v1.6.x-SNAPSHOT/menu.md      |   2 -
 4 files changed, 238 deletions(-)
 delete mode 100644 openmetadata-docs/content/v1.5.x/how-to-guides/data-insights/airflow-sdk.md
 delete mode 100644 openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-insights/airflow-sdk.md

diff --git a/openmetadata-docs/content/v1.5.x/how-to-guides/data-insights/airflow-sdk.md b/openmetadata-docs/content/v1.5.x/how-to-guides/data-insights/airflow-sdk.md
deleted file mode 100644
index c1c0a36442eb..000000000000
--- a/openmetadata-docs/content/v1.5.x/how-to-guides/data-insights/airflow-sdk.md
+++ /dev/null
@@ -1,117 +0,0 @@
----
-title: Run Data Insights using Airflow SDK
-slug: /how-to-guides/data-insights/airflow-sdk
----
-
-# Run Data Insights using Airflow SDK
-
-## 1. Define the YAML Config
-
-This is a sample config for Data Insights:
-
-```yaml
-source:
-  type: dataInsight
-  serviceName: OpenMetadata
-  sourceConfig:
-    config:
-      type: MetadataToElasticSearch
-processor:
-  type: data-insight-processor
-  config: {}
-sink:
-  type: elasticsearch
-  config:
-    es_host: localhost
-    es_port: 9200
-    recreate_indexes: false
-workflowConfig:
-  loggerLevel: DEBUG
-  openMetadataServerConfig:
-    hostPort: ''
-    authProvider: openmetadata
-    securityConfig:
-      jwtToken: '{bot_jwt_token}'
-```
-
-### Source Configuration - Source Config
-
-- To send the metadata to OpenMetadata, the source config needs to be specified as `type: MetadataToElasticSearch`.
-
-### Processor Configuration
-
-- The processor needs to be specified as `type: data-insight-processor`.
-
-### Workflow Configuration
-
-The main property here is the `openMetadataServerConfig`, where you can define the host and security provider of your OpenMetadata installation.
-
-For a simple, local installation using our Docker containers, this looks like:
-
-```yaml
-workflowConfig:
-  openMetadataServerConfig:
-    hostPort: 'http://localhost:8585/api'
-    authProvider: openmetadata
-    securityConfig:
-      jwtToken: '{bot_jwt_token}'
-```
-
-We support different security providers. You can find their definitions [here](https://github.com/open-metadata/OpenMetadata/tree/main/openmetadata-spec/src/main/resources/json/schema/security/client).
-You can find the different implementations of the ingestion below.
-
-## 2. Prepare the Data Insights DAG
-
-Create a Python file in your Airflow DAGs directory with the following contents:
-
-```python
-import pathlib
-import yaml
-from datetime import timedelta
-from airflow import DAG
-from metadata.workflow.data_insight import DataInsightWorkflow
-from metadata.workflow.workflow_output_handler import print_status
-
-try:
-    from airflow.operators.python import PythonOperator
-except ModuleNotFoundError:
-    from airflow.operators.python_operator import PythonOperator
-
-from metadata.config.common import load_config_file
-from airflow.utils.dates import days_ago
-
-default_args = {
-    "owner": "user_name",
-    "email": ["username@org.com"],
-    "email_on_failure": False,
-    "retries": 3,
-    "retry_delay": timedelta(minutes=5),
-    "execution_timeout": timedelta(minutes=60)
-}
-
-config = """
-
-"""
-
-def metadata_ingestion_workflow():
-    workflow_config = yaml.safe_load(config)
-    workflow = DataInsightWorkflow.create(workflow_config)
-    workflow.execute()
-    workflow.raise_from_status()
-    print_status(workflow)
-    workflow.stop()
-
-with DAG(
-    "sample_data",
-    default_args=default_args,
-    description="An example DAG which runs an OpenMetadata ingestion workflow",
-    start_date=days_ago(1),
-    is_paused_upon_creation=False,
-    schedule_interval='*/5 * * * *',
-    catchup=False,
-) as dag:
-    ingest_task = PythonOperator(
-        task_id="ingest_using_recipe",
-        python_callable=metadata_ingestion_workflow,
-    )
-```
diff --git a/openmetadata-docs/content/v1.5.x/menu.md b/openmetadata-docs/content/v1.5.x/menu.md
index 1358b635ba44..85f3fff9f410 100644
--- a/openmetadata-docs/content/v1.5.x/menu.md
+++ b/openmetadata-docs/content/v1.5.x/menu.md
@@ -859,8 +859,6 @@ site_menu:
     url: /how-to-guides/data-insights/ingestion
   - category: How-to Guides / Data Insights / Key Performance Indicators (KPI)
     url: /how-to-guides/data-insights/kpi
-  - category: How-to Guides / Data Insights / Run Data Insights using Airflow SDK
-    url: /how-to-guides/data-insights/airflow-sdk
   - category: How-to Guides / Data Insights / Run Elasticsearch Reindex using Airflow SDK
     url: /how-to-guides/data-insights/elasticsearch-reindex
   - category: How-to Guides / Data Insights / Data Insights Report
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-insights/airflow-sdk.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-insights/airflow-sdk.md
deleted file mode 100644
index c1c0a36442eb..000000000000
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/how-to-guides/data-insights/airflow-sdk.md
+++ /dev/null
@@ -1,117 +0,0 @@
----
-title: Run Data Insights using Airflow SDK
-slug: /how-to-guides/data-insights/airflow-sdk
----
-
-# Run Data Insights using Airflow SDK
-
-## 1. Define the YAML Config
-
-This is a sample config for Data Insights:
-
-```yaml
-source:
-  type: dataInsight
-  serviceName: OpenMetadata
-  sourceConfig:
-    config:
-      type: MetadataToElasticSearch
-processor:
-  type: data-insight-processor
-  config: {}
-sink:
-  type: elasticsearch
-  config:
-    es_host: localhost
-    es_port: 9200
-    recreate_indexes: false
-workflowConfig:
-  loggerLevel: DEBUG
-  openMetadataServerConfig:
-    hostPort: ''
-    authProvider: openmetadata
-    securityConfig:
-      jwtToken: '{bot_jwt_token}'
-```
-
-### Source Configuration - Source Config
-
-- To send the metadata to OpenMetadata, the source config needs to be specified as `type: MetadataToElasticSearch`.
-
-### Processor Configuration
-
-- The processor needs to be specified as `type: data-insight-processor`.
-
-### Workflow Configuration
-
-The main property here is the `openMetadataServerConfig`, where you can define the host and security provider of your OpenMetadata installation.
-
-For a simple, local installation using our Docker containers, this looks like:
-
-```yaml
-workflowConfig:
-  openMetadataServerConfig:
-    hostPort: 'http://localhost:8585/api'
-    authProvider: openmetadata
-    securityConfig:
-      jwtToken: '{bot_jwt_token}'
-```
-
-We support different security providers. You can find their definitions [here](https://github.com/open-metadata/OpenMetadata/tree/main/openmetadata-spec/src/main/resources/json/schema/security/client).
-You can find the different implementations of the ingestion below.
-
-## 2. Prepare the Data Insights DAG
-
-Create a Python file in your Airflow DAGs directory with the following contents:
-
-```python
-import pathlib
-import yaml
-from datetime import timedelta
-from airflow import DAG
-from metadata.workflow.data_insight import DataInsightWorkflow
-from metadata.workflow.workflow_output_handler import print_status
-
-try:
-    from airflow.operators.python import PythonOperator
-except ModuleNotFoundError:
-    from airflow.operators.python_operator import PythonOperator
-
-from metadata.config.common import load_config_file
-from airflow.utils.dates import days_ago
-
-default_args = {
-    "owner": "user_name",
-    "email": ["username@org.com"],
-    "email_on_failure": False,
-    "retries": 3,
-    "retry_delay": timedelta(minutes=5),
-    "execution_timeout": timedelta(minutes=60)
-}
-
-config = """
-
-"""
-
-def metadata_ingestion_workflow():
-    workflow_config = yaml.safe_load(config)
-    workflow = DataInsightWorkflow.create(workflow_config)
-    workflow.execute()
-    workflow.raise_from_status()
-    print_status(workflow)
-    workflow.stop()
-
-with DAG(
-    "sample_data",
-    default_args=default_args,
-    description="An example DAG which runs an OpenMetadata ingestion workflow",
-    start_date=days_ago(1),
-    is_paused_upon_creation=False,
-    schedule_interval='*/5 * * * *',
-    catchup=False,
-) as dag:
-    ingest_task = PythonOperator(
-        task_id="ingest_using_recipe",
-        python_callable=metadata_ingestion_workflow,
-    )
-```
diff --git a/openmetadata-docs/content/v1.6.x-SNAPSHOT/menu.md b/openmetadata-docs/content/v1.6.x-SNAPSHOT/menu.md
index 1ac1c7790fa7..d8a305c97f0e 100644
--- a/openmetadata-docs/content/v1.6.x-SNAPSHOT/menu.md
+++ b/openmetadata-docs/content/v1.6.x-SNAPSHOT/menu.md
@@ -872,8 +872,6 @@ site_menu:
     url: /how-to-guides/data-insights/ingestion
   - category: How-to Guides / Data Insights / Key Performance Indicators (KPI)
     url: /how-to-guides/data-insights/kpi
-  - category: How-to Guides / Data Insights / Run Data Insights using Airflow SDK
-    url: /how-to-guides/data-insights/airflow-sdk
   - category: How-to Guides / Data Insights / Run Elasticsearch Reindex using Airflow SDK
     url: /how-to-guides/data-insights/elasticsearch-reindex
   - category: How-to Guides / Data Insights / Data Insights Report
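
Note for anyone who relied on the removed guide: the deleted page shipped its DAG with an empty `config` string, expecting readers to paste in the YAML from step 1 themselves. The sketch below makes that splice explicit. It is assembled purely from the deleted 1.5.x page, so the `metadata.workflow.*` import paths, the `sample_data` DAG id, and the placeholder values (`hostPort`, `{bot_jwt_token}`) are assumptions carried over from that page, not a supported recipe for newer releases.

```python
# Minimal sketch, assuming the 1.5.x-era OpenMetadata ingestion package:
# the deleted page's DAG with its step-1 sample YAML inlined into `config`.
import yaml
from datetime import timedelta

from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago  # used by the deleted page; deprecated in newer Airflow

from metadata.workflow.data_insight import DataInsightWorkflow
from metadata.workflow.workflow_output_handler import print_status

default_args = {
    "owner": "user_name",
    "email": ["username@org.com"],
    "email_on_failure": False,
    "retries": 3,
    "retry_delay": timedelta(minutes=5),
    "execution_timeout": timedelta(minutes=60),
}

# Step-1 sample config from the deleted page; hostPort and jwtToken are placeholders.
config = """
source:
  type: dataInsight
  serviceName: OpenMetadata
  sourceConfig:
    config:
      type: MetadataToElasticSearch
processor:
  type: data-insight-processor
  config: {}
sink:
  type: elasticsearch
  config:
    es_host: localhost
    es_port: 9200
    recreate_indexes: false
workflowConfig:
  openMetadataServerConfig:
    hostPort: 'http://localhost:8585/api'
    authProvider: openmetadata
    securityConfig:
      jwtToken: '{bot_jwt_token}'
"""

def metadata_ingestion_workflow():
    # Parse the inlined recipe into the dict DataInsightWorkflow.create expects,
    # run it end to end, and fail the task if any workflow step failed.
    workflow_config = yaml.safe_load(config)
    workflow = DataInsightWorkflow.create(workflow_config)
    workflow.execute()
    workflow.raise_from_status()
    print_status(workflow)
    workflow.stop()

with DAG(
    "sample_data",
    default_args=default_args,
    description="An example DAG which runs an OpenMetadata ingestion workflow",
    start_date=days_ago(1),
    is_paused_upon_creation=False,
    schedule_interval='*/5 * * * *',  # every 5 minutes, as on the deleted page
    catchup=False,
) as dag:
    ingest_task = PythonOperator(
        task_id="ingest_using_recipe",
        python_callable=metadata_ingestion_workflow,
    )
```

Design-wise this mirrors the removed page: the YAML recipe stays a plain string inside the DAG file, and `raise_from_status()` is what turns a failed ingestion step into a failed Airflow task instead of a silently green run.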