Skip to content

Commit

Permalink
Merge branch 'main' into fix-appConfig
Browse files Browse the repository at this point in the history
  • Loading branch information
pmbrull authored Nov 23, 2023
2 parents 92641d9 + 312b038 commit ddb249b
Show file tree
Hide file tree
Showing 37 changed files with 370 additions and 75 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ SNYK_ARGS := --severity-threshold=high
.PHONY: snyk-ingestion-report
snyk-ingestion-report: ## Uses Snyk CLI to validate the ingestion code and container. Don't stop the execution
@echo "Validating Ingestion container..."
docker build -t openmetadata-ingestion:scan -f ingestion/Dockerfile .
snyk container test openmetadata-ingestion:scan --file=ingestion/Dockerfile $(SNYK_ARGS) --json > security-report/ingestion-docker-scan.json | true;
docker build -t openmetadata-ingestion:scan -f ingestion/Dockerfile.ci .
snyk container test openmetadata-ingestion:scan --file=ingestion/Dockerfile.ci $(SNYK_ARGS) --json > security-report/ingestion-docker-scan.json | true;
@echo "Validating ALL ingestion dependencies. Make sure the venv is activated."
cd ingestion; \
pip freeze > scan-requirements.txt; \
Expand Down
Empty file.
Empty file.
Empty file.
12 changes: 12 additions & 0 deletions bootstrap/sql/migrations/native/1.2.3/postgres/schemaChanges.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@

-- Rename viewParsingTimeoutLimit -> queryParsingTimeoutLimit on metadata
-- ingestion pipelines: remove the old key (#- operator) and re-insert its
-- value under the new key (jsonb_set with create_missing = true).
UPDATE ingestion_pipeline_entity
SET json = jsonb_set(
json::jsonb #- '{sourceConfig,config,viewParsingTimeoutLimit}',
'{sourceConfig,config,queryParsingTimeoutLimit}',
(json #> '{sourceConfig,config,viewParsingTimeoutLimit}')::jsonb,
true
)
-- Only touch metadata pipelines that actually carry the old key.
WHERE json #>> '{pipelineType}' = 'metadata'
AND json #>> '{sourceConfig,config,type}' = 'DatabaseMetadata'
AND json #>> '{sourceConfig,config,viewParsingTimeoutLimit}' is not null;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Rename customMetricsProfile to customMetrics inside the stored profiler JSON
-- (plain string REPLACE on the serialized document).
UPDATE profiler_data_time_series
SET json = REPLACE(json, '"customMetricsProfile"', '"customMetrics"');

-- Delete customMetricsProfile from entity_extension
-- This was not supported on the processing side before 1.3.
-- NOTE(review): the pattern matches extensions ending in 'customMetrics'
-- (the NEW name), not 'customMetricsProfile' — confirm this is intended.
DELETE FROM openmetadata_db.entity_extension ee
where extension like '%customMetrics';
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Rename customMetricsProfile to customMetrics inside the stored profiler JSON.
-- Postgres variant: json column must be cast to text for REPLACE, then back.
UPDATE profiler_data_time_series
SET json = REPLACE(json::text, '"customMetricsProfile"', '"customMetrics"')::jsonb;

-- Delete customMetricsProfile from entity_extension
-- This was not supported on the processing side before 1.3.
-- NOTE(review): the pattern matches extensions ending in 'customMetrics'
-- (the NEW name), not 'customMetricsProfile' — confirm this is intended.
DELETE FROM entity_extension ee
where extension like '%customMetrics';
2 changes: 1 addition & 1 deletion ingestion/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"boto3": "boto3>=1.20,<2.0", # No need to add botocore separately. It's a dep from boto3
"geoalchemy2": "GeoAlchemy2~=0.12",
"google-cloud-storage": "google-cloud-storage==1.43.0",
"great-expectations": "great-expectations~=0.17.0",
"great-expectations": "great-expectations~=0.18.0",
"grpc-tools": "grpcio-tools>=1.47.2",
"msal": "msal~=1.2",
"neo4j": "neo4j~=5.3.0",
Expand Down
10 changes: 10 additions & 0 deletions ingestion/src/metadata/ingestion/source/database/athena/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,13 @@ class AthenaQueryExecution(BaseModel):

class AthenaQueryExecutionList(BaseModel):
QueryExecutions: Optional[List[AthenaQueryExecution]]


class WorkGroup(BaseModel):
    """A single Athena work group entry from the ListWorkGroups API response."""

    # Work group name; used later to scope list_query_executions calls.
    Name: Optional[str]
    # Lifecycle state — compared (upper-cased) against "ENABLED" by the
    # query parser; presumably the other value is "DISABLED" — TODO confirm.
    State: Optional[str]


class WorkGroupsList(BaseModel):
    """Paginated ListWorkGroups API response wrapper."""

    # Page of work groups; defaults to an empty list so iteration is safe
    # even when the key is absent (pydantic copies mutable defaults).
    WorkGroups: Optional[List[WorkGroup]] = []
    # Pagination cursor; None on the last page.
    NextToken: Optional[str]
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
Athena Query parser module
"""

import traceback
from abc import ABC
from math import ceil

Expand All @@ -27,6 +28,7 @@
from metadata.ingestion.source.database.athena.models import (
AthenaQueryExecutionList,
QueryExecutionIdsResponse,
WorkGroupsList,
)
from metadata.ingestion.source.database.query_parser_source import QueryParserSource
from metadata.utils.constants import QUERY_WITH_DBT, QUERY_WITH_OM_VERSION
Expand All @@ -36,6 +38,8 @@

ATHENA_QUERY_PAGINATOR_LIMIT = 50

ATHENA_ENABLED_WORK_GROUP_STATE = "ENABLED"


class AthenaQueryParserSource(QueryParserSource, ABC):
"""
Expand All @@ -59,23 +63,67 @@ def create(cls, config_dict, metadata: OpenMetadata):
)
return cls(config, metadata)

def get_queries(self):
query_limit = ceil(
self.source_config.resultLimit / ATHENA_QUERY_PAGINATOR_LIMIT
)
paginator = self.client.get_paginator("list_query_executions")
paginator_response = paginator.paginate()
for response in paginator_response:
response_obj = QueryExecutionIdsResponse(**response)
if response_obj.QueryExecutionIds:
query_details_response = self.client.batch_get_query_execution(
QueryExecutionIds=response_obj.QueryExecutionIds
def _get_work_group_response(self, next_token: str, is_first_call: bool = False):
    """Fetch one page of work groups, omitting NextToken on the first call.

    boto3 rejects NextToken=None, so the first request must not pass the
    keyword at all; subsequent pages forward the cursor from the previous page.
    """
    request_kwargs = {} if is_first_call else {"NextToken": next_token}
    return self.client.list_work_groups(**request_kwargs)

def get_work_groups(self) -> "Iterable[Optional[str]]":
    """Yield the names of all ENABLED Athena work groups.

    Pages through the ListWorkGroups API until NextToken is exhausted.
    On failure during the FIRST call (most likely missing the
    athena:ListWorkGroups permission) it yields None once, so the caller
    can still fetch queries against the default work group; failures on
    later pages stop iteration with whatever was already yielded.

    Fix: the original annotation was ``-> str`` even though this is a
    generator yielding ``Optional[str]``; a string annotation is used so
    no new imports are required at runtime.
    """
    next_token = None
    is_first_call = True
    try:
        while True:
            work_group_list = self._get_work_group_response(
                next_token, is_first_call
            )
            response_obj = WorkGroupsList(**work_group_list)
            for work_group in response_obj.WorkGroups:
                if (
                    work_group.State
                    and work_group.State.upper() == ATHENA_ENABLED_WORK_GROUP_STATE
                ):
                    yield work_group.Name
            next_token = response_obj.NextToken
            is_first_call = False
            if next_token is None:
                break
    except Exception as exc:
        logger.debug(f"Failed to fetch work groups due to: {exc}")
        logger.debug(traceback.format_exc())
        if is_first_call:
            # if it fails for the first api call, most likely due to insufficient
            # permissions then still fetch the queries with default workgroup
            yield None

def get_queries(self):
    """Yield batches of Athena query-execution details, per work group.

    For every work group name produced by get_work_groups() (None means
    "default work group"), paginate list_query_executions and resolve each
    page of ids via batch_get_query_execution.

    NOTE(review): the page budget derived from resultLimit is reset for
    EVERY work group, so the total number of queries fetched can exceed
    resultLimit when several work groups exist — confirm this is intended.
    """
    for work_group in self.get_work_groups():
        remaining_pages = ceil(
            self.source_config.resultLimit / ATHENA_QUERY_PAGINATOR_LIMIT
        )
        paginator = self.client.get_paginator("list_query_executions")
        # A falsy work group (None from the permission fallback) means the
        # default work group: call paginate() with no WorkGroup argument.
        paginate_kwargs = {"WorkGroup": work_group} if work_group else {}
        for page in paginator.paginate(**paginate_kwargs):
            execution_ids = QueryExecutionIdsResponse(**page)
            if execution_ids.QueryExecutionIds:
                details = self.client.batch_get_query_execution(
                    QueryExecutionIds=execution_ids.QueryExecutionIds
                )
                yield AthenaQueryExecutionList(**details)
                remaining_pages -= 1
                if not remaining_pages:
                    break

def is_not_dbt_or_om_query(self, query_text: str) -> bool:
return not (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"""
Athena usage module
"""
from typing import Iterable, Optional
from typing import Iterable

from metadata.generated.schema.type.tableQuery import TableQueries, TableQuery
from metadata.ingestion.source.database.athena.query_parser import (
Expand All @@ -32,7 +32,7 @@ class AthenaUsageSource(AthenaQueryParserSource, UsageSource):
Athena Usage Source
"""

def yield_table_queries(self) -> Optional[Iterable[TableQuery]]:
def yield_table_queries(self) -> Iterable[TableQueries]:
"""
Method to yield TableQueries
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ And is defined as:
"athena:ListQueryExecutions",
"athena:StartQueryExecution",
"athena:GetQueryExecution",
"athena:ListWorkGroups",
"athena:GetQueryResults",
"athena:BatchGetQueryExecution"
],
Expand Down
4 changes: 2 additions & 2 deletions openmetadata-docs/content/v1.2.x/deployment/docker/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ This docker compose file contains only the docker compose services for OpenMetad
You can also run the below command to fetch the docker compose file directly from the terminal -

```bash
wget https://github.com/open-metadata/OpenMetadata/releases/download/1.2.0-release/docker-compose-openmetadata.yml
wget https://github.com/open-metadata/OpenMetadata/releases/download/1.2.2-release/docker-compose-openmetadata.yml
```

### 3. Update Environment Variables required for OpenMetadata Dependencies
Expand Down Expand Up @@ -191,7 +191,7 @@ You can validate that all containers are up by running with command `docker ps`.
```commandline
❯ docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
470cc8149826 openmetadata/server:1.2.0 "./openmetadata-star…" 45 seconds ago Up 43 seconds 3306/tcp, 9200/tcp, 9300/tcp, 0.0.0.0:8585-8586->8585-8586/tcp openmetadata_server
470cc8149826 openmetadata/server:1.2.2 "./openmetadata-star…" 45 seconds ago Up 43 seconds 3306/tcp, 9200/tcp, 9300/tcp, 0.0.0.0:8585-8586->8585-8586/tcp openmetadata_server
```

In a few seconds, you should be able to access the OpenMetadata UI at [http://localhost:8585](http://localhost:8585)
Expand Down
6 changes: 3 additions & 3 deletions openmetadata-docs/content/v1.2.x/deployment/ingestion/mwaa.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ To install the package, we need to update the `requirements.txt` file from the M
openmetadata-ingestion[<plugin>]==x.y.z
```

Where `x.y.z` is the version of the OpenMetadata ingestion package. Note that the version needs to match the server version. If we are using the server at 1.2.0, then the ingestion package needs to also be 1.2.0.
Where `x.y.z` is the version of the OpenMetadata ingestion package. Note that the version needs to match the server version. If we are using the server at 1.2.2, then the ingestion package needs to also be 1.2.2.

The plugin parameter is a list of the sources that we want to ingest. An example would look like this `openmetadata-ingestion[mysql,snowflake,s3]==1.2.0`.
The plugin parameter is a list of the sources that we want to ingest. An example would look like this `openmetadata-ingestion[mysql,snowflake,s3]==1.2.2`.

A DAG deployed using a Python Operator would then look like follows

Expand Down Expand Up @@ -106,7 +106,7 @@ We will now describe the steps, following the official AWS documentation.

- The cluster needs a task to run in `FARGATE` mode.
- The required image is `docker.getcollate.io/openmetadata/ingestion-base:x.y.z`
- The same logic as above applies. The `x.y.z` version needs to match the server version. For example, `docker.getcollate.io/openmetadata/ingestion-base:1.2.0`
- The same logic as above applies. The `x.y.z` version needs to match the server version. For example, `docker.getcollate.io/openmetadata/ingestion-base:1.2.2`

We have tested this process with a Task Memory of 512MB and Task CPU (unit) of 256. This can be tuned depending on the amount of metadata that needs to be ingested.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ WORKDIR /home/
COPY <my-organization-certs> .
RUN update-ca-certificates
```
where `docker.getcollate.io/openmetadata/server:x.y.z` needs to point to the same version of the OpenMetadata server, for example `docker.getcollate.io/openmetadata/server:1.2.0`.
where `docker.getcollate.io/openmetadata/server:x.y.z` needs to point to the same version of the OpenMetadata server, for example `docker.getcollate.io/openmetadata/server:1.2.2`.
This image needs to be built and published to the container registry of your choice.
### 2. Update your openmetadata helm values yaml
Expand Down Expand Up @@ -95,7 +95,7 @@ COPY setup.py .
RUN pip install --no-deps .
```

where `docker.getcollate.io/openmetadata/ingestion:x.y.z` needs to point to the same version of the OpenMetadata server, for example `docker.getcollate.io/openmetadata/ingestion:1.2.0`.
where `docker.getcollate.io/openmetadata/ingestion:x.y.z` needs to point to the same version of the OpenMetadata server, for example `docker.getcollate.io/openmetadata/ingestion:1.2.2`.
This image needs to be built and published to the container registry of your choice.

### 2. Update the airflow in openmetadata dependencies values YAML
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,12 @@ Verify with the below command to see the latest release available locally.
```commandline
helm search repo open-metadata --versions
> NAME CHART VERSION APP VERSION DESCRIPTION
open-metadata/openmetadata 1.2.1 1.2.0 A Helm chart for OpenMetadata on Kubernetes
open-metadata/openmetadata 1.2.0 1.2.0 A Helm chart for OpenMetadata on Kubernetes
open-metadata/openmetadata 1.2.4 1.2.2 A Helm chart for OpenMetadata on Kubernetes
open-metadata/openmetadata 1.2.3 1.2.2 A Helm chart for OpenMetadata on Kubernetes
...
open-metadata/openmetadata-dependencies 1.2.1 1.2.0 Helm Dependencies for OpenMetadata
open-metadata/openmetadata-dependencies 1.2.0 1.2.0 Helm Dependencies for OpenMetadata
open-metadata/openmetadata-dependencies 1.2.4 1.2.2 Helm Dependencies for OpenMetadata
open-metadata/openmetadata-dependencies 1.2.3 1.2.2 Helm Dependencies for OpenMetadata
...
```

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,15 +119,15 @@ The latest version is at the top of the page
You can use the curl or wget command as well to fetch the docker compose files from your terminal -

```commandline
curl -sL -o docker-compose.yml https://github.com/open-metadata/OpenMetadata/releases/download/1.2.0-release/docker-compose.yml
curl -sL -o docker-compose.yml https://github.com/open-metadata/OpenMetadata/releases/download/1.2.2-release/docker-compose.yml
curl -sL -o docker-compose-postgres.yml https://github.com/open-metadata/OpenMetadata/releases/download/1.2.0-release/docker-compose-postgres.yml
curl -sL -o docker-compose-postgres.yml https://github.com/open-metadata/OpenMetadata/releases/download/1.2.2-release/docker-compose-postgres.yml
```

```commandline
wget -O https://github.com/open-metadata/OpenMetadata/releases/download/1.2.0-release/docker-compose.yml
wget -O https://github.com/open-metadata/OpenMetadata/releases/download/1.2.2-release/docker-compose.yml
wget -O https://github.com/open-metadata/OpenMetadata/releases/download/1.2.0-release/docker-compose-postgres.yml
wget -O https://github.com/open-metadata/OpenMetadata/releases/download/1.2.2-release/docker-compose-postgres.yml
```

### 3. Start the Docker Compose Services
Expand Down Expand Up @@ -166,10 +166,10 @@ You can validate that all containers are up by running with command `docker ps`.
```commandline
❯ docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
470cc8149826 openmetadata/server:1.2.0 "./openmetadata-star…" 45 seconds ago Up 43 seconds 3306/tcp, 9200/tcp, 9300/tcp, 0.0.0.0:8585-8586->8585-8586/tcp openmetadata_server
63578aacbff5 openmetadata/ingestion:1.2.0 "./ingestion_depende…" 45 seconds ago Up 43 seconds 0.0.0.0:8080->8080/tcp openmetadata_ingestion
470cc8149826 openmetadata/server:1.2.2 "./openmetadata-star…" 45 seconds ago Up 43 seconds 3306/tcp, 9200/tcp, 9300/tcp, 0.0.0.0:8585-8586->8585-8586/tcp openmetadata_server
63578aacbff5 openmetadata/ingestion:1.2.2 "./ingestion_depende…" 45 seconds ago Up 43 seconds 0.0.0.0:8080->8080/tcp openmetadata_ingestion
9f5ee8334f4b docker.elastic.co/elasticsearch/elasticsearch:7.16.3 "/tini -- /usr/local…" 45 seconds ago Up 44 seconds 0.0.0.0:9200->9200/tcp, 0.0.0.0:9300->9300/tcp openmetadata_elasticsearch
08947ab3424b openmetadata/db:1.2.0 "/entrypoint.sh mysq…" 45 seconds ago Up 44 seconds (healthy) 3306/tcp, 33060-33061/tcp openmetadata_mysql
08947ab3424b openmetadata/db:1.2.2 "/entrypoint.sh mysq…" 45 seconds ago Up 44 seconds (healthy) 3306/tcp, 33060-33061/tcp openmetadata_mysql
```

In a few seconds, you should be able to access the OpenMetadata UI at [http://localhost:8585](http://localhost:8585)
Expand Down
Loading

0 comments on commit ddb249b

Please sign in to comment.