diff --git a/ingestion/examples/workflows/azuresql_lineage.yaml b/ingestion/examples/workflows/azuresql_lineage.yaml new file mode 100644 index 000000000000..7b4032ccd206 --- /dev/null +++ b/ingestion/examples/workflows/azuresql_lineage.yaml @@ -0,0 +1,17 @@ +source: + type: azuresql-lineage + serviceName: azuresql + sourceConfig: + config: + type: DatabaseLineage + queryLogDuration: 1 + resultLimit: 10000 +sink: + type: metadata-rest + config: {} +workflowConfig: + openMetadataServerConfig: + hostPort: http://localhost:8585/api + authProvider: openmetadata + securityConfig: + jwtToken: "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg" diff --git a/ingestion/examples/workflows/azuresql_usage.yaml b/ingestion/examples/workflows/azuresql_usage.yaml new file mode 100644 index 000000000000..dbf059de2da1 --- /dev/null +++ b/ingestion/examples/workflows/azuresql_usage.yaml @@ -0,0 +1,32 @@ +source: + type: azuresql-usage + serviceName: azuresql + serviceConnection: + config: + type: AzureSQL + database: database + username: username + password: password + hostPort: hostport + driver: ODBC Driver 17 for SQL Server + sourceConfig: + config: + type: DatabaseUsage + queryLogDuration: '1' +processor: + type: query-parser + config: {} +stage: + type: table-usage + config: + filename: /tmp/azuresql_usage +bulkSink: + type: metadata-usage + config: + filename: /tmp/azuresql_usage +workflowConfig: + openMetadataServerConfig: + hostPort: 
http://localhost:8585/api + authProvider: openmetadata + securityConfig: + jwtToken: "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg" diff --git a/ingestion/src/metadata/ingestion/source/database/azuresql/lineage.py b/ingestion/src/metadata/ingestion/source/database/azuresql/lineage.py new file mode 100644 index 000000000000..c6af39e26338 --- /dev/null +++ b/ingestion/src/metadata/ingestion/source/database/azuresql/lineage.py @@ -0,0 +1,25 @@ +# Copyright 2021 Collate +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" +AZURESQL lineage module +""" + +from metadata.ingestion.source.database.azuresql.query_parser import ( + AzuresqlQueryParserSource, +) +from metadata.ingestion.source.database.mssql.lineage import MssqlLineageSource + + +class AzuresqlLineageSource(AzuresqlQueryParserSource, MssqlLineageSource): + """ + AzureSQL lineage source combining AzuresqlQueryParserSource with MssqlLineageSource + + """ diff --git a/ingestion/src/metadata/ingestion/source/database/azuresql/metadata.py b/ingestion/src/metadata/ingestion/source/database/azuresql/metadata.py index 1b315a35a6eb..974364278350 100644 --- a/ingestion/src/metadata/ingestion/source/database/azuresql/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/azuresql/metadata.py @@ -10,8 +10,12 @@ # limitations under the License. """Azure SQL source module""" +import traceback +from typing import Iterable + from sqlalchemy.dialects.mssql.base import MSDialect, ischema_names +from metadata.generated.schema.entity.data.database import Database from metadata.generated.schema.entity.services.connections.database.azureSQLConnection import ( AzureSQLConnection, ) @@ -29,11 +33,16 @@ get_table_comment, get_view_definition, ) +from metadata.utils import fqn +from metadata.utils.filters import filter_by_database +from metadata.utils.logger import ingestion_logger from metadata.utils.sqlalchemy_utils import ( get_all_table_comments, get_all_view_definitions, ) +logger = ingestion_logger() + ischema_names.update( { "nvarchar": create_sqlalchemy_type("NVARCHAR"), @@ -76,3 +85,41 @@ def create(cls, config_dict, metadata_config: OpenMetadataConnection): f"Expected AzureSQLConnection, but got {connection}" ) return cls(config, metadata_config) + + def get_database_names(self) -> Iterable[str]: + """Yield the configured database or, when ingestAllDatabases is set, each database listed in master.sys.databases that is not filtered out""" + if not self.config.serviceConnection.__root__.config.ingestAllDatabases: + configured_db = self.config.serviceConnection.__root__.config.database + self.set_inspector(database_name=configured_db) + yield configured_db + else: + results = self.connection.execute( + "SELECT name
FROM master.sys.databases order by name" + ) + for res in results: + row = list(res) + new_database = row[0] + database_fqn = fqn.build( + self.metadata, + entity_type=Database, + service_name=self.context.database_service.name.__root__, + database_name=new_database, + ) + # Drop databases rejected by databaseFilterPattern (matched on FQN or plain name per useFqnForFiltering) + if filter_by_database( + self.source_config.databaseFilterPattern, + database_fqn + if self.source_config.useFqnForFiltering + else new_database, + ): + self.status.filter(database_fqn, "Database Filtered Out") + continue + # Any Exception raised in this try block is logged and the loop moves on to the next database + try: + self.set_inspector(database_name=new_database) + yield new_database + except Exception as exc: + logger.debug(traceback.format_exc()) + logger.error( + f"Error trying to connect to database {new_database}: {exc}" + ) diff --git a/ingestion/src/metadata/ingestion/source/database/azuresql/query_parser.py b/ingestion/src/metadata/ingestion/source/database/azuresql/query_parser.py new file mode 100644 index 000000000000..c3f860feaa36 --- /dev/null +++ b/ingestion/src/metadata/ingestion/source/database/azuresql/query_parser.py @@ -0,0 +1,45 @@ +# Copyright 2021 Collate +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+""" +AzureSQL query parser module +""" +from abc import ABC + +from metadata.generated.schema.entity.services.connections.database.azureSQLConnection import ( + AzureSQLConnection, +) +from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import ( + OpenMetadataConnection, +) +from metadata.generated.schema.metadataIngestion.workflow import ( + Source as WorkflowSource, +) +from metadata.ingestion.api.source import InvalidSourceException +from metadata.ingestion.source.database.query_parser_source import QueryParserSource + + +class AzuresqlQueryParserSource(QueryParserSource, ABC): + """ + Shared AzureSQL base class for the usage and lineage sources + """ + + filters: str + + @classmethod + def create(cls, config_dict, metadata_config: OpenMetadataConnection): + """Build the source from a workflow config dict; raise InvalidSourceException unless the connection is an AzureSQLConnection""" + config: WorkflowSource = WorkflowSource.parse_obj(config_dict) + connection: AzureSQLConnection = config.serviceConnection.__root__.config + if not isinstance(connection, AzureSQLConnection): + raise InvalidSourceException( + f"Expected Azuresql Connection, but got {connection}" + ) + return cls(config, metadata_config) diff --git a/ingestion/src/metadata/ingestion/source/database/azuresql/usage.py b/ingestion/src/metadata/ingestion/source/database/azuresql/usage.py new file mode 100644 index 000000000000..e46ca3937a89 --- /dev/null +++ b/ingestion/src/metadata/ingestion/source/database/azuresql/usage.py @@ -0,0 +1,25 @@ +# Copyright 2021 Collate +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +""" +AzureSQL usage module +""" + +from metadata.ingestion.source.database.azuresql.query_parser import ( + AzuresqlQueryParserSource, +) +from metadata.ingestion.source.database.mssql.usage import MssqlUsageSource + + +class AzuresqlUsageSource(AzuresqlQueryParserSource, MssqlUsageSource): + """ + Extending MssqlUsageSource + + """ diff --git a/openmetadata-docs/content/v1.2.0-SNAPSHOT/connectors/database/azuresql/yaml.md b/openmetadata-docs/content/v1.2.0-SNAPSHOT/connectors/database/azuresql/yaml.md index d6b57c8cf1d1..940a3c3a5f4b 100644 --- a/openmetadata-docs/content/v1.2.0-SNAPSHOT/connectors/database/azuresql/yaml.md +++ b/openmetadata-docs/content/v1.2.0-SNAPSHOT/connectors/database/azuresql/yaml.md @@ -131,6 +131,8 @@ The `sourceConfig` is defined [here](https://github.com/open-metadata/OpenMetada **includeTables**: true or false, to ingest table data. Default is true. +**ingestAllDatabases**: true or false, to ingest data from all databases in AzureSQL. Default is false. You can use databaseFilterPattern on top of this. + **includeViews**: true or false, to ingest views definitions. **databaseFilterPattern**, **schemaFilterPattern**, **tableFilterPattern**: Note that the filter supports regex as include or exclude.
You can find examples [here](/connectors/ingestion/workflows/metadata/filter-patterns/database) diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/azureSQLConnection.json b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/azureSQLConnection.json index cb8441541982..0ffe945cab82 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/azureSQLConnection.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/azureSQLConnection.json @@ -59,6 +59,12 @@ "type": "string", "default": "ODBC Driver 18 for SQL Server" }, + "ingestAllDatabases": { + "title": "Ingest All Databases", + "description": "Ingest data from all databases in AzureSQL. You can use databaseFilterPattern on top of this.", + "type": "boolean", + "default": false + }, "connectionOptions": { "title": "Connection Options", "$ref": "../connectionBasicType.json#/definitions/connectionOptions" @@ -71,6 +77,14 @@ "title": "Supports Metadata Extraction", "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" }, + "supportsUsageExtraction": { + "title": "Supports Usage Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsUsageExtraction" + }, + "supportsLineageExtraction": { + "title": "Supports Lineage Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsLineageExtraction" + }, "supportsDBTExtraction": { "$ref": "../connectionBasicType.json#/definitions/supportsDBTExtraction" }, diff --git a/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/AzureSQL.md b/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/AzureSQL.md index 822a8eedef25..b2841e864e0d 100644 --- a/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/AzureSQL.md +++ b/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/AzureSQL.md @@ -63,6 +63,11 @@ In case
of Docker or Kubernetes deployments, this driver comes out of the box wi $$ +$$section +### Ingest All Databases $(id="ingestAllDatabases") +If ticked, the workflow will be able to ingest all databases in the cluster. If not ticked, the workflow will only ingest tables from the database set above. +$$ + $$section ### Connection Options $(id="connectionOptions")