diff --git a/ingestion/src/metadata/examples/workflows/tableau.yaml b/ingestion/src/metadata/examples/workflows/tableau.yaml index b7a368a2e742..d1918e70254c 100644 --- a/ingestion/src/metadata/examples/workflows/tableau.yaml +++ b/ingestion/src/metadata/examples/workflows/tableau.yaml @@ -18,6 +18,7 @@ source: siteName: site_name siteUrl: site_url apiVersion: api_version + paginationLimit: 10 sourceConfig: config: type: DashboardMetadata diff --git a/ingestion/src/metadata/ingestion/source/dashboard/tableau/client.py b/ingestion/src/metadata/ingestion/source/dashboard/tableau/client.py index f04a820337b6..f96c6803f93f 100644 --- a/ingestion/src/metadata/ingestion/source/dashboard/tableau/client.py +++ b/ingestion/src/metadata/ingestion/source/dashboard/tableau/client.py @@ -11,6 +11,7 @@ """ Wrapper module of TableauServerConnection client """ +import math import traceback from typing import Any, Callable, Dict, List, Optional @@ -23,9 +24,11 @@ TABLEAU_GET_WORKBOOKS_PARAM_DICT, ) from metadata.ingestion.source.dashboard.tableau.models import ( + DataSource, TableauChart, TableauDashboard, TableauDatasources, + TableauDatasourcesConnection, TableauOwner, ) from metadata.ingestion.source.dashboard.tableau.queries import ( @@ -49,7 +52,13 @@ class TableauClient: _client: TableauServerConnection - def __init__(self, config: Dict[str, Dict[str, Any]], env: str, ssl_verify: bool): + def __init__( + self, + config: Dict[str, Dict[str, Any]], + env: str, + ssl_verify: bool, + pagination_limit: int, + ): # ssl_verify is typed as a `bool` in TableauServerConnection # However, it is passed as `verify=self.ssl_verify` in each `requests` call. 
# In requests (https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification) @@ -60,6 +69,7 @@ def __init__(self, config: Dict[str, Dict[str, Any]], env: str, ssl_verify: bool ssl_verify=ssl_verify, ) self._client.sign_in().json() + self.pagination_limit = pagination_limit @cached_property def server_info(self) -> Callable: @@ -106,15 +116,25 @@ def get_charts(self) -> List[TableauChart]: ) ] - def get_datasources(self): + def _query_datasources( + self, entities_per_page: int, offset: int + ) -> Optional[TableauDatasources]: + """ + Method to query the graphql endpoint to get data sources + """ try: datasources_graphql_result = self._client.metadata_graphql_query( - query=TABLEAU_DATASOURCES_QUERY + query=TABLEAU_DATASOURCES_QUERY.format( + first=entities_per_page, offset=offset + ) ) if datasources_graphql_result: resp = datasources_graphql_result.json() if resp and resp.get("data"): - return TableauDatasources(**resp.get("data")) + tableau_datasource_connection = TableauDatasourcesConnection( + **resp.get("data") + ) + return tableau_datasource_connection.embeddedDatasourcesConnection except Exception: logger.debug(traceback.format_exc()) logger.warning( @@ -124,7 +144,32 @@ def get_datasources(self): "https://help.tableau.com/current/api/metadata_api/en-us/docs/meta_api_start.html" "#enable-the-tableau-metadata-api-for-tableau-server\n" ) - return TableauDatasources(embeddedDatasources=[]) + return None + + def get_datasources(self) -> Optional[List[DataSource]]: + """ + Paginate and get the list of all data sources + """ + try: + # Query the graphql endpoint once to get total count of data sources + tableau_datasource = self._query_datasources(entities_per_page=1, offset=1) + entities_per_page = min(50, self.pagination_limit) + indexes = math.ceil(tableau_datasource.totalCount / entities_per_page) + + # Paginate the results + data_sources = [] + for index in range(indexes): + offset = index * entities_per_page + tableau_datasource = 
self._query_datasources( + entities_per_page=entities_per_page, offset=offset + ) + if tableau_datasource: + data_sources.extend(tableau_datasource.nodes) + return data_sources + except Exception: + logger.debug(traceback.format_exc()) + logger.warning("Unable to fetch Data Sources") + return None def sign_out(self) -> None: self._client.sign_out() diff --git a/ingestion/src/metadata/ingestion/source/dashboard/tableau/connection.py b/ingestion/src/metadata/ingestion/source/dashboard/tableau/connection.py index 0697e98729cf..6002903947b2 100644 --- a/ingestion/src/metadata/ingestion/source/dashboard/tableau/connection.py +++ b/ingestion/src/metadata/ingestion/source/dashboard/tableau/connection.py @@ -55,6 +55,7 @@ def get_connection(connection: TableauConnection) -> TableauClient: config=tableau_server_config, env=connection.env, ssl_verify=get_verify_ssl(connection.sslConfig), + pagination_limit=connection.paginationLimit, ) except Exception as exc: logger.debug(traceback.format_exc()) diff --git a/ingestion/src/metadata/ingestion/source/dashboard/tableau/metadata.py b/ingestion/src/metadata/ingestion/source/dashboard/tableau/metadata.py index 635f5086986b..4dc6e2680cd6 100644 --- a/ingestion/src/metadata/ingestion/source/dashboard/tableau/metadata.py +++ b/ingestion/src/metadata/ingestion/source/dashboard/tableau/metadata.py @@ -108,10 +108,9 @@ def prepare(self): chart for chart in charts if chart.workbook.id == workbook.id ] - for data_model in data_models.embeddedDatasources: - for downstream_workbooks in data_model.downstreamWorkbooks or []: - if downstream_workbooks.luid == workbook.id: - workbook.dataModels.append(data_model) + for data_model in data_models or []: + if data_model.workbook and data_model.workbook.luid == workbook.id: + workbook.dataModels.append(data_model) # collect all the tags from charts and workbooks before yielding final entities if self.source_config.includeTags: diff --git 
a/ingestion/src/metadata/ingestion/source/dashboard/tableau/models.py b/ingestion/src/metadata/ingestion/source/dashboard/tableau/models.py index d41304c680e3..98e50cd237e4 100644 --- a/ingestion/src/metadata/ingestion/source/dashboard/tableau/models.py +++ b/ingestion/src/metadata/ingestion/source/dashboard/tableau/models.py @@ -100,7 +100,7 @@ class DatasourceField(BaseModel): description: Optional[str] -class DownstreamWorkbook(BaseModel): +class Workbook(BaseModel): id: str luid: str name: str @@ -131,12 +131,17 @@ class DataSource(BaseModel): id: str name: str fields: Optional[List[DatasourceField]] - downstreamWorkbooks: Optional[List[DownstreamWorkbook]] + workbook: Optional[Workbook] upstreamTables: Optional[List[UpstreamTable]] class TableauDatasources(BaseModel): - embeddedDatasources: Optional[List[DataSource]] + nodes: Optional[List[DataSource]] + totalCount: Optional[int] + + +class TableauDatasourcesConnection(BaseModel): + embeddedDatasourcesConnection: Optional[TableauDatasources] class TableauChart(TableauBaseModel): diff --git a/ingestion/src/metadata/ingestion/source/dashboard/tableau/queries.py b/ingestion/src/metadata/ingestion/source/dashboard/tableau/queries.py index 0ec637208bda..573f57ad14ea 100644 --- a/ingestion/src/metadata/ingestion/source/dashboard/tableau/queries.py +++ b/ingestion/src/metadata/ingestion/source/dashboard/tableau/queries.py @@ -14,46 +14,49 @@ """ TABLEAU_DATASOURCES_QUERY = """ -query { - embeddedDatasources { - id - name - fields { +{{ + embeddedDatasourcesConnection(first: {first}, offset: {offset} ) {{ + nodes {{ id name - upstreamColumns{ + fields {{ id name - remoteType - } - fullyQualifiedName - description - } - downstreamWorkbooks { - id - luid - name - } - upstreamTables { - id - luid - name - fullName - schema - referencedByQueries { - id - name - query - } - columns { + upstreamColumns{{ + id + name + remoteType + }} + fullyQualifiedName + description + }} + workbook {{ id + luid name - } - database { + }} 
+ upstreamTables {{ id + luid name - } - } - } -} + fullName + schema + referencedByQueries {{ + id + name + query + }} + columns {{ + id + name + }} + database {{ + id + name + }} + }} + }} + totalCount + }} +}} """ diff --git a/openmetadata-docs/content/v1.1.0-snapshot/connectors/dashboard/tableau/airflow.md b/openmetadata-docs/content/v1.1.0-snapshot/connectors/dashboard/tableau/airflow.md index 565506b5db23..f7b20c26f8e7 100644 --- a/openmetadata-docs/content/v1.1.0-snapshot/connectors/dashboard/tableau/airflow.md +++ b/openmetadata-docs/content/v1.1.0-snapshot/connectors/dashboard/tableau/airflow.md @@ -116,6 +116,12 @@ This is a sample config for Tableau: {% /codeInfo %} +{% codeInfo srNumber=18 %} + +**paginationLimit**: The pagination limit will be used while querying the Tableau GraphQL endpoint to get the data source information. + +{% /codeInfo %} + #### Source Configuration - Source Config {% codeInfo srNumber=8 %} @@ -186,6 +192,9 @@ source: ```yaml {% srNumber=7 %} apiVersion: api_version ``` +```yaml {% srNumber=18 %} + paginationLimit: pagination_limit +``` ```yaml {% srNumber=8 %} sourceConfig: config: diff --git a/openmetadata-docs/content/v1.1.0-snapshot/connectors/dashboard/tableau/cli.md b/openmetadata-docs/content/v1.1.0-snapshot/connectors/dashboard/tableau/cli.md index b106b8ecbbff..7649344afcc6 100644 --- a/openmetadata-docs/content/v1.1.0-snapshot/connectors/dashboard/tableau/cli.md +++ b/openmetadata-docs/content/v1.1.0-snapshot/connectors/dashboard/tableau/cli.md @@ -116,6 +116,12 @@ This is a sample config for Tableau: {% /codeInfo %} +{% codeInfo srNumber=11 %} + +**paginationLimit**: The pagination limit will be used while querying the Tableau GraphQL endpoint to get the data source information. 
+ +{% /codeInfo %} + #### Source Configuration - Source Config {% codeInfo srNumber=8 %} @@ -186,6 +192,9 @@ source: ```yaml {% srNumber=7 %} apiVersion: api_version ``` +```yaml {% srNumber=11 %} + paginationLimit: pagination_limit +``` ```yaml {% srNumber=8 %} sourceConfig: config: diff --git a/openmetadata-docs/content/v1.1.0-snapshot/connectors/dashboard/tableau/index.md b/openmetadata-docs/content/v1.1.0-snapshot/connectors/dashboard/tableau/index.md index 997e2f50142d..6970b82f4a94 100644 --- a/openmetadata-docs/content/v1.1.0-snapshot/connectors/dashboard/tableau/index.md +++ b/openmetadata-docs/content/v1.1.0-snapshot/connectors/dashboard/tableau/index.md @@ -221,6 +221,7 @@ For more information to get a Personal Access Token please visit this [link](htt - **Site Name**: This corresponds to the `contentUrl` attribute in the Tableau REST API. The `site_name` is the portion of the URL that follows the `/site/` in the URL. - **Site URL**: If it is empty, the default Tableau site name will be used. - **Environment**: The config object can have multiple environments. The default environment is defined as `tableau_prod`, and you can change this if needed by specifying an `env` parameter. +- **Pagination Limit**: The pagination limit will be used while querying the Tableau GraphQL endpoint to get the data source information. 
{% /extraContent %} diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/dashboard/tableauConnection.json b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/dashboard/tableauConnection.json index e1cdc84eff5d..7469a76eea55 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/dashboard/tableauConnection.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/dashboard/tableauConnection.json @@ -61,6 +61,12 @@ "type": "string", "default": "tableau_prod" }, + "paginationLimit": { + "title": "Pagination Limit", + "description": "Pagination limit used while querying the Tableau Metadata API for getting data sources", + "type": "integer", + "default": 10 + }, "verifySSL": { "$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/verifySSL", "default": "no-ssl" diff --git a/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Dashboard/Tableau.md b/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Dashboard/Tableau.md index f5373a220f44..0dbdd7e14458 100644 --- a/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Dashboard/Tableau.md +++ b/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Dashboard/Tableau.md @@ -85,6 +85,12 @@ $$section The config object can have multiple environments. The default environment is defined as `tableau_prod`, and you can change this if needed by specifying an `env` parameter. $$ +$$section +### Pagination Limit $(id="paginationLimit") + +The pagination limit will be used while querying the Tableau GraphQL endpoint to get the data source information. +$$ + $$section ### Verify SSL $(id="verifySSL")