Skip to content

Commit

Permalink
Added pagination Tableau data sources graphql (#12187)
Browse files Browse the repository at this point in the history
* Added pagination tableau graphql

* changed downstream workbook
  • Loading branch information
OnkarVO7 authored Jun 28, 2023
1 parent df7f5a7 commit acf25f4
Show file tree
Hide file tree
Showing 11 changed files with 131 additions and 46 deletions.
1 change: 1 addition & 0 deletions ingestion/src/metadata/examples/workflows/tableau.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ source:
siteName: site_name
siteUrl: site_url
apiVersion: api_version
paginationLimit: 10
sourceConfig:
config:
type: DashboardMetadata
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"""
Wrapper module of TableauServerConnection client
"""
import math
import traceback
from typing import Any, Callable, Dict, List, Optional

Expand All @@ -23,9 +24,11 @@
TABLEAU_GET_WORKBOOKS_PARAM_DICT,
)
from metadata.ingestion.source.dashboard.tableau.models import (
DataSource,
TableauChart,
TableauDashboard,
TableauDatasources,
TableauDatasourcesConnection,
TableauOwner,
)
from metadata.ingestion.source.dashboard.tableau.queries import (
Expand All @@ -49,7 +52,13 @@ class TableauClient:

_client: TableauServerConnection

def __init__(self, config: Dict[str, Dict[str, Any]], env: str, ssl_verify: bool):
def __init__(
self,
config: Dict[str, Dict[str, Any]],
env: str,
ssl_verify: bool,
pagination_limit: int,
):
# ssl_verify is typed as a `bool` in TableauServerConnection
# However, it is passed as `verify=self.ssl_verify` in each `requests` call.
# In requests (https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification)
Expand All @@ -60,6 +69,7 @@ def __init__(self, config: Dict[str, Dict[str, Any]], env: str, ssl_verify: bool
ssl_verify=ssl_verify,
)
self._client.sign_in().json()
self.pagination_limit = pagination_limit

@cached_property
def server_info(self) -> Callable:
Expand Down Expand Up @@ -106,15 +116,25 @@ def get_charts(self) -> List[TableauChart]:
)
]

def get_datasources(self):
def _query_datasources(
self, entities_per_page: int, offset: int
) -> Optional[TableauDatasources]:
"""
Method to query the graphql endpoint to get data sources
"""
try:
datasources_graphql_result = self._client.metadata_graphql_query(
query=TABLEAU_DATASOURCES_QUERY
query=TABLEAU_DATASOURCES_QUERY.format(
first=entities_per_page, offset=offset
)
)
if datasources_graphql_result:
resp = datasources_graphql_result.json()
if resp and resp.get("data"):
return TableauDatasources(**resp.get("data"))
tableau_datasource_connection = TableauDatasourcesConnection(
**resp.get("data")
)
return tableau_datasource_connection.embeddedDatasourcesConnection
except Exception:
logger.debug(traceback.format_exc())
logger.warning(
Expand All @@ -124,7 +144,32 @@ def get_datasources(self):
"https://help.tableau.com/current/api/metadata_api/en-us/docs/meta_api_start.html"
"#enable-the-tableau-metadata-api-for-tableau-server\n"
)
return TableauDatasources(embeddedDatasources=[])
return None

def get_datasources(self) -> Optional[List[DataSource]]:
    """
    Paginate and get the list of all data sources

    A first single-entity query is issued only to read the total count of
    data sources; the remaining queries walk through the pages.

    Returns:
        The data sources gathered from every page, or None when the total
        count cannot be determined or an unexpected error occurs.
    """
    try:
        # Query the graphql endpoint once to get total count of data sources
        tableau_datasource = self._query_datasources(entities_per_page=1, offset=1)
        if not tableau_datasource or tableau_datasource.totalCount is None:
            # Without a total count there is nothing to paginate over
            logger.warning("Unable to fetch Data Sources")
            return None

        # Cap the page size at 50 and keep it strictly positive, so that a
        # zero or negative configured limit cannot break the page computation
        entities_per_page = max(1, min(50, self.pagination_limit))
        indexes = math.ceil(tableau_datasource.totalCount / entities_per_page)

        # Paginate the results
        data_sources = []
        for index in range(indexes):
            offset = index * entities_per_page
            tableau_datasource = self._query_datasources(
                entities_per_page=entities_per_page, offset=offset
            )
            # A failed page or a page without nodes is skipped instead of
            # discarding the data sources already collected
            if tableau_datasource and tableau_datasource.nodes:
                data_sources.extend(tableau_datasource.nodes)
        return data_sources
    except Exception:
        logger.debug(traceback.format_exc())
        logger.warning("Unable to fetch Data Sources")
    return None

def sign_out(self) -> None:
self._client.sign_out()
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def get_connection(connection: TableauConnection) -> TableauClient:
config=tableau_server_config,
env=connection.env,
ssl_verify=get_verify_ssl(connection.sslConfig),
pagination_limit=connection.paginationLimit,
)
except Exception as exc:
logger.debug(traceback.format_exc())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,9 @@ def prepare(self):
chart for chart in charts if chart.workbook.id == workbook.id
]

for data_model in data_models.embeddedDatasources:
for downstream_workbooks in data_model.downstreamWorkbooks or []:
if downstream_workbooks.luid == workbook.id:
workbook.dataModels.append(data_model)
for data_model in data_models or []:
if data_model.workbook and data_model.workbook.luid == workbook.id:
workbook.dataModels.append(data_model)

# collect all the tags from charts and workbooks before yielding final entities
if self.source_config.includeTags:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ class DatasourceField(BaseModel):
description: Optional[str]


class DownstreamWorkbook(BaseModel):
class Workbook(BaseModel):
id: str
luid: str
name: str
Expand Down Expand Up @@ -131,12 +131,17 @@ class DataSource(BaseModel):
id: str
name: str
fields: Optional[List[DatasourceField]]
downstreamWorkbooks: Optional[List[DownstreamWorkbook]]
workbook: Optional[Workbook]
upstreamTables: Optional[List[UpstreamTable]]


class TableauDatasources(BaseModel):
embeddedDatasources: Optional[List[DataSource]]
nodes: Optional[List[DataSource]]
totalCount: Optional[int]


class TableauDatasourcesConnection(BaseModel):
embeddedDatasourcesConnection: Optional[TableauDatasources]


class TableauChart(TableauBaseModel):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,46 +14,49 @@
"""

TABLEAU_DATASOURCES_QUERY = """
query {
embeddedDatasources {
id
name
fields {
{{
embeddedDatasourcesConnection(first: {first}, offset: {offset} ) {{
nodes {{
id
name
upstreamColumns{
fields {{
id
name
remoteType
}
fullyQualifiedName
description
}
downstreamWorkbooks {
id
luid
name
}
upstreamTables {
id
luid
name
fullName
schema
referencedByQueries {
id
name
query
}
columns {
upstreamColumns{{
id
name
remoteType
}}
fullyQualifiedName
description
}}
workbook {{
id
luid
name
}
database {
}}
upstreamTables {{
id
luid
name
}
}
}
}
fullName
schema
referencedByQueries {{
id
name
query
}}
columns {{
id
name
}}
database {{
id
name
}}
}}
}}
totalCount
}}
}}
"""
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,12 @@ This is a sample config for Tableau:

{% /codeInfo %}

{% codeInfo srNumber=18 %}

**paginationLimit**: The pagination limit used when querying the Tableau GraphQL endpoint to fetch data source information.

{% /codeInfo %}

#### Source Configuration - Source Config

{% codeInfo srNumber=8 %}
Expand Down Expand Up @@ -186,6 +192,9 @@ source:
```yaml {% srNumber=7 %}
apiVersion: api_version
```
```yaml {% srNumber=18 %}
paginationLimit: pagination_limit
```
```yaml {% srNumber=8 %}
sourceConfig:
config:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,12 @@ This is a sample config for Tableau:

{% /codeInfo %}

{% codeInfo srNumber=11 %}

**paginationLimit**: The pagination limit used when querying the Tableau GraphQL endpoint to fetch data source information.

{% /codeInfo %}

#### Source Configuration - Source Config

{% codeInfo srNumber=8 %}
Expand Down Expand Up @@ -186,6 +192,9 @@ source:
```yaml {% srNumber=7 %}
apiVersion: api_version
```
```yaml {% srNumber=11 %}
paginationLimit: pagination_limit
```
```yaml {% srNumber=8 %}
sourceConfig:
config:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ For more information to get a Personal Access Token please visit this [link](htt
- **Site Name**: This corresponds to the `contentUrl` attribute in the Tableau REST API. The `site_name` is the portion of the URL that follows the `/site/` in the URL.
- **Site URL**: If it is empty, the default Tableau site name will be used.
- **Environment**: The config object can have multiple environments. The default environment is defined as `tableau_prod`, and you can change this if needed by specifying an `env` parameter.
- **Pagination Limit**: The pagination limit used when querying the Tableau GraphQL endpoint to fetch data source information.

{% /extraContent %}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@
"type": "string",
"default": "tableau_prod"
},
"paginationLimit": {
"title": "Pagination Limit",
"description": "Pagination limit used while querying the Tableau Metadata API to get data sources",
"type": "integer",
"default": 10
},
"verifySSL": {
"$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/verifySSL",
"default": "no-ssl"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,12 @@ $$section
The config object can have multiple environments. The default environment is defined as `tableau_prod`, and you can change this if needed by specifying an `env` parameter.
$$

$$section
### Pagination Limit $(id="paginationLimit")
The pagination limit used when querying the Tableau GraphQL endpoint to fetch data source information.
$$

$$section
### Verify SSL $(id="verifySSL")
Expand Down

0 comments on commit acf25f4

Please sign in to comment.