Added project filter pattern to dashboard entity (#12925)
OnkarVO7 authored Sep 5, 2023
1 parent dc8e59e commit a3ca8b6
Showing 20 changed files with 210 additions and 22 deletions.
1 change: 1 addition & 0 deletions ingestion/examples/workflows/metabase.yaml
@@ -12,6 +12,7 @@ source:
type: DashboardMetadata
dashboardFilterPattern: {}
chartFilterPattern: {}
projectFilterPattern: {}
sink:
type: metadata-rest
config: {}
4 changes: 4 additions & 0 deletions ingestion/examples/workflows/powerbi.yaml
@@ -25,6 +25,10 @@ source:
includes:
- Supplier Quality Analysis Sample
- "Customer"
projectFilterPattern:
includes:
- Supplier Quality Analysis Sample
- "Customer"
sink:
type: metadata-rest
config: {}
1 change: 1 addition & 0 deletions ingestion/examples/workflows/tableau.yaml
@@ -24,6 +24,7 @@ source:
type: DashboardMetadata
dashboardFilterPattern: {}
chartFilterPattern: {}
projectFilterPattern: {}
sink:
type: metadata-rest
config: {}
@@ -63,7 +63,7 @@
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.connections import get_connection, get_test_connection_fn
from metadata.utils import fqn
from metadata.utils.filters import filter_by_dashboard
from metadata.utils.filters import filter_by_dashboard, filter_by_project
from metadata.utils.logger import ingestion_logger

logger = ingestion_logger()
@@ -447,6 +447,18 @@ def get_dashboard(self) -> Any:

try:
dashboard_details = self.get_dashboard_details(dashboard)
self.context.project_name = ( # pylint: disable=assignment-from-none
self.get_project_name(dashboard_details=dashboard_details)
)
if self.context.project_name and filter_by_project(
self.source_config.projectFilterPattern,
self.context.project_name,
):
self.status.filter(
self.context.project_name,
"Project / Workspace Filtered Out",
)
continue
except Exception as exc:
logger.debug(traceback.format_exc())
logger.warning(
@@ -501,3 +513,14 @@ def check_database_schema_name(self, database_schema_name: str):
return None

return database_schema_name

def get_project_name( # pylint: disable=unused-argument, useless-return
self, dashboard_details: Any
) -> Optional[str]:
"""
Get the project / workspace / folder / collection name of the dashboard
"""
logger.debug(
f"Projects are not supported for {self.service_connection.type.name}"
)
return None
@@ -11,7 +11,7 @@
"""Metabase source module"""

import traceback
from typing import Iterable, List, Optional
from typing import Any, Iterable, List, Optional

from metadata.generated.schema.api.data.createChart import CreateChartRequest
from metadata.generated.schema.api.data.createDashboard import CreateDashboardRequest
@@ -100,25 +100,25 @@ def get_dashboard_details(self, dashboard: MetabaseDashboard) -> dict:
"""
return self.client.get_dashboard_details(dashboard.id)

def _get_collection_name(self, collection_id: Optional[str]) -> Optional[str]:
def get_project_name(self, dashboard_details: Any) -> Optional[str]:
"""
Method to search the dataset using id in the workspace dict
Method to get the project name by searching the dataset using id in the workspace dict
"""
try:
if collection_id:
if dashboard_details.collection_id:
collection_name = next(
(
collection.name
for collection in self.collections
if collection.id == collection_id
if collection.id == dashboard_details.collection_id
),
None,
)
return collection_name
except Exception as exc: # pylint: disable=broad-except
logger.debug(traceback.format_exc())
logger.warning(
f"Error fetching the collection details for [{collection_id}]: {exc}"
f"Error fetching the collection details for [{dashboard_details.collection_id}]: {exc}"
)
return None

@@ -138,9 +138,7 @@ def yield_dashboard(
sourceUrl=dashboard_url,
displayName=dashboard_details.name,
description=dashboard_details.description,
project=self._get_collection_name(
collection_id=dashboard_details.collection_id
),
project=self.context.project_name,
charts=[
fqn.build(
self.metadata,
@@ -44,6 +44,7 @@
from metadata.ingestion.source.dashboard.dashboard_service import DashboardServiceSource
from metadata.ingestion.source.dashboard.powerbi.models import (
Dataset,
Group,
PowerBIDashboard,
PowerBIReport,
PowerBiTable,
@@ -54,6 +55,7 @@
filter_by_chart,
filter_by_dashboard,
filter_by_datamodel,
filter_by_project,
)
from metadata.utils.helpers import clean_uri
from metadata.utils.logger import ingestion_logger
@@ -80,12 +82,32 @@ def __init__(

def prepare(self):
if self.service_connection.useAdminApis:
self.get_admin_workspace_data()
groups = self.get_admin_workspace_data()
else:
self.get_org_workspace_data()
groups = self.get_org_workspace_data()
if groups:
self.workspace_data = self.get_filtered_workspaces(groups)
return super().prepare()

def get_org_workspace_data(self):
def get_filtered_workspaces(self, groups: List[Group]) -> List[Group]:
"""
Method to get the workspaces filtered by project filter pattern
"""
filtered_groups = []
for group in groups:
if filter_by_project(
self.source_config.projectFilterPattern,
group.name,
):
self.status.filter(
group.name,
"Workspace Filtered Out",
)
continue
filtered_groups.append(group)
return filtered_groups

def get_org_workspace_data(self) -> Optional[List[Group]]:
"""
fetch all the group workspace ids
"""
@@ -121,12 +143,13 @@ def get_org_workspace_data(self):
)
or []
)
self.workspace_data = groups
return groups

def get_admin_workspace_data(self):
def get_admin_workspace_data(self) -> Optional[List[Group]]:
"""
fetch all the workspace ids
"""
groups = []
workspaces = self.client.fetch_all_workspaces()
if workspaces:
workspace_id_list = [workspace.id for workspace in workspaces]
@@ -155,7 +178,7 @@ def get_admin_workspace_data(self):
response = self.client.fetch_workspace_scan_result(
scan_id=workspace_scan.id
)
self.workspace_data.extend(
groups.extend(
[
active_workspace
for active_workspace in response.workspaces
@@ -166,7 +189,8 @@
logger.error("Error in fetching dashboards and charts")
count += 1
else:
logger.error("Unable to fetch any Powerbi workspaces")
logger.error("Unable to fetch any PowerBI workspaces")
return groups or None

@classmethod
def create(cls, config_dict, metadata_config: OpenMetadataConnection):
@@ -180,7 +204,7 @@ def create(cls, config_dict, metadata_config: OpenMetadataConnection):

def get_dashboard(self) -> Any:
"""
Method to iterate through dashboard lists filter dashbaords & yield dashboard details
Method to iterate through dashboard lists filter dashboards & yield dashboard details
"""
for workspace in self.workspace_data:
self.context.workspace = workspace
@@ -369,7 +393,7 @@ def yield_dashboard(
workspace_id=self.context.workspace.id,
dashboard_id=dashboard_details.id,
),
project=str(self.context.workspace.name),
project=self.get_project_name(dashboard_details=dashboard_details),
displayName=dashboard_details.displayName,
dashboardType=DashboardType.Dashboard,
charts=[
@@ -391,7 +415,7 @@
workspace_id=self.context.workspace.id,
dashboard_id=dashboard_details.id,
),
project=str(self.context.workspace.name),
project=self.get_project_name(dashboard_details=dashboard_details),
displayName=dashboard_details.name,
service=self.context.dashboard_service.fullyQualifiedName.__root__,
)
@@ -668,3 +692,16 @@ def _fetch_dataset_workspace(self, dataset_id: Optional[str]) -> Optional[str]:
return next(iter(workspace_names), None)

return None

def get_project_name(self, dashboard_details: Any) -> Optional[str]:
"""
Get the project / workspace / folder / collection name of the dashboard
"""
try:
return str(self.context.workspace.name)
except Exception as exc:
logger.debug(traceback.format_exc())
logger.warning(
f"Error fetching project name for {dashboard_details.id}: {exc}"
)
return None
@@ -12,7 +12,7 @@
Tableau source module
"""
import traceback
from typing import Iterable, List, Optional, Set
from typing import Any, Iterable, List, Optional, Set

from metadata.generated.schema.api.data.createChart import CreateChartRequest
from metadata.generated.schema.api.data.createDashboard import CreateDashboardRequest
@@ -215,7 +215,7 @@ def yield_dashboard(
name=dashboard_details.id,
displayName=dashboard_details.name,
description=dashboard_details.description,
project=dashboard_details.project.name,
project=self.get_project_name(dashboard_details=dashboard_details),
charts=[
fqn.build(
self.metadata,
@@ -460,3 +460,16 @@ def get_column_info(self, data_source: DataSource) -> Optional[List[Column]]:
logger.debug(traceback.format_exc())
logger.warning(f"Error to yield datamodel column: {exc}")
return datasource_columns

def get_project_name(self, dashboard_details: Any) -> Optional[str]:
"""
Get the project / workspace / folder / collection name of the dashboard
"""
try:
return dashboard_details.project.name
except Exception as exc:
logger.debug(traceback.format_exc())
logger.warning(
f"Error fetching project name for {dashboard_details.id}: {exc}"
)
return None
15 changes: 15 additions & 0 deletions ingestion/src/metadata/utils/filters.py
@@ -240,6 +240,21 @@ def filter_by_datamodel(
return _filter(datamodel_filter_pattern, datamodel_name)


def filter_by_project(
project_filter_pattern: Optional[FilterPattern], project_name: str
) -> bool:
"""
Return True if the project needs to be filtered, False otherwise
Include takes precedence over exclude
:param project_filter_pattern: Model defining project filtering logic
:param project_name: project name
:return: True for filtering, False otherwise
"""
return _filter(project_filter_pattern, project_name)


def filter_by_search_index(
search_index_filter_pattern: Optional[FilterPattern], search_index_name: str
) -> bool:
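For context, here is a minimal usage sketch of the new `filter_by_project` helper. It is not part of this commit: the `FilterPattern` import path is assumed from the generated schema package, and the expected results follow the docstring's "include takes precedence over exclude" rule.

```python
# Hypothetical usage sketch of the new filter_by_project helper (not from this
# commit). The FilterPattern import path below is an assumption.
from metadata.generated.schema.type.filterPattern import FilterPattern
from metadata.utils.filters import filter_by_project

pattern = FilterPattern(includes=["Supplier.*"], excludes=[".*Sample"])

# filter_by_project returns True when the project should be dropped from ingestion.
print(filter_by_project(pattern, "Customer Insights"))  # True  -> filtered out (no include match)
print(filter_by_project(pattern, "Supplier Quality"))   # False -> kept (include match wins)
```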
2 changes: 2 additions & 0 deletions ingestion/tests/unit/topology/dashboard/test_metabase.py
@@ -169,6 +169,7 @@
sourceUrl="http://metabase.com/dashboard/1-test-db",
charts=[],
service=FullyQualifiedEntityName(__root__="mock_metabase"),
project="Test Collection",
)
]

@@ -227,6 +228,7 @@ def __init__(self, methodName, get_connection, test_connection) -> None:
)
self.metabase.client = SimpleNamespace()
self.metabase.context.__dict__["dashboard_service"] = MOCK_DASHBOARD_SERVICE
self.metabase.context.__dict__["project_name"] = "Test Collection"

def test_dashboard_name(self):
assert (
@@ -12,6 +12,7 @@ slug: /connectors/dashboard/domo-dashboard/yaml
| Owners | {% icon iconName="check" /%} |
| Tags | {% icon iconName="cross" /%} |
| Datamodels | {% icon iconName="cross" /%} |
| Projects | {% icon iconName="cross" /%} |
| Lineage | {% icon iconName="cross" /%} |

In this section, we provide guides and references to use the DomoDashboard connector.
@@ -101,6 +102,7 @@ The `sourceConfig` is defined [here](https://github.com/open-metadata/OpenMetada

- **dbServiceNames**: Database Service Names for ingesting lineage if the source supports it.
- **dashboardFilterPattern**, **chartFilterPattern**, **dataModelFilterPattern**: Note that all of them support regex as include or exclude. E.g., "My dashboard, My dash.*, .*Dashboard".
- **projectFilterPattern**: Filter the dashboards, charts and data sources by projects. Note that all of them support regex as include or exclude. E.g., "My project, My proj.*, .*Project".
- **includeOwners**: Set the 'Include Owners' toggle to control whether to include owners to the ingested entity if the owner email matches with a user stored in the OM server as part of metadata ingestion. If the ingested entity already exists and has an owner, the owner will not be overwritten.
- **includeTags**: Set the 'Include Tags' toggle to control whether to include tags in metadata ingestion.
- **includeDataModels**: Set the 'Include Data Models' toggle to control whether to include tags as part of metadata ingestion.
@@ -167,6 +169,13 @@ source:
# excludes:
# - chart3
# - chart4
# projectFilterPattern:
# includes:
# - project1
# - project2
# excludes:
# - project3
# - project4
```
```yaml {% srNumber=7 %}
sink:
@@ -12,6 +12,7 @@ slug: /connectors/dashboard/looker/yaml
| Owners | {% icon iconName="check" /%} |
| Tags | {% icon iconName="cross" /%} |
| Datamodels | {% icon iconName="check" /%} |
| Projects | {% icon iconName="cross" /%} |
| Lineage | {% icon iconName="check" /%} |

In this section, we provide guides and references to use the Looker connector.
@@ -124,6 +125,7 @@ The `sourceConfig` is defined [here](https://github.com/open-metadata/OpenMetada

- **dbServiceNames**: Database Service Names for ingesting lineage if the source supports it.
- **dashboardFilterPattern**, **chartFilterPattern**, **dataModelFilterPattern**: Note that all of them support regex as include or exclude. E.g., "My dashboard, My dash.*, .*Dashboard".
- **projectFilterPattern**: Filter the dashboards, charts and data sources by projects. Note that all of them support regex as include or exclude. E.g., "My project, My proj.*, .*Project".
- **includeOwners**: Set the 'Include Owners' toggle to control whether to include owners to the ingested entity if the owner email matches with a user stored in the OM server as part of metadata ingestion. If the ingested entity already exists and has an owner, the owner will not be overwritten.
- **includeTags**: Set the 'Include Tags' toggle to control whether to include tags in metadata ingestion.
- **includeDataModels**: Set the 'Include Data Models' toggle to control whether to include tags as part of metadata ingestion.
@@ -191,6 +193,13 @@ source:
# excludes:
# - chart3
# - chart4
# projectFilterPattern:
# includes:
# - project1
# - project2
# excludes:
# - project3
# - project4

```
```yaml {% srNumber=6 %}
