From 7214e6d970487bd201dcec7e56ae054f7e4bba0e Mon Sep 17 00:00:00 2001 From: Iakov Gan Date: Fri, 11 Oct 2024 21:08:29 +0200 Subject: [PATCH 1/2] allow-multi-database-export --- cid/export.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/cid/export.py b/cid/export.py index ce671200..03f15476 100644 --- a/cid/export.py +++ b/cid/export.py @@ -115,8 +115,7 @@ def export_analysis(qs, athena, glue): dataset_references = [] datasets = {} - all_views = [] - all_databases = [] + all_views_and_databases = [] for dataset_arn in analysis['DataSetArns']: dependency_views = [] dataset_id = dataset_arn.split('/')[-1] @@ -154,15 +153,19 @@ def export_analysis(qs, athena, glue): and 'Schema' in value['RelationalTable']: logger.debug(f"Dataset {dataset.raw['DataSetId']} looks like classic athena dataset") value['RelationalTable']['DataSourceArn'] = '${athena_datasource_arn}' - all_databases.append(value['RelationalTable']['Schema']) - value['RelationalTable']['Schema'] = '${athena_database_name}' + database_name = value['RelationalTable']['Schema'] + discovered_databases = list(set([d for _, d in all_views_and_databases])) # default database is the first one + if not discovered_databases or discovered_databases[0] == database_name: + value['RelationalTable']['Schema'] = '${athena_database_name}' + else: + logger.warning('Database {database_name} is not default, you will need to make it a parameter.') #TODO: can be added on code level athena_source = value['RelationalTable']['Name'] views_name = athena_source.split('.')[-1] dependency_views.append(views_name) if views_name in athena._resources.get('views') and not get_parameters().get('export-known-datasets'): cid_print(f' Athena view {views_name} is in resources. Skipping') else: - all_views.append(views_name) + all_views_and_databases.append((views_name, database_name) elif 'CustomSql' in value and 'DataSourceArn' in value['CustomSql']: logger.debug(f"Dataset {dataset.raw['DataSetId']} looks like CustomSql athena dataset") value['CustomSql']['DataSourceArn'] = '${athena_datasource_arn}' @@ -195,7 +198,7 @@ def export_analysis(qs, athena, glue): all_databases = list(set(all_databases)) if len(all_databases) > 1: - raise CidCritical(f'CID only supports one database. Multiple used: {all_databases}') + logger.warning(f'CID only supports one database. Multiple used: {all_databases}') # fixme need to fix database if all_databases: athena.DatabaseName = all_databases[0] From ee1ff2fff7b82c168e114b5d09b1cc34d2699f86 Mon Sep 17 00:00:00 2001 From: Iakov GAN <82834333+iakov-aws@users.noreply.github.com> Date: Thu, 24 Oct 2024 14:09:04 +0200 Subject: [PATCH 2/2] Update cid/export.py --- cid/export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cid/export.py b/cid/export.py index 03f15476..209fabaf 100644 --- a/cid/export.py +++ b/cid/export.py @@ -165,7 +165,7 @@ def export_analysis(qs, athena, glue): if views_name in athena._resources.get('views') and not get_parameters().get('export-known-datasets'): cid_print(f' Athena view {views_name} is in resources. Skipping') else: - all_views_and_databases.append((views_name, database_name) + all_views_and_databases.append((views_name, database_name)) elif 'CustomSql' in value and 'DataSourceArn' in value['CustomSql']: logger.debug(f"Dataset {dataset.raw['DataSetId']} looks like CustomSql athena dataset") value['CustomSql']['DataSourceArn'] = '${athena_datasource_arn}'