diff --git a/BigQueryUtils.py b/BigQueryUtils.py index 62a3a6b..97ad27b 100644 --- a/BigQueryUtils.py +++ b/BigQueryUtils.py @@ -126,7 +126,7 @@ def copy_tag(self, tag_creator_account, tag_invoker_account, job_uuid, table_nam asset_name = tagged_table asset_name = asset_name.replace("/datasets/", "/dataset/").replace("/tables/", "/table/") - print('asset_name: ', asset_name) + #print('asset_name: ', asset_name) success = self.insert_history_row(tag_creator_account, tag_invoker_account, job_uuid, table_id, asset_name, tagged_values) @@ -390,12 +390,6 @@ def create_history_table(self, dataset_id, table_name, fields): # writes tag history record def insert_history_row(self, tag_creator_account, tag_invoker_account, job_uuid, table_id, asset_name, tagged_values): - print('enter insert_history_row') - print('job_uuid:', job_uuid) - print('table_id:', table_id) - print('asset_name:', asset_name) - print('tagged_values:', tagged_values) - success = True row = {'event_time': datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%S.%f') + ' UTC', 'asset_name': asset_name, @@ -419,14 +413,16 @@ def insert_history_row(self, tag_creator_account, tag_invoker_account, job_uuid, row[tagged_value['field_id']]= json.dumps(tagged_value['field_value'], default=str) row[tagged_value['field_id']]= tagged_value['field_value'] - print('insert row: ' + str(row)) + #print('insert row: ' + str(row)) row_to_insert = [row,] try: status = self.client.insert_rows_json(table_id, row_to_insert) if len(status) > 0: - print('Inserted row into tag history table. Return status: ', status) + print('Inserted row into tag history table. 
Return status: ', status) + else: + print('Inserted row into tag history table.') except Exception as e: print('Error while writing to tag history table:', e) diff --git a/DataCatalogController.py b/DataCatalogController.py index 40c0529..273c347 100644 --- a/DataCatalogController.py +++ b/DataCatalogController.py @@ -40,10 +40,6 @@ BIGQUERY_REGION = config['DEFAULT']['BIGQUERY_REGION'] -# this variable is needed for tagging filesets -if 'CLOUD_STORAGE_REGION' in config['DEFAULT']: - CLOUD_STORAGE_REGION = config['DEFAULT']['CLOUD_STORAGE_REGION'] - USER_AGENT = 'cloud-solutions/datacatalog-tag-engine-v2' class DataCatalogController: @@ -158,7 +154,7 @@ def get_template(self, included_fields=None): def check_if_tag_exists(self, parent, column=None): - print('enter check_if_tag_exists') + print(f'enter check_if_tag_exists, parent: {parent}') tag_exists = False tag_id = "" @@ -375,20 +371,20 @@ def apply_dynamic_column_config(self, fields, columns_query, uri, job_uuid, conf target_columns = [] # columns in the table which need to be tagged columns_query = self.parse_query_expression(uri, columns_query) - #print('columns_query:', columns_query) + print('columns_query:', columns_query) rows = self.bq_client.query(columns_query).result() num_columns = 0 for row in rows: for column in row: - #print('column:', column) + print('column:', column) target_columns.append(column) num_columns += 1 if num_columns == 0: # no columns to tag - msg = f"Error could not find columns to tag. Please check column_query parameter in your config. Current value: {column_query}" + msg = f"Error could not find columns to tag. Please check column_query parameter in your config. 
Current value: {columns_query}" log_error(msg, None, job_uuid) op_status = constants.ERROR return op_status @@ -1216,8 +1212,10 @@ def apply_export_config(self, config_uuid, target_project, target_dataset, targe return export_status - def apply_import_config(self, job_uuid, config_uuid, tag_dict, tag_history, overwrite=False): + def apply_import_config(self, job_uuid, config_uuid, data_asset_type, data_asset_region, tag_dict, tag_history, overwrite=False): + print(f'apply_import_config: {job_uuid}, {config_uuid}, {data_asset_type}, {data_asset_region}, {tag_dict}, {tag_history}') + op_status = constants.SUCCESS if 'project' in tag_dict: @@ -1228,48 +1226,71 @@ def apply_import_config(self, job_uuid, config_uuid, tag_dict, tag_history, over op_status = constants.ERROR return op_status - if ('dataset' not in tag_dict): - if ('entry_group' not in tag_dict or 'fileset' not in tag_dict): - msg = "Error: could not find required fields in CSV. Expecting either dataset or entry_group in CSV" + if data_asset_type == constants.BQ_ASSET: + if 'dataset' not in tag_dict: + msg = "Error: could not find the required dataset field in the CSV" log_error_tag_dict(msg, None, job_uuid, tag_dict) op_status = constants.ERROR return op_status + else: + entry_type = constants.DATASET + dataset = tag_dict['dataset'] + + if 'table' in tag_dict: + table = tag_dict['table'] + entry_type = constants.BQ_TABLE - if 'dataset' in tag_dict: - dataset = tag_dict['dataset'] - entry_type = constants.DATASET - - if 'table' in tag_dict: - table = tag_dict['table'] - entry_type = constants.TABLE - - if 'entry_group' in tag_dict: - entry_group = tag_dict['entry_group'] - entry_type = constants.FILESET - - if 'fileset' in tag_dict: - fileset = tag_dict['fileset'] + if data_asset_type == constants.FILESET_ASSET: + if 'entry_group' not in tag_dict or 'fileset' not in tag_dict: + msg = "Error: could not find the required fields in the CSV. 
Missing entry_group or fileset or both" + log_error_tag_dict(msg, None, job_uuid, tag_dict) + op_status = constants.ERROR + return op_status else: - msg = "Error: could not find required fields in CSV. Expecting entry_group and fileset in CSV" + entry_type = constants.FILESET + entry_group = tag_dict['entry_group'] + fileset = tag_dict['fileset'] + + if data_asset_type == constants.SPAN_ASSET: + if 'instance' not in tag_dict or 'database' not in tag_dict or 'table' not in tag_dict: + msg = "Error: could not find the required fields in the CSV. The required fields for Spanner are instance, database, and table" log_error_tag_dict(msg, None, job_uuid, tag_dict) op_status = constants.ERROR return op_status - + else: + entry_type = constants.SPAN_TABLE + instance = tag_dict['instance'] + database = tag_dict['database'] + + if 'schema' in tag_dict: + schema = tag_dict['schema'] + table = tag_dict['table'] + table = f"`{schema}.{table}`" + else: + table = tag_dict['table'] + if entry_type == constants.DATASET: - resource = '//bigquery.googleapis.com/projects/{}/datasets/{}'.format(project, dataset) + resource = f'//bigquery.googleapis.com/projects/{project}/datasets/{dataset}' request = datacatalog.LookupEntryRequest() request.linked_resource=resource - if entry_type == constants.TABLE: - resource = '//bigquery.googleapis.com/projects/{}/datasets/{}/tables/{}'.format(project, dataset, table) + if entry_type == constants.BQ_TABLE: + resource = f'//bigquery.googleapis.com/projects/{project}/datasets/{dataset}/tables/{table}' request = datacatalog.LookupEntryRequest() request.linked_resource=resource if entry_type == constants.FILESET: - resource = '//datacatalog.googleapis.com/projects/{}/locations/{}/entryGroups/{}/entries/{}'.format(project, CLOUD_STORAGE_REGION, entry_group, fileset) + resource = f'//datacatalog.googleapis.com/projects/{project}/locations/{data_asset_region}/entryGroups/{entry_group}/entries/{fileset}' request = datacatalog.LookupEntryRequest() 
request.linked_resource=resource - + + if entry_type == constants.SPAN_TABLE: + resource = f'spanner:{project}.regional-{data_asset_region}.{instance}.{database}.{table}' + request = datacatalog.LookupEntryRequest() + request.fully_qualified_name=resource + request.project=project + request.location=data_asset_region + try: entry = self.client.lookup_entry(request) except Exception as e: @@ -1278,6 +1299,16 @@ def apply_import_config(self, job_uuid, config_uuid, tag_dict, tag_history, over op_status = constants.ERROR return op_status + # format uri for storing in tag history table + if data_asset_type == constants.BQ_ASSET: + uri = entry.linked_resource.replace('//bigquery.googleapis.com/projects/', '') + if data_asset_type == constants.SPAN_ASSET: + uri = entry.linked_resource.replace('///projects/', '').replace('instances', 'instance').replace('databases', 'database') + '/table/' + table.replace('`', '') + if data_asset_type == constants.FILESET_ASSET: + uri = entry.linked_resource.replace('//datacatalog.googleapis.com/projects/', '').replace('locations', 'location').replace('entryGroups', 'entry_group').replace('entries', 'entry') + + target_column = None + if 'column' in tag_dict: target_column = tag_dict['column'] @@ -1289,11 +1320,8 @@ def apply_import_config(self, job_uuid, config_uuid, tag_dict, tag_history, over op_status = constants.ERROR return op_status - uri = entry.linked_resource.replace('//bigquery.googleapis.com/projects/', '') + '/column/' + target_column - else: - target_column = None - uri = entry.linked_resource.replace('//bigquery.googleapis.com/projects/', '') - + uri = uri + '/column/' + target_column + try: tag_exists, tag_id = self.check_if_tag_exists(parent=entry.name, column=target_column) @@ -1315,7 +1343,8 @@ def apply_import_config(self, job_uuid, config_uuid, tag_dict, tag_history, over for field_name in tag_dict: if field_name == 'project' or field_name == 'dataset' or field_name == 'table' or \ - field_name == 'column' or 
field_name == 'entry_group' or field_name == 'fileset': + field_name == 'column' or field_name == 'entry_group' or field_name == 'fileset' or \ + field_name == 'instance' or field_name == 'database' or field_name == 'schema': continue field_type = None @@ -2163,18 +2192,19 @@ def update_tag_subset(self, template_id, template_project, template_region, entr target_scopes=SCOPES, lifetime=1200) - template_id = 'data_governance' + template_id = 'data_sensitivity' template_project = 'tag-engine-run' template_region = 'us-central1' - fields = [{'field_type': 'enum', 'field_id': 'data_domain', 'enum_values': ['ENG', 'PRODUCT', 'OPERATIONS', 'LOGISTICS', 'FINANCE', 'HR', 'LEGAL', 'MARKETING', 'SALES', 'CONSUMER', 'GOVERNMENT'], 'is_required': True, 'display_name': 'Data Domain', 'order': 10, 'query_expression': "select 'LOGISTICS'"}, {'field_type': 'enum', 'field_id': 'broad_data_category', 'enum_values': ['CONTENT', 'METADATA', 'CONFIGURATION'], 'is_required': True, 'display_name': 'Broad Data Category', 'order': 9, 'query_expression': "select 'CONTENT'"}] - columns_query = "select 'unique_key', 'created_date', 'incident.city', 'incident.county'" - uri = 'tag-engine-run/datasets/cities_311/tables/austin_311_service_requests' - job_uuid = '3291b93804d211ef9d2549bd5e1feaa2' - config_uuid = '6fb997443e0311ef9f5242004e494300' - template_uuid = 'fa8aa3007f1711eebe2b4f918967d564' - tag_history = False + job_uuid = 'df0ddb3e477511ef95dc42004e494300' + config_uuid = '3404d03a477a11ef995442004e494300' + data_asset_type = 'fileset' + data_asset_region = 'us-central1' + tag_dict = {'project': 'tag-engine-run', 'entry_group': 'sakila_eg', 'fileset': 'staff', 'column': 'first_name', 'sensitive_field': 'TRUE', 'sensitive_type': 'Sensitive_Personal_Identifiable_Information'} + tag_history = True + overwrite = True dcu = DataCatalogController(credentials, target_service_account, 'scohen@gcp.solutions', template_id, template_project, template_region) - 
dcu.apply_dynamic_column_config(fields, columns_query, uri, job_uuid, config_uuid, template_uuid, tag_history, batch_mode=False) - \ No newline at end of file + dcu.apply_import_config(job_uuid, config_uuid, data_asset_type, data_asset_region, tag_dict, tag_history, overwrite) + + \ No newline at end of file diff --git a/README.md b/README.md index f48c400..9c716bb 100644 --- a/README.md +++ b/README.md @@ -21,16 +21,13 @@ Follow the steps below to deploy Tag Engine with Terraform. Alternatively, you may choose to deploy Tag Engine with [gcloud commands](https://github.com/GoogleCloudPlatform/datacatalog-tag-engine/tree/cloud-run/docs/manual_deployment.md) instead of running the Terraform. -
- -1. Create (or designate) two service accounts:

- +1. Create (or designate) two service accounts:
- A service account that runs the Tag Engine Cloud Run services (both API and UI). This account is referred to as `TAG_ENGINE_SA`. - A service account that sources the metadata from BigQuery or Cloud Storage, and then performs the tagging in Data Catalog. This account is referred to as `TAG_CREATOR_SA`.
See [Creating Service Accounts](https://cloud.google.com/iam/docs/service-accounts-create) for more details. - Why do we need two different service accounts? The key benefit of decoupling them is to allow individual teams to have their own Tag Creator SA. This account has permissions to read specific data assets in BigQuery and Cloud Storage. For example, the Finance team can have a different Tag Creator SA from the Finance team if they own different data assets. The Tag Engine admin then links each invoker account (either service or user) to a specific Tag Creator SA. Invoker accounts call Tag Engine through either the API or UI. This allows the Tag Engine admin to run and maintain a single instance of Tag Engine, as opposed to one instance per team.

+ Why do we need two different service accounts? The key benefit of decoupling them is to allow individual teams to have their own Tag Creator SA. This account has permissions to read specific data assets in BigQuery and Cloud Storage. For example, the Finance team can have a different Tag Creator SA from the Marketing team if they own different data assets. The Tag Engine admin then links each invoker account (either service or user) to a specific Tag Creator SA. Invoker accounts call Tag Engine through either the API or UI. This allows the Tag Engine admin to run and maintain a single instance of Tag Engine, as opposed to one instance per team.
2. Create an OAuth client: @@ -42,14 +39,13 @@ Alternatively, you may choose to deploy Tag Engine with [gcloud commands](https: Name: tag-engine-oauth
Authorized redirects URIs: Leave this field blank for now. Click Create
- Download the credentials as `te_client_secret.json` and place the file in the root of the `datacatalog-tag-engine` directory

+ Download the credentials as `te_client_secret.json` and place the file in the root of the `datacatalog-tag-engine` directory
Note: The client secret file is required for establishing the authorization flow from the UI. 3. Make a copy of `datacatalog-tag-engine/tagengine.ini.tpl` naming the new copy `datacatalog-tag-engine/tagengine.ini`. 4. Open `datacatalog-tag-engine/tagengine.ini` and set the following variables in this file: - ``` TAG_ENGINE_SA TAG_CREATOR_SA @@ -59,6 +55,8 @@ Alternatively, you may choose to deploy Tag Engine with [gcloud commands](https: FIRESTORE_REGION FIRESTORE_DATABASE BIGQUERY_REGION + FILESET_REGION + SPANNER_REGION ENABLE_AUTH OAUTH_CLIENT_CREDENTIALS ENABLE_TAG_HISTORY @@ -98,8 +96,6 @@ Alternatively, you may choose to deploy Tag Engine with [gcloud commands](https: When the Terraform finishes running, it should output two URIs. One for the API service (which looks like this https://tag-engine-api-xxxxxxxxxxxxx.a.run.app) and another for the UI service (which looks like this https://tag-engine-ui-xxxxxxxxxxxxx.a.run.app).

- -

### Part 2: Testing your Tag Engine API setup 1. Create the sample `data_governance` tag template: @@ -111,7 +107,6 @@ Alternatively, you may choose to deploy Tag Engine with [gcloud commands](https: ``` The previous command creates the `data_governance` tag template in the `$DATA_CATALOG_PROJECT` and `$DATA_CATALOG_REGION`. -
2. Grant permissions to invoker account (user or service) @@ -153,8 +148,6 @@ Alternatively, you may choose to deploy Tag Engine with [gcloud commands](https: Very important: Tag Engine requires that these roles be directly attached to your invoker account(s). -
- 3. Generate an IAM token (aka Bearer token) for authenticating to Tag Engine: If you are invoking Tag Engine with a user account, run `gcloud auth login` and authenticate with your user account. @@ -163,7 +156,6 @@ Alternatively, you may choose to deploy Tag Engine with [gcloud commands](https: ``` export IAM_TOKEN=$(gcloud auth print-identity-token) ``` -
4. Create your first Tag Engine configuration: @@ -186,7 +178,6 @@ Alternatively, you may choose to deploy Tag Engine with [gcloud commands](https: ``` {"config_type":"DYNAMIC_TAG_TABLE","config_uuid":"facb59187f1711eebe2b4f918967d564"} ``` -
5. Run your first job: @@ -218,7 +209,6 @@ Alternatively, you may choose to deploy Tag Engine with [gcloud commands](https: The job metadata parameter gets written into a BigQuery table that is associated with the job_uuid. -
6. View your job status: @@ -281,39 +271,36 @@ Alternatively, you may choose to deploy Tag Engine with [gcloud commands](https: 10. Create a tag configuration by selecting one of the options from this page. If you encounter a 500 error, open the Cloud Run logs for `tag-engine-ui` to troubleshoot. -
### Part 4: Troubleshooting There is a known issue with the Terraform. If you encounter the error `The requested URL was not found on this server` when you try to create a configuration from the API, the issue is that the container didn't build correctly. Try to rebuild and redeploy the Cloud Run API service with this command: - ``` - cd datacatalog-tag-engine - gcloud run deploy tag-engine-api \ - --source . \ - --platform managed \ - --region $TAG_ENGINE_REGION \ - --no-allow-unauthenticated \ - --ingress=all \ - --memory=4G \ - --timeout=60m \ - --service-account=$TAG_ENGINE_SA - ``` +``` + cd datacatalog-tag-engine + gcloud run deploy tag-engine-api \ + --source . \ + --platform managed \ + --region $TAG_ENGINE_REGION \ + --no-allow-unauthenticated \ + --ingress=all \ + --memory=4G \ + --timeout=60m \ + --service-account=$TAG_ENGINE_SA +``` Then, call the `ping` endpoint as follows: - ``` - curl $TAG_ENGINE_URL/ping -H "Authorization: Bearer $IAM_TOKEN" - ``` +``` + curl $TAG_ENGINE_URL/ping -H "Authorization: Bearer $IAM_TOKEN" +``` You should see the following response: - ``` - Tag Engine is alive - ``` - -
+``` + Tag Engine is alive +``` ### Part 5: Local Development and Testing @@ -345,8 +332,6 @@ pytest Open `examples/unit_test.sh` and go through the different methods for interracting with Tag Engine, including `configure_tag_history`, `create_static_asset_config`, `create_dynamic_column_config`, etc.
-
- 2. Explore the script samples: There are multiple test scripts in Python in the `examples/scripts` folder. These are intended to help you get started with the Tag Engine API. @@ -368,8 +353,6 @@ pytest python purge_inactive_configs.py ``` -
- 3. Explore sample workflows: The `extensions/orchestration/` folder contains some sample workflows implemented in Cloud Workflow. The `trigger_tag_export.yaml` and `trigger_tag_export_import.yaml` show how to orchestrate Tag Engine jobs. To run the workflows, enable the Cloud Workflows API (`workflows.googleapis.com`) and then follow these steps: diff --git a/TagEngineStoreHandler.py b/TagEngineStoreHandler.py index d16940e..8f1a232 100644 --- a/TagEngineStoreHandler.py +++ b/TagEngineStoreHandler.py @@ -964,11 +964,12 @@ def write_tag_restore_config(self, service_account, source_template_uuid, source def write_tag_import_config(self, service_account, template_uuid, template_id, template_project, template_region, \ - metadata_import_location, tag_history, overwrite=True): + data_asset_type, data_asset_region, metadata_import_location, tag_history, \ + overwrite=True): print('** write_tag_import_config **') - # check to see if this config already exists and if so, return it + # check if this config already exists and if so, return it coll_ref = self.db.collection('import_configs') query = coll_ref.where(filter=FieldFilter('template_uuid', '==', template_uuid)) query = query.where(filter=FieldFilter('metadata_import_location', '==', metadata_import_location)) @@ -986,7 +987,7 @@ def write_tag_import_config(self, service_account, template_uuid, template_id, t config_uuid = uuid.uuid1().hex doc_ref = coll_ref.document(config_uuid) - doc_ref.set({ + config_dict = { 'config_uuid': config_uuid, 'config_type': 'TAG_IMPORT', 'config_status': 'ACTIVE', @@ -999,20 +1000,39 @@ def write_tag_import_config(self, service_account, template_uuid, template_id, t 'tag_history': tag_history, 'overwrite': overwrite, 'service_account': service_account - }) + } + + if data_asset_type != None: + config_dict['data_asset_type'] = data_asset_type + + if data_asset_region != None: + config_dict['data_asset_region'] = data_asset_region + + doc_ref.set(config_dict) return config_uuid - def 
update_tag_import_config(self, config_uuid, metadata_import_location): + def update_tag_import_config(self, config_uuid, data_asset_type, data_asset_region, metadata_import_location): print('** update_tag_import_config **') success = True config_ref = self.db.collection("import_configs").document(config_uuid) + updated_config = {} + + if data_asset_type != None: + updated_config['data_asset_type'] = data_asset_type + + if data_asset_region != None: + updated_config['data_asset_region'] = data_asset_region + + if metadata_import_location != None: + updated_config['metadata_import_location'] = metadata_import_location + try: - config_ref.update({"metadata_import_location": metadata_import_location}) + config_ref.update(updated_config) except Exception as e: msg = 'Error updating config {}'.format(config_uuid) diff --git a/constants.py b/constants.py index 6b4899b..ff9016f 100644 --- a/constants.py +++ b/constants.py @@ -14,16 +14,21 @@ SUCCESS = 0 ERROR = -1 -TABLE = 'TABLE' -DATASET = 'DATASET' -FILESET = 'FILESET' +BQ_ASSET = 'bigquery' +FILESET_ASSET = 'fileset' +SPAN_ASSET = 'spanner' +DATASET = 1 +BQ_TABLE = 2 +SPAN_TABLE = 3 +FILESET = 4 TAG_CREATED = 'TAG_CREATED' TAG_UPDATED = 'TAG_UPDATED' BQ_DATASET_TAG = 1 BQ_TABLE_TAG = 2 BQ_COLUMN_TAG = 3 BQ_RES = 'BQ' -GCS_RES = 'GCS' +GCS_RES = 'GCS' +SPAN_RES = 'SPAN' # future, not yet implemented PUBSUB_RES = 'PUBSUB' # future, not yet implemented STATIC_TAG = 1 DYNAMIC_TAG = 2 \ No newline at end of file diff --git a/deploy/cloud_run.tf b/deploy/cloud_run.tf index 3ae9ceb..2f634b5 100644 --- a/deploy/cloud_run.tf +++ b/deploy/cloud_run.tf @@ -40,9 +40,9 @@ EOF } depends_on = [google_artifact_registry_repository.image_registry, google_project_service.tag_engine_project, - google_project_iam_binding.storage_object_get, - google_project_iam_binding.log_writer, - google_project_iam_binding.repo_admin] + google_project_iam_member.storage_object_get, + google_project_iam_member.log_writer, + 
google_project_iam_member.repo_admin] } resource "google_cloud_run_v2_service" "api_service" { @@ -110,7 +110,7 @@ ${self.triggers.full_image_path} \ EOF } - depends_on = [google_artifact_registry_repository.image_registry, google_project_service.tag_engine_project, google_project_iam_binding.storage_object_get, google_project_iam_binding.log_writer, google_project_iam_binding.repo_admin] + depends_on = [google_artifact_registry_repository.image_registry, google_project_service.tag_engine_project, google_project_iam_member.storage_object_get, google_project_iam_member.log_writer, google_project_iam_member.repo_admin] } resource "google_cloud_run_v2_service" "ui_service" { diff --git a/deploy/firestore.tf b/deploy/firestore.tf index 5c41091..aca9dfc 100644 --- a/deploy/firestore.tf +++ b/deploy/firestore.tf @@ -40,12 +40,12 @@ resource "null_resource" "firestore_indexes" { firestore_db = google_firestore_database.create.id } provisioner "local-exec" { - command = "python create_indexes.py create ${var.firestore_project} ${var.firestore_database}" + command = "python3 create_indexes.py create ${var.firestore_project} ${var.firestore_database}" } provisioner "local-exec" { when = destroy - command = "python create_indexes.py destroy ${self.triggers.firestore_project} ${self.triggers.firestore_database}" + command = "python3 create_indexes.py destroy ${self.triggers.firestore_project} ${self.triggers.firestore_database}" } depends_on = [google_firestore_database.create, null_resource.install_packages] diff --git a/deploy/iam_bindings.tf b/deploy/iam_bindings.tf index 16bace3..2e841f8 100644 --- a/deploy/iam_bindings.tf +++ b/deploy/iam_bindings.tf @@ -2,155 +2,155 @@ # Create the two custom IAM roles (needed by the SENSITIVE_COLUMN_CONFIG type) # ************************************************** # -resource "google_project_iam_custom_role" "bigquery_schema_update" { - project = var.bigquery_project - role_id = "BigQuerySchemaUpdate" - title = "BigQuery Schema Update" 
- description = "Custom role for updating the schema of a BigQuery table with policy tags" - permissions = ["bigquery.tables.setCategory"] - depends_on = [google_project_service.tag_engine_project] +resource "google_project_iam_custom_role" "bigquery_schema_update" { + project = var.bigquery_project + role_id = "BigQuerySchemaUpdate" + title = "BigQuery Schema Update" + description = "Custom role for updating the schema of a BigQuery table with policy tags" + permissions = ["bigquery.tables.setCategory"] + depends_on = [google_project_service.tag_engine_project] } -resource "google_project_iam_custom_role" "policy_tag_reader" { - project = var.tag_engine_project - role_id = "PolicyTagReader" - title = "BigQuery Policy Tag Reader" - description = "Read Policy Tag Taxonomy" - permissions = ["datacatalog.taxonomies.get","datacatalog.taxonomies.list"] - depends_on = [google_project_service.tag_engine_project] +resource "google_project_iam_custom_role" "policy_tag_reader" { + project = var.data_catalog_project + role_id = "PolicyTagReader" + title = "BigQuery Policy Tag Reader" + description = "Read Policy Tag Taxonomy" + permissions = ["datacatalog.taxonomies.get", "datacatalog.taxonomies.list"] + depends_on = [google_project_service.tag_engine_project] } # ************************************************** # # Create the project level policy bindings for tag_engine_sa # ************************************************** # -resource "google_project_iam_binding" "enqueuer" { - project = var.tag_engine_project - role = "roles/cloudtasks.enqueuer" - members = ["serviceAccount:${var.tag_engine_sa}"] +resource "google_project_iam_member" "enqueuer" { + project = var.tag_engine_project + role = "roles/cloudtasks.enqueuer" + member = "serviceAccount:${var.tag_engine_sa}" depends_on = [google_project_service.tag_engine_project] } -resource "google_project_iam_binding" "taskRunner" { - project = var.tag_engine_project - role = "roles/cloudtasks.taskRunner" - members = 
["serviceAccount:${var.tag_engine_sa}"] +resource "google_project_iam_member" "taskRunner" { + project = var.tag_engine_project + role = "roles/cloudtasks.taskRunner" + member = "serviceAccount:${var.tag_engine_sa}" depends_on = [google_project_service.tag_engine_project] } - -resource "google_project_iam_binding" "user" { - project = var.firestore_project - role = "roles/datastore.user" - members = ["serviceAccount:${var.tag_engine_sa}"] + +resource "google_project_iam_member" "user" { + project = var.firestore_project + role = "roles/datastore.user" + member = "serviceAccount:${var.tag_engine_sa}" depends_on = [google_project_service.firestore_project] } -resource "google_project_iam_binding" "indexAdmin" { - project = var.firestore_project - role = "roles/datastore.indexAdmin" - members = ["serviceAccount:${var.tag_engine_sa}"] +resource "google_project_iam_member" "indexAdmin" { + project = var.firestore_project + role = "roles/datastore.indexAdmin" + member = "serviceAccount:${var.tag_engine_sa}" depends_on = [google_project_service.firestore_project] } -resource "google_project_iam_binding" "invoker" { - project = var.tag_engine_project - role = "roles/run.invoker" - members = ["serviceAccount:${var.tag_engine_sa}"] +resource "google_project_iam_member" "invoker" { + project = var.tag_engine_project + role = "roles/run.invoker" + member = "serviceAccount:${var.tag_engine_sa}" depends_on = [google_project_service.tag_engine_project] } -resource "google_project_iam_binding" "storage_object_get" { - project = var.tag_engine_project - role = "roles/storage.objectViewer" - members = ["serviceAccount:${var.tag_engine_sa}"] +resource "google_project_iam_member" "storage_object_get" { + project = var.tag_engine_project + role = "roles/storage.objectViewer" + member = "serviceAccount:${var.tag_engine_sa}" depends_on = [google_project_service.tag_engine_project] } -resource "google_project_iam_binding" "log_writer" { - project = var.tag_engine_project - role = 
"roles/logging.logWriter" - members = ["serviceAccount:${var.tag_engine_sa}"] +resource "google_project_iam_member" "log_writer" { + project = var.tag_engine_project + role = "roles/logging.logWriter" + member = "serviceAccount:${var.tag_engine_sa}" depends_on = [google_project_service.tag_engine_project] } -resource "google_project_iam_binding" "repo_admin" { - project = var.tag_engine_project - role = "roles/artifactregistry.repoAdmin" - members = ["serviceAccount:${var.tag_engine_sa}"] +resource "google_project_iam_member" "repo_admin" { + project = var.tag_engine_project + role = "roles/artifactregistry.repoAdmin" + member = "serviceAccount:${var.tag_engine_sa}" depends_on = [google_project_service.tag_engine_project] } - + # ************************************************************ # # Create the project level policy bindings for tag_creator_sa # ************************************************************ # -resource "google_project_iam_binding" "tagEditor" { - project = var.tag_engine_project - role = "roles/datacatalog.tagEditor" - members = ["serviceAccount:${var.tag_creator_sa}"] +resource "google_project_iam_member" "tagEditor" { + project = var.tag_engine_project + role = "roles/datacatalog.tagEditor" + member = "serviceAccount:${var.tag_creator_sa}" depends_on = [google_project_service.tag_engine_project] } - -resource "google_project_iam_binding" "tagTemplateUser" { - project = var.tag_engine_project - role = "roles/datacatalog.tagTemplateUser" - members = ["serviceAccount:${var.tag_creator_sa}"] + +resource "google_project_iam_member" "tagTemplateUser" { + project = var.tag_engine_project + role = "roles/datacatalog.tagTemplateUser" + member = "serviceAccount:${var.tag_creator_sa}" depends_on = [google_project_service.tag_engine_project] } -resource "google_project_iam_binding" "tagTemplateViewer" { - project = var.tag_engine_project - role = "roles/datacatalog.tagTemplateViewer" - members = ["serviceAccount:${var.tag_creator_sa}"] +resource 
"google_project_iam_member" "tagTemplateViewer" { + project = var.tag_engine_project + role = "roles/datacatalog.tagTemplateViewer" + member = "serviceAccount:${var.tag_creator_sa}" depends_on = [google_project_service.tag_engine_project] } -resource "google_project_iam_binding" "viewer" { - project = var.data_catalog_project - role = "roles/datacatalog.viewer" - members = ["serviceAccount:${var.tag_creator_sa}"] +resource "google_project_iam_member" "viewer" { + project = var.data_catalog_project + role = "roles/datacatalog.viewer" + member = "serviceAccount:${var.tag_creator_sa}" depends_on = [google_project_service.tag_engine_project] } -resource "google_project_iam_binding" "dataEditor" { - project = var.bigquery_project - role = "roles/bigquery.dataEditor" - members = ["serviceAccount:${var.tag_creator_sa}"] +resource "google_project_iam_member" "dataEditor" { + project = var.bigquery_project + role = "roles/bigquery.dataEditor" + member = "serviceAccount:${var.tag_creator_sa}" depends_on = [google_project_service.bigquery_project] } -resource "google_project_iam_binding" "jobUser" { - project = var.tag_engine_project - role = "roles/bigquery.jobUser" - members = ["serviceAccount:${var.tag_creator_sa}"] +resource "google_project_iam_member" "jobUser" { + project = var.tag_engine_project + role = "roles/bigquery.jobUser" + member = "serviceAccount:${var.tag_creator_sa}" depends_on = [google_project_service.tag_engine_project] } - -resource "google_project_iam_binding" "metadataViewer" { - project = var.bigquery_project - role = "roles/bigquery.metadataViewer" - members = ["serviceAccount:${var.tag_creator_sa}"] + +resource "google_project_iam_member" "metadataViewer" { + project = var.bigquery_project + role = "roles/bigquery.metadataViewer" + member = "serviceAccount:${var.tag_creator_sa}" depends_on = [google_project_service.bigquery_project] } -resource "google_project_iam_binding" "loggingViewer" { - project = var.tag_engine_project - role = 
"roles/logging.viewer" - members = ["serviceAccount:${var.tag_creator_sa}"] +resource "google_project_iam_member" "loggingViewer" { + project = var.tag_engine_project + role = "roles/logging.viewer" + member = "serviceAccount:${var.tag_creator_sa}" depends_on = [google_project_service.tag_engine_project] } -resource "google_project_iam_binding" "BigQuerySchemaUpdate" { - project = var.bigquery_project - role = "projects/${var.bigquery_project}/roles/BigQuerySchemaUpdate" - members = ["serviceAccount:${var.tag_creator_sa}"] +resource "google_project_iam_member" "BigQuerySchemaUpdate" { + project = var.bigquery_project + role = "projects/${var.bigquery_project}/roles/BigQuerySchemaUpdate" + member = "serviceAccount:${var.tag_creator_sa}" depends_on = [google_project_iam_custom_role.bigquery_schema_update] } -resource "google_project_iam_binding" "PolicyTagReader" { - project = var.data_catalog_project - role = "projects/${var.tag_engine_project}/roles/PolicyTagReader" - members = ["serviceAccount:${var.tag_creator_sa}"] +resource "google_project_iam_member" "PolicyTagReader" { + project = var.data_catalog_project + role = "projects/${var.data_catalog_project}/roles/PolicyTagReader" + member = "serviceAccount:${var.tag_creator_sa}" depends_on = [google_project_iam_custom_role.policy_tag_reader] } @@ -158,40 +158,32 @@ resource "google_project_iam_binding" "PolicyTagReader" { # Create the service account policy bindings for tag_engine_sa # ************************************************************ # -resource "google_service_account_iam_binding" "serviceAccountUser_tag_engine_sa" { +resource "google_service_account_iam_member" "serviceAccountUser_tag_engine_sa" { service_account_id = "projects/${var.tag_engine_project}/serviceAccounts/${var.tag_engine_sa}" - role = "roles/iam.serviceAccountUser" - members = [ - "serviceAccount:${var.tag_engine_sa}", - ] - depends_on = [google_project_service.tag_engine_project] + role = "roles/iam.serviceAccountUser" + member = 
"serviceAccount:${var.tag_engine_sa}" + depends_on = [google_project_service.tag_engine_project] } -resource "google_service_account_iam_binding" "serviceAccountUser_tag_creator_sa" { +resource "google_service_account_iam_member" "serviceAccountUser_tag_creator_sa" { service_account_id = "projects/${var.tag_engine_project}/serviceAccounts/${var.tag_creator_sa}" - role = "roles/iam.serviceAccountUser" - members = [ - "serviceAccount:${var.tag_engine_sa}", - ] - depends_on = [google_project_service.tag_engine_project] + role = "roles/iam.serviceAccountUser" + member = "serviceAccount:${var.tag_engine_sa}" + depends_on = [google_project_service.tag_engine_project] } -resource "google_service_account_iam_binding" "serviceAccountViewer_tag_creator_sa" { +resource "google_service_account_iam_member" "serviceAccountViewer_tag_creator_sa" { service_account_id = "projects/${var.tag_engine_project}/serviceAccounts/${var.tag_creator_sa}" - role = "roles/iam.serviceAccountViewer" - members = [ - "serviceAccount:${var.tag_engine_sa}", - ] - depends_on = [google_project_service.tag_engine_project] + role = "roles/iam.serviceAccountViewer" + member = "serviceAccount:${var.tag_engine_sa}" + depends_on = [google_project_service.tag_engine_project] } -resource "google_service_account_iam_binding" "serviceAccountTokenCreator_tag_creator_sa" { +resource "google_service_account_iam_member" "serviceAccountTokenCreator_tag_creator_sa" { service_account_id = "projects/${var.tag_engine_project}/serviceAccounts/${var.tag_creator_sa}" - role = "roles/iam.serviceAccountTokenCreator" - members = [ - "serviceAccount:${var.tag_engine_sa}", - ] - depends_on = [google_project_service.tag_engine_project] + role = "roles/iam.serviceAccountTokenCreator" + member = "serviceAccount:${var.tag_engine_sa}" + depends_on = [google_project_service.tag_engine_project] } # ************************************************************ # @@ -204,7 +196,7 @@ module "storage_bucket-iam-bindings" { mode = "additive" 
bindings = { - "roles/storage.legacyBucketReader" = [ + "roles/storage.legacyBucketReader" = [ "serviceAccount:${var.tag_creator_sa}", ] } diff --git a/docs/manual_deployment.md b/docs/manual_deployment.md index 46927b9..1edc7ec 100644 --- a/docs/manual_deployment.md +++ b/docs/manual_deployment.md @@ -41,8 +41,7 @@ If multiple teams want to share a single instance of Tag Engine and they own dif - Create an OAuth client ID from API Credentials. Set the `Authorized redirect URI` to `https://[TAG_ENGINE_DOMAIN]/oauth2callback`, where [TAG_ENGINE_DOMAIN] is your actual domain name (e.g. `https://tagengine.app/oauth2callback`). If you are planning to use the Cloud Run service URL, you can leave this field empty for now, and populate it at the end once you know your Cloud Run service URL for the Tag Engine UI. - - Download the OAuth client secret and save the json file to the root of your local Tag Engine repository as `te_client_secret.json`.

- + - Download the OAuth client secret and save the json file to the root of your local Tag Engine repository as `te_client_secret.json`. 4. Open `tagengine.ini` and set the following variables in this file. @@ -55,6 +54,8 @@ If multiple teams want to share a single instance of Tag Engine and they own dif FIRESTORE_REGION FIRESTORE_DATABASE BIGQUERY_REGION + FILESET_REGION + SPANNER_REGION OAUTH_CLIENT_CREDENTIALS ENABLE_AUTH TAG_HISTORY_PROJECT @@ -70,7 +71,7 @@ If multiple teams want to share a single instance of Tag Engine and they own dif - The variable `ENABLE_AUTH` is a boolean. When set to `True`, Tag Engine verifies that the end user is authorized to use `TAG_CREATOR_SA` prior to processing their tag requests. This is the recommended value. - - The `tagengine.ini` file also has two additional variables, `INJECTOR_QUEUE` and `WORK_QUEUE`. These determine the names of the cloud tasks queues. You do not need to change them. The queues are created in step 6 of this setup.

+ - The `tagengine.ini` file also has two additional variables, `INJECTOR_QUEUE` and `WORK_QUEUE`. These determine the names of the cloud tasks queues. You do not need to change them. The queues are created in step 6 of this setup. 5. Enable the required Google Cloud APIs: @@ -92,21 +93,19 @@ If multiple teams want to share a single instance of Tag Engine and they own dif ``` gcloud config set project $DATA_CATALOG_PROJECT gcloud services enable datacatalog.googleapis.com - ``` -
+ ``` 6. Create the two cloud task queues. The first queue is used to queue the entire job while the second is used to queue individual work items. If a task fails, a second one will get created due to `max-attempts=2`: ``` gcloud config set project $TAG_ENGINE_PROJECT - + gcloud tasks queues create tag-engine-injector-queue \ --location=$TAG_ENGINE_REGION --max-attempts=2 --max-concurrent-dispatches=100 gcloud tasks queues create tag-engine-work-queue \ --location=$TAG_ENGINE_REGION --max-attempts=2 --max-concurrent-dispatches=100 ``` -
7. Create the Firestore database and indexes. @@ -115,7 +114,7 @@ If multiple teams want to share a single instance of Tag Engine and they own dif gcloud firestore databases create --database=$FIRESTORE_DATABASE --project=$FIRESTORE_PROJECT --location=$FIRESTORE_REGION ``` - If you're not able to create the Firestore database in your preferred region, consult [the available](https://cloud.google.com/firestore/docs/locations) regions and choose the nearest region to what you set `TAG_ENGINE_REGION`.

+ If you're not able to create the Firestore database in your preferred region, consult [the available](https://cloud.google.com/firestore/docs/locations) regions and choose the nearest region to what you set `TAG_ENGINE_REGION`. ``` pip install google-cloud-firestore @@ -124,32 +123,10 @@ If multiple teams want to share a single instance of Tag Engine and they own dif cd .. ``` - Creating the indexes can take a few minutes. As the indexes get created, you will see them show up in the Firestore console. There should be about 36 indexes in total.

- - -
+ Creating the indexes can take a few minutes. As the indexes get created, you will see them show up in the Firestore console. There should be about 36 indexes in total. -8. Create two custom IAM roles which are required by `SENSITIVE_COLUMN_CONFIG`. If you are not planning to use this configuration type, you can skip this step. This configuration type creates policy tags on sensitive columns: - - ``` - gcloud config set project $BIGQUERY_PROJECT - gcloud iam roles create BigQuerySchemaUpdate \ - --project $BIGQUERY_PROJECT \ - --title BigQuerySchemaUpdate \ - --description "Update table schema with policy tags" \ - --permissions bigquery.tables.setCategory - - gcloud config set project $DATA_CATALOG_PROJECT - gcloud iam roles create PolicyTagReader \ - --project $DATA_CATALOG_PROJECT \ - --title PolicyTagReader \ - --description "Read Policy Tag Taxonomy" \ - --permissions datacatalog.taxonomies.get,datacatalog.taxonomies.list - ``` -
- - -9. Grant the required IAM roles to the service accounts `$TAG_ENGINE_SA` and `$TAG_CREATOR_SA`: + +8. Grant the required IAM roles to the service accounts `$TAG_ENGINE_SA` and `$TAG_CREATOR_SA`: ``` gcloud projects add-iam-policy-binding $TAG_ENGINE_PROJECT \ @@ -197,24 +174,16 @@ If multiple teams want to share a single instance of Tag Engine and they own dif --member=serviceAccount:$TAG_CREATOR_SA \ --role=roles/datacatalog.viewer - gcloud projects add-iam-policy-binding $DATA_CATALOG_PROJECT \ - --member=serviceAccount:$TAG_CREATOR_SA \ - --role=projects/$DATA_CATALOG_PROJECT/roles/PolicyTagReader - gcloud projects add-iam-policy-binding $BIGQUERY_PROJECT \ --member=serviceAccount:$TAG_CREATOR_SA \ --role=roles/bigquery.dataEditor gcloud projects add-iam-policy-binding $BIGQUERY_PROJECT \ --member=serviceAccount:$TAG_CREATOR_SA \ - --role=roles/bigquery.metadataViewer - - gcloud projects add-iam-policy-binding $BIGQUERY_PROJECT \ - --member=serviceAccount:$TAG_CREATOR_SA \ - --role=projects/$BIGQUERY_PROJECT/roles/BigQuerySchemaUpdate + --role=roles/bigquery.metadataViewer ``` -10. Grant the necessary IAM roles to `$TAG_ENGINE_SA`: +9. Grant the necessary IAM roles to `$TAG_ENGINE_SA`: ``` gcloud iam service-accounts add-iam-policy-binding $TAG_ENGINE_SA \ @@ -230,21 +199,60 @@ If multiple teams want to share a single instance of Tag Engine and they own dif --member=serviceAccount:$TAG_ENGINE_SA --role=roles/iam.serviceAccountTokenCreator --project $DATA_CATALOG_PROJECT ``` -11. Optional step needed only if creating tags from CSV files: +10. Optional step only needed if creating policy tags through Tag Engine. If you are not planning to use the `create_sensitive_config` endpoint, you can skip this step: - Note: If you plan to create tags from CSV files, you also need to ensure that `$TAG_CREATOR_SA` has the - `storage.buckets.get` permission on the GCS bucket where the CSV files are stored. 
To do that, you can create a custom role with - this permission or assign the `storage.legacyBucketReader` role: + ``` + gcloud config set project $BIGQUERY_PROJECT + + gcloud iam roles create BigQuerySchemaUpdate \ + --project $BIGQUERY_PROJECT \ + --title BigQuerySchemaUpdate \ + --description "Update table schema with policy tags" \ + --permissions bigquery.tables.setCategory + + gcloud projects add-iam-policy-binding $BIGQUERY_PROJECT \ + --member=serviceAccount:$TAG_CREATOR_SA \ + --role=projects/$BIGQUERY_PROJECT/roles/BigQuerySchemaUpdate + + gcloud config set project $DATA_CATALOG_PROJECT + + gcloud iam roles create PolicyTagReader \ + --project $DATA_CATALOG_PROJECT \ + --title PolicyTagReader \ + --description "Read Policy Tag Taxonomy" \ + --permissions datacatalog.taxonomies.get,datacatalog.taxonomies.list + + gcloud projects add-iam-policy-binding $DATA_CATALOG_PROJECT \ + --member=serviceAccount:$TAG_CREATOR_SA \ + --role=projects/$DATA_CATALOG_PROJECT/roles/PolicyTagReader + ``` + +11. Optional step only needed if creating tags on Spanner data assets. If you don't have a Spanner database, you can skip this step: + + ``` + gcloud iam roles create SpannerTagReadWrite \ + --project $SPANNER_PROJECT \ + --title SpannerTagReadWrite \ + --description "Read and Update Spanner metadata" \ + --permissions spanner.databases.get,spanner.databases.updateTag,spanner.instances.updateTag + + gcloud projects add-iam-policy-binding $SPANNER_PROJECT \ + --member=serviceAccount:$TAG_CREATOR_SA \ + --role=projects/$SPANNER_PROJECT/roles/SpannerTagReadWrite + ``` + +12. Optional step needed only if creating tags from CSV files: + + Creating tags from CSV files requires `$TAG_CREATOR_SA` to have the `storage.buckets.get` permission on the GCS bucket in which the CSV files are stored. 
You can either create a custom role with this permission or assign it the `storage.legacyBucketReader` role: ``` gcloud storage buckets add-iam-policy-binding gs://[BUCKET_NAME] \ --member=serviceAccount:$TAG_CREATOR_SA \ --role=roles/storage.legacyBucketReader ``` -
-12. Build and deploy the Cloud Run services: +13. Build and deploy the Cloud Run services: There is one Cloud Run service for the API (`tag-engine-api`) and another for the UI (`tag-engine-ui`). They are both built from the same code base. You can build either one or the other, depending on your needs. The majority of Tag Engine customers use the API service and a few of them also use the UI service. @@ -274,10 +282,7 @@ If multiple teams want to share a single instance of Tag Engine and they own dif --service-account=$TAG_ENGINE_SA ``` -
- - -11. Set the `SERVICE_URL` environment variable: +14. Set the `SERVICE_URL` environment variable: If you are deploying the API, you also need to set the environment variable SERVICE_URL on `tag-engine-api`: @@ -295,8 +300,6 @@ If multiple teams want to share a single instance of Tag Engine and they own dif gcloud run services update tag-engine-ui --set-env-vars SERVICE_URL=$UI_SERVICE_URL ``` -
- This completes the manual setup for Tag Engine. Please consult [Part 2](https://github.com/GoogleCloudPlatform/datacatalog-tag-engine#testa) and [Part 3](https://github.com/GoogleCloudPlatform/datacatalog-tag-engine#testb) for testing your installation and further steps. diff --git a/examples/configs/import/CSV_import_template.xlsx b/examples/configs/import/README.xlsx similarity index 100% rename from examples/configs/import/CSV_import_template.xlsx rename to examples/configs/import/README.xlsx diff --git a/examples/configs/import/finwire_import_column_tags.json b/examples/configs/import/bigquery_column_config.json similarity index 50% rename from examples/configs/import/finwire_import_column_tags.json rename to examples/configs/import/bigquery_column_config.json index de10c6f..264f50c 100644 --- a/examples/configs/import/finwire_import_column_tags.json +++ b/examples/configs/import/bigquery_column_config.json @@ -2,6 +2,8 @@ "template_id": "compliance_template", "template_project": "tag-engine-run", "template_region": "us-central1", - "metadata_import_location": "gs://tag-import/csv/finwire_column_tags.csv", + "metadata_import_location": "gs://tag-import/latest/bigquery_column_tags.csv", + "data_asset_type": "bigquery", + "data_asset_region": "us-central1", "overwrite": true } diff --git a/examples/configs/import/finwire_column_tags.csv b/examples/configs/import/bigquery_column_tags.csv similarity index 100% rename from examples/configs/import/finwire_column_tags.csv rename to examples/configs/import/bigquery_column_tags.csv diff --git a/examples/configs/import/bigquery_dataset_config.json b/examples/configs/import/bigquery_dataset_config.json new file mode 100644 index 0000000..6e59535 --- /dev/null +++ b/examples/configs/import/bigquery_dataset_config.json @@ -0,0 +1,9 @@ +{ + "template_id": "data_governance", + "template_project": "tag-engine-run", + "template_region": "us-central1", + "metadata_import_location": 
"gs://tag-import/latest/bigquery_dataset_tags.csv", + "data_asset_type": "bigquery", + "data_asset_region": "us-central1", + "overwrite": true +} diff --git a/examples/configs/import/sakila_dataset_tag.csv b/examples/configs/import/bigquery_dataset_tags.csv similarity index 100% rename from examples/configs/import/sakila_dataset_tag.csv rename to examples/configs/import/bigquery_dataset_tags.csv diff --git a/examples/configs/import/bigquery_table_config.json b/examples/configs/import/bigquery_table_config.json new file mode 100644 index 0000000..ca884ae --- /dev/null +++ b/examples/configs/import/bigquery_table_config.json @@ -0,0 +1,9 @@ +{ + "template_id": "data_governance", + "template_project": "tag-engine-run", + "template_region": "us-central1", + "metadata_import_location": "gs://tag-import/latest/bigquery_table_tags.csv", + "data_asset_type": "bigquery", + "data_asset_region": "us-central1", + "overwrite": true +} diff --git a/examples/configs/import/sakila_table_tags.csv b/examples/configs/import/bigquery_table_tags.csv similarity index 100% rename from examples/configs/import/sakila_table_tags.csv rename to examples/configs/import/bigquery_table_tags.csv diff --git a/examples/configs/import/fileset_column_config.json b/examples/configs/import/fileset_column_config.json new file mode 100644 index 0000000..b1dad77 --- /dev/null +++ b/examples/configs/import/fileset_column_config.json @@ -0,0 +1,9 @@ +{ + "template_id": "data_sensitivity", + "template_project": "tag-engine-run", + "template_region": "us-central1", + "metadata_import_location": "gs://tag-import/latest/fileset_column_tags.csv", + "data_asset_type": "fileset", + "data_asset_region": "us-central1", + "overwrite": true +} diff --git a/examples/configs/import/fileset_column_tags.csv b/examples/configs/import/fileset_column_tags.csv new file mode 100644 index 0000000..aa2d4e7 --- /dev/null +++ b/examples/configs/import/fileset_column_tags.csv @@ -0,0 +1,7 @@ 
+project,entry_group,fileset,column,sensitive_field,sensitive_type +tag-engine-run,sakila_eg,actor,first_name,TRUE,Sensitive_Personal_Identifiable_Information +tag-engine-run,sakila_eg,actor,last_name,TRUE,Sensitive_Personal_Identifiable_Information +tag-engine-run,sakila_eg,customer,first_name,TRUE,Sensitive_Personal_Identifiable_Information +tag-engine-run,sakila_eg,customer,last_name,TRUE,Sensitive_Personal_Identifiable_Information +tag-engine-run,sakila_eg,staff,first_name,TRUE,Sensitive_Personal_Identifiable_Information +tag-engine-run,sakila_eg,staff,last_name,TRUE,Sensitive_Personal_Identifiable_Information diff --git a/examples/configs/import/fileset_table_config.json b/examples/configs/import/fileset_table_config.json new file mode 100644 index 0000000..599b071 --- /dev/null +++ b/examples/configs/import/fileset_table_config.json @@ -0,0 +1,9 @@ +{ + "template_id": "data_governance", + "template_project": "tag-engine-run", + "template_region": "us-central1", + "metadata_import_location": "gs://tag-import/latest/fileset_table_tags.csv", + "data_asset_type": "fileset", + "data_asset_region": "us-central1", + "overwrite": true +} diff --git a/examples/configs/import/sakila_fileset_tags.csv b/examples/configs/import/fileset_table_tags.csv similarity index 100% rename from examples/configs/import/sakila_fileset_tags.csv rename to examples/configs/import/fileset_table_tags.csv diff --git a/examples/configs/import/finwire_import_table_tags.json b/examples/configs/import/finwire_import_table_tags.json deleted file mode 100644 index 322f56d..0000000 --- a/examples/configs/import/finwire_import_table_tags.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "template_id": "data_discovery", - "template_project": "tag-engine-run", - "template_region": "us-central1", - "metadata_import_location": "gs://tag-import/csv/finwire_table_tags.csv", - "overwrite": true -} diff --git a/examples/configs/import/finwire_table_tags.csv b/examples/configs/import/finwire_table_tags.csv deleted 
file mode 100644 index 234f8e9..0000000 --- a/examples/configs/import/finwire_table_tags.csv +++ /dev/null @@ -1,28 +0,0 @@ -project,dataset,table,data_domain,data_confidentiality,data_product_type,data_owner,operations_owner,data_location,ingestion_frequency -tag-engine-run,finwire,FINWIRE1976Q1_FIN,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,WEEKLY -tag-engine-run,finwire,FINWIRE1976Q1_SEC,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,finwire,FINWIRE1976Q1_CMP,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,finwire,FINWIRE1976Q2_FIN,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,MONTHLY -tag-engine-run,finwire,FINWIRE1976Q2_SEC,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,WEEKLY -tag-engine-run,finwire,FINWIRE1976Q2_CMP,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,finwire,FINWIRE1976Q3_FIN,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,finwire,FINWIRE1976Q3_SEC,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,MONTHLY -tag-engine-run,finwire,FINWIRE1976Q3_CMP,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,finwire,FINWIRE1976Q4_FIN,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,finwire,FINWIRE1976Q4_SEC,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,WEEKLY -tag-engine-run,finwire,FINWIRE1976Q4_CMP,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,MONTHLY -tag-engine-run,finwire,FINWIRE1977Q1_FIN,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,finwire,FINWIRE1977Q1_SEC,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,finwire,FINWIRE1977Q1_CMP,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,finwire,FINWIRE1977Q2_FIN,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY 
-tag-engine-run,finwire,FINWIRE1977Q2_SEC,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,WEEKLY -tag-engine-run,finwire,FINWIRE1977Q2_CMP,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,finwire,FINWIRE1977Q3_FIN,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,MONTHLY -tag-engine-run,finwire,FINWIRE1977Q3_SEC,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,finwire,FINWIRE1977Q3_CMP,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,finwire,FINWIRE1977Q4_FIN,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,WEEKLY -tag-engine-run,finwire,FINWIRE1977Q4_SEC,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,WEEKLY -tag-engine-run,finwire,FINWIRE1977Q4_CMP,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY - - - diff --git a/examples/configs/import/sakila_column_empty_config.json b/examples/configs/import/sakila_column_empty_config.json deleted file mode 100644 index 3630848..0000000 --- a/examples/configs/import/sakila_column_empty_config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "template_id": "data_sensitivity", - "template_project": "tag-engine-run", - "template_region": "us-central1", - "metadata_import_location": "gs://tag-import/csv/sakila_column_empty_values.csv", - "overwrite": true -} diff --git a/examples/configs/import/sakila_column_empty_values.csv b/examples/configs/import/sakila_column_empty_values.csv deleted file mode 100644 index 28182ce..0000000 --- a/examples/configs/import/sakila_column_empty_values.csv +++ /dev/null @@ -1,8 +0,0 @@ -project,dataset,table,column,sensitive_field,sensitive_type -tag-engine-run,sakila_dw,actor,actor_id,, -tag-engine-run,sakila_dw,actor,first_name,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_dw,actor,last_name,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_dw,actor,Middle_Name,TRUE,Personal_Identifiable_Information 
-tag-engine-run,sakila_dw,actor,Initials,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_dw,actor,FULL_NAME,TRUE,SPII -tag-engine-run,sakila_dw,actor,CONTRACT_NAME,, \ No newline at end of file diff --git a/examples/configs/import/sakila_column_tags.csv b/examples/configs/import/sakila_column_tags.csv deleted file mode 100644 index 0eecb2d..0000000 --- a/examples/configs/import/sakila_column_tags.csv +++ /dev/null @@ -1,18 +0,0 @@ -project,dataset,table,column,sensitive_field,sensitive_type -tag-engine-run,sakila_dw,actor,first_name,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_dw,actor,last_name,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_dw,address,address,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_dw,address,phone,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_dw,customer,first_name,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_dw,customer,last_name,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_dw,customer,email,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_dw,staff,first_name,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_dw,staff,last_name,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_dw,staff,picture,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_dw,staff,email,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_dw,staff,username,TRUE,Sensitive_Personal_Identifiable_Information -tag-engine-run,sakila_dw,staff,password,TRUE,Sensitive_Personal_Identifiable_Information -tag-engine-run,sakila_dw,city,city_id,, -tag-engine-run,sakila_dw,city,city,, -tag-engine-run,sakila_dw,city,country_id,FALSE, -tag-engine-run,sakila_dw,city,last_update,FALSE, \ No newline at end of file diff --git a/examples/configs/import/sakila_column_tags_config.json b/examples/configs/import/sakila_column_tags_config.json deleted file mode 100644 index d543013..0000000 --- 
a/examples/configs/import/sakila_column_tags_config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "template_id": "data_sensitivity", - "template_project": "tag-engine-run", - "template_region": "us-central1", - "metadata_import_location": "gs://tag-import/csv/sakila_column_tags.csv", - "overwrite": true -} diff --git a/examples/configs/import/sakila_column_tags_mix_config.json b/examples/configs/import/sakila_column_tags_mix_config.json deleted file mode 100644 index d8f224b..0000000 --- a/examples/configs/import/sakila_column_tags_mix_config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "template_id": "data_sensitivity", - "template_project": "tag-engine-run", - "template_region": "us-central1", - "metadata_import_location": "gs://tag-import/csv/sakila_column_tags_mixed.csv", - "overwrite": true -} diff --git a/examples/configs/import/sakila_column_tags_mixed.csv b/examples/configs/import/sakila_column_tags_mixed.csv deleted file mode 100644 index 9d3bbf4..0000000 --- a/examples/configs/import/sakila_column_tags_mixed.csv +++ /dev/null @@ -1,6 +0,0 @@ -project,dataset,table,column,sensitive_field,sensitive_type -tag-engine-run,sakila_dw,actor,first_name,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_dw,actor,last_name,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_dw,actor,Middle_Name,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_dw,actor,Initials,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_dw,actor,FULL_NAME,TRUE,Personal_Identifiable_Information \ No newline at end of file diff --git a/examples/configs/import/sakila_dataset_config.json b/examples/configs/import/sakila_dataset_config.json deleted file mode 100644 index c21cf2a..0000000 --- a/examples/configs/import/sakila_dataset_config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "template_id": "data_governance", - "template_project": "tag-engine-run", - "template_region": "us-central1", - "metadata_import_location": "gs://tag-import/csv/sakila_dataset_tag.csv", - 
"overwrite": true -} diff --git a/examples/configs/import/sakila_fileset_column_config.json b/examples/configs/import/sakila_fileset_column_config.json deleted file mode 100644 index b9db0de..0000000 --- a/examples/configs/import/sakila_fileset_column_config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "template_id": "data_sensitivity", - "template_project": "tag-engine-run", - "template_region": "us-central1", - "metadata_import_location": "gs://tag-import/csv/sakila_fileset_column_tags.csv", - "overwrite": true -} diff --git a/examples/configs/import/sakila_fileset_column_nested.csv b/examples/configs/import/sakila_fileset_column_nested.csv deleted file mode 100644 index d34003e..0000000 --- a/examples/configs/import/sakila_fileset_column_nested.csv +++ /dev/null @@ -1,5 +0,0 @@ -project,entry_group,fileset,column,sensitive_field,sensitive_type -tag-engine-run,sakila_eg,address,address,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_eg,address,phone,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_eg,address,phone.area_code,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_eg,address,phone.phone_number,TRUE,Personal_Identifiable_Information diff --git a/examples/configs/import/sakila_fileset_column_tags.csv b/examples/configs/import/sakila_fileset_column_tags.csv deleted file mode 100644 index ad11457..0000000 --- a/examples/configs/import/sakila_fileset_column_tags.csv +++ /dev/null @@ -1,20 +0,0 @@ -project,entry_group,fileset,column,sensitive_field,sensitive_type -tag-engine-run,sakila_eg,actor,first_name,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_eg,actor,last_name,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_eg,address,address,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_eg,address,phone,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_eg,address,phone.area_code,TRUE,Personal_Identifiable_Information 
-tag-engine-run,sakila_eg,address,phone.phone_number,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_eg,customer,first_name,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_eg,customer,last_name,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_eg,customer,email,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_eg,staff,first_name,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_eg,staff,last_name,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_eg,staff,picture,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_eg,staff,email,TRUE,Personal_Identifiable_Information -tag-engine-run,sakila_eg,staff,username,TRUE,Sensitive_Personal_Identifiable_Information -tag-engine-run,sakila_eg,staff,password,TRUE,Sensitive_Personal_Identifiable_Information -tag-engine-run,sakila_eg,city,city_id,FALSE, -tag-engine-run,sakila_eg,city,city,FALSE, -tag-engine-run,sakila_eg,city,country_id,FALSE, -tag-engine-run,sakila_eg,city,last_update,FALSE, \ No newline at end of file diff --git a/examples/configs/import/sakila_fileset_config.json b/examples/configs/import/sakila_fileset_config.json deleted file mode 100644 index 72e2c91..0000000 --- a/examples/configs/import/sakila_fileset_config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "template_id": "data_governance", - "template_project": "tag-engine-run", - "template_region": "us-central1", - "metadata_import_location": "gs://tag-import/csv/sakila_fileset_tags.csv", - "overwrite": true -} diff --git a/examples/configs/import/sakila_table_missing_dataset_table.csv b/examples/configs/import/sakila_table_missing_dataset_table.csv deleted file mode 100644 index 545b0b0..0000000 --- a/examples/configs/import/sakila_table_missing_dataset_table.csv +++ /dev/null @@ -1,16 +0,0 @@ -project,data_domain,broad_data_category,environment,data_origin,data_creation,data_ownership,data_asset_owner,data_confidentiality,data_retention,data_asset_documentation 
-tag-engine-run,LOGISTICS,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,John Smith,PUBLIC,90_DAYS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -tag-engine-run,LOGISTICS,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,John Smith,PUBLIC,90_DAYS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -tag-engine-run,LOGISTICS,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,John Smith,PUBLIC,90_DAYS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -tag-engine-run,LOGISTICS,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,John Smith,PUBLIC,90_DAYS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -tag-engine-run,LOGISTICS,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,John Smith,PUBLIC,90_DAYS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -tag-engine-run,LOGISTICS,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,John Smith,PUBLIC,90_DAYS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -tag-engine-run,MARKETING,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,Emily Doe,PUBLIC,2_YEARS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -tag-engine-run,MARKETING,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,Emily Doe,PUBLIC,2_YEARS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -tag-engine-run,MARKETING,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,Emily Doe,PUBLIC,2_YEARS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -tag-engine-run,MARKETING,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,Emily Doe,PUBLIC,2_YEARS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -tag-engine-run,MARKETING,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,Emily Doe,PUBLIC,2_YEARS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -tag-engine-run,MARKETING,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,Emily Doe,PUBLIC,2_YEARS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -tag-engine-run,OPERATIONS,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,Emily 
Doe,PUBLIC,2_YEARS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -tag-engine-run,OPERATIONS,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,Emily Doe,PUBLIC,2_YEARS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -tag-engine-run,OPERATIONS,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,Emily Doe,PUBLIC,2_YEARS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html \ No newline at end of file diff --git a/examples/configs/import/sakila_table_missing_dataset_table.json b/examples/configs/import/sakila_table_missing_dataset_table.json deleted file mode 100644 index 79d81f1..0000000 --- a/examples/configs/import/sakila_table_missing_dataset_table.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "template_id": "data_governance", - "template_project": "tag-engine-run", - "template_region": "us-central1", - "metadata_import_location": "gs://tag-import/csv/sakila_table_missing_dataset_table.csv", - "overwrite": true -} diff --git a/examples/configs/import/sakila_table_missing_project.csv b/examples/configs/import/sakila_table_missing_project.csv deleted file mode 100644 index 74f99b6..0000000 --- a/examples/configs/import/sakila_table_missing_project.csv +++ /dev/null @@ -1,17 +0,0 @@ -dataset,table,data_domain,broad_data_category,environment,data_origin,data_creation,data_ownership,data_asset_owner,data_confidentiality,data_retention,data_asset_documentation -sakila_dw,actor,LOGISTICS,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,John Smith,PUBLIC,90_DAYS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -sakila_dw,address,LOGISTICS,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,John Smith,PUBLIC,90_DAYS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -sakila_dw,category,LOGISTICS,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,John Smith,PUBLIC,90_DAYS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -sakila_dw,city,LOGISTICS,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,John 
Smith,PUBLIC,90_DAYS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -sakila_dw,country,LOGISTICS,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,John Smith,PUBLIC,90_DAYS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -sakila_dw,customer,LOGISTICS,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,John Smith,PUBLIC,90_DAYS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -sakila_dw,film,MARKETING,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,Emily Doe,PUBLIC,2_YEARS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -sakila_dw,film_actor,MARKETING,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,Emily Doe,PUBLIC,2_YEARS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -sakila_dw,film_category,MARKETING,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,Emily Doe,PUBLIC,2_YEARS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -sakila_dw,film_text,MARKETING,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,Emily Doe,PUBLIC,2_YEARS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -sakila_dw,inventory,MARKETING,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,Emily Doe,PUBLIC,2_YEARS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -sakila_dw,language,MARKETING,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,Emily Doe,PUBLIC,2_YEARS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -sakila_dw,payment,OPERATIONS,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,Emily Doe,PUBLIC,2_YEARS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -sakila_dw,rental,OPERATIONS,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,Emily Doe,PUBLIC,2_YEARS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -sakila_dw,staff,OPERATIONS,CONTENT,DEV,OPEN_DATA, 2023-11-10,THIRD_PARTY_OPS,Emily Doe,PUBLIC,2_YEARS,https://dev.mysql.com/doc/sakila/en/sakila-structure.html -sakila_dw,store,,,,,,,,,, \ No newline at end of file diff --git 
a/examples/configs/import/sakila_table_missing_project.json b/examples/configs/import/sakila_table_missing_project.json deleted file mode 100644 index 87df020..0000000 --- a/examples/configs/import/sakila_table_missing_project.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "template_id": "data_governance", - "template_project": "tag-engine-run", - "template_region": "us-central1", - "metadata_import_location": "gs://tag-import/csv/sakila_table_missing_project.csv", - "overwrite": true -} diff --git a/examples/configs/import/sakila_table_tags_config.json b/examples/configs/import/sakila_table_tags_config.json deleted file mode 100644 index 9641fa9..0000000 --- a/examples/configs/import/sakila_table_tags_config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "template_id": "data_governance", - "template_project": "tag-engine-run", - "template_region": "us-central1", - "metadata_import_location": "gs://tag-import/csv/sakila_table_tags.csv", - "overwrite": false -} diff --git a/examples/configs/import/spanner_column_config.json b/examples/configs/import/spanner_column_config.json new file mode 100644 index 0000000..bd5cd67 --- /dev/null +++ b/examples/configs/import/spanner_column_config.json @@ -0,0 +1,9 @@ +{ + "template_id": "data_sensitivity", + "template_project": "tag-engine-run", + "template_region": "us-central1", + "metadata_import_location": "gs://tag-import/latest/spanner_column_tags.csv", + "data_asset_type": "spanner", + "data_asset_region": "us-central1", + "overwrite": true +} diff --git a/examples/configs/import/spanner_column_tags.csv b/examples/configs/import/spanner_column_tags.csv index 969358e..b54656b 100644 --- a/examples/configs/import/spanner_column_tags.csv +++ b/examples/configs/import/spanner_column_tags.csv @@ -1,17 +1,17 @@ -project,instance,database,schema,table,column,is_compliant,reason -tag-engine-run,goog-dev,user-testing,finwire,FINWIRE2024Q3_CMP,pts,TRUE,Passed automated controls 
-tag-engine-run,goog-dev,user-testing,finwire,FINWIRE2024Q3_CMP,recType,TRUE,Passed automated controls -tag-engine-run,goog-dev,user-testing,finwire,FINWIRE2024Q3_CMP,companyName,TRUE,Passed automated controls -tag-engine-run,goog-dev,user-testing,finwire,FINWIRE2024Q3_CMP,cik,TRUE,Passed automated controls -tag-engine-run,goog-dev,user-testing,finwire,FINWIRE2024Q3_CMP,status,TRUE,Passed automated controls -tag-engine-run,goog-dev,user-testing,finwire,FINWIRE2024Q3_CMP,industryID,TRUE,Passed automated controls -tag-engine-run,goog-dev,user-testing,finwire,FINWIRE2024Q3_CMP,spRating,TRUE,Passed automated controls -tag-engine-run,goog-dev,user-testing,finwire,FINWIRE2024Q3_CMP,foundingDate,TRUE,Passed automated controls -tag-engine-run,goog-dev,user-testing,finwire,FINWIRE2024Q3_CMP,addr_line1,TRUE,Passed automated controls -tag-engine-run,goog-dev,user-testing,finwire,FINWIRE2024Q3_CMP,addr_line2,TRUE,Passed automated controls -tag-engine-run,goog-dev,user-testing,finwire,FINWIRE2024Q3_CMP,postalCode,TRUE,Passed automated controls -tag-engine-run,goog-dev,user-testing,finwire,FINWIRE2024Q3_CMP,city,TRUE,Passed automated controls -tag-engine-run,goog-dev,user-testing,finwire,FINWIRE2024Q3_CMP,stateProvince,TRUE,Passed automated controls -tag-engine-run,goog-dev,user-testing,finwire,FINWIRE2024Q3_CMP,country,TRUE,Passed automated controls -tag-engine-run,goog-dev,user-testing,finwire,FINWIRE2024Q3_CMP,ceoName,FALSE,Name was used by knowledge graph -tag-engine-run,goog-dev,user-testing,finwire,FINWIRE2024Q3_CMP,description,FALSE,Description was used in financial analysis \ No newline at end of file +project,instance,database,schema,table,column,sensitive_field,sensitive_type +tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q3_CMP,pts,FALSE, +tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q3_CMP,recType,FALSE, +tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q3_CMP,companyName,FALSE, +tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q3_CMP,cik,FALSE, 
+tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q3_CMP,status,FALSE, +tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q3_CMP,industryID,FALSE, +tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q3_CMP,spRating,TRUE,Personal_Identifiable_Information +tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q3_CMP,foundingDate,FALSE, +tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q3_CMP,addr_line1,FALSE, +tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q3_CMP,addr_line2,FALSE, +tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q3_CMP,postalCode,FALSE, +tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q3_CMP,city,FALSE, +tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q3_CMP,stateProvince,FALSE, +tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q3_CMP,country,FALSE, +tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q3_CMP,ceoName,TRUE,Sensitive_Personal_Identifiable_Information +tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q3_CMP,description,FALSE, \ No newline at end of file diff --git a/examples/configs/import/sakila_fileset_column_nested_config.json b/examples/configs/import/spanner_table_config.json similarity index 50% rename from examples/configs/import/sakila_fileset_column_nested_config.json rename to examples/configs/import/spanner_table_config.json index 2987e25..bface62 100644 --- a/examples/configs/import/sakila_fileset_column_nested_config.json +++ b/examples/configs/import/spanner_table_config.json @@ -2,6 +2,8 @@ "template_id": "data_sensitivity", "template_project": "tag-engine-run", "template_region": "us-central1", - "metadata_import_location": "gs://tag-import/csv/sakila_fileset_column_nested.csv", + "metadata_import_location": "gs://tag-import/latest/spanner_table_tags.csv", + "data_asset_type": "spanner", + "data_asset_region": "us-central1", "overwrite": true } diff --git a/examples/configs/import/spanner_table_tags.csv b/examples/configs/import/spanner_table_tags.csv index 12f4e1d..38997ce 100644 --- 
a/examples/configs/import/spanner_table_tags.csv +++ b/examples/configs/import/spanner_table_tags.csv @@ -1,25 +1,4 @@ -project,instance,database,schema,table,data_domain,data_confidentiality,data_product_type,data_owner,operations_owner,data_location,ingestion_frequency -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q1_FIN,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,WEEKLY -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q1_SEC,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q1_CMP,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q2_FIN,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,MONTHLY -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q2_SEC,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,WEEKLY -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q2_CMP,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q3_FIN,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q3_SEC,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,MONTHLY -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q3_CMP,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2023Q1_FIN,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2023Q1_SEC,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2023Q1_CMP,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2023Q2_FIN,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2023Q2_SEC,FINANCE,PUBLIC,DERIVED,John 
Williams,Emily Doe,us-central1,WEEKLY -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2023Q2_CMP,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2023Q3_FIN,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,MONTHLY -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2023Q3_SEC,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2023Q3_CMP,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2023Q4_FIN,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,WEEKLY -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2023Q4_SEC,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,WEEKLY -tag-engine-run,goog-dev,user-testing,dev,FINWIRE2023Q4_CMP,FINANCE,PUBLIC,DERIVED,John Williams,Emily Doe,us-central1,DAILY - - - +project,instance,database,schema,table,sensitive_field,sensitive_type +tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q1_CMP,TRUE,Personal_Identifiable_Information +tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q2_CMP,TRUE,Sensitive_Personal_Identifiable_Information +tag-engine-run,goog-dev,user-testing,dev,FINWIRE2024Q3_CMP,FALSE, \ No newline at end of file diff --git a/examples/load/setup/clone_tables_distributed.py b/examples/load/setup/clone_tables_distributed.py index 177ab51..372415b 100644 --- a/examples/load/setup/clone_tables_distributed.py +++ b/examples/load/setup/clone_tables_distributed.py @@ -18,8 +18,6 @@ def make_copies(project, queue_region, queue_name, url, src_table, dest_project, def create_task(project, queue_region, queue_name, url, src_table, dest_project, dest_dataset, start, stop): - print('*** enter create_task ***') - client = tasks_v2.CloudTasksClient() parent = client.queue_path(project, queue_region, queue_name) diff --git a/examples/load/setup/load_tables_distributed.py 
b/examples/load/setup/load_tables_distributed.py index 1328d35..abe2182 100644 --- a/examples/load/setup/load_tables_distributed.py +++ b/examples/load/setup/load_tables_distributed.py @@ -21,8 +21,6 @@ def load_tables(project, queue_region, queue_name, url, src_uri, dest_project, d def create_task(project, queue_region, queue_name, url, src_table, dest_project, dest_dataset, table_prefix, start, stop): - print('*** enter create_task ***') - client = tasks_v2.CloudTasksClient() parent = client.queue_path(project, queue_region, queue_name) diff --git a/examples/unit_tests.sh b/examples/unit_tests.sh index ca51c3f..d2d6122 100644 --- a/examples/unit_tests.sh +++ b/examples/unit_tests.sh @@ -90,41 +90,57 @@ curl -i -X POST $TAG_ENGINE_URL/trigger_job \ curl -X POST $TAG_ENGINE_URL/get_job_status -d '{"job_uuid":"f106ae4aef4911edb86ee96cdaa8e7ae"}' \ -H "Authorization: Bearer $IAM_TOKEN" -####### Table and column import tags from CSV ####### +####### BigQuery table and column tags from CSV imports ####### -# create the import table config -curl -X POST $TAG_ENGINE_URL/create_import_config -d @examples/configs/import/sakila_table_tags.json \ +# create an import config for tagging tables +curl -X POST $TAG_ENGINE_URL/create_import_config -d @examples/configs/import/bigquery_table_config.json \ -H "Authorization: Bearer $IAM_TOKEN" -# trigger job +# trigger the job curl -i -X POST $TAG_ENGINE_URL/trigger_job \ -d '{"config_type":"TAG_IMPORT","config_uuid":"0e674d78eddd11ed8d3d09299afaece0"}' \ -H "Authorization: Bearer $IAM_TOKEN" -# trigger job with metadata +# alternatively, trigger the job with metadata curl -i -X POST $TAG_ENGINE_URL/trigger_job \ -d '{"config_type":"TAG_IMPORT","config_uuid":"0e674d78eddd11ed8d3d09299afaece0", "job_metadata": {"source": "Collibra", "workflow": "process_sensitive_data"}}' \ -H "Authorization: Bearer $IAM_TOKEN" +# get the job status curl -X POST $TAG_ENGINE_URL/get_job_status -d '{"job_uuid":"2755de6ceddd11ed9e0e3f1388bde9d6"}' \ -H 
"Authorization: Bearer $IAM_TOKEN" -# create the import column config -curl -X POST $TAG_ENGINE_URL/create_import_config -d @examples/configs/import/sakila_column_tags.json \ +# create an import config for tagging columns +curl -X POST $TAG_ENGINE_URL/create_import_config -d @examples/configs/import/bigquery_column_config.json \ -H "Authorization: Bearer $IAM_TOKEN" -# trigger job +# trigger the job curl -i -X POST $TAG_ENGINE_URL/trigger_job \ -d '{"config_type":"TAG_IMPORT","config_uuid":"426ddac4eddd11ed9e0e3f1388bde9d6"}' \ -H "Authorization: Bearer $IAM_TOKEN" +# get the job status curl -X POST $TAG_ENGINE_URL/get_job_status -d '{"job_uuid":"4abb1a0ceddd11edb0341b486213f8b6"}' \ -H "Authorization: Bearer $IAM_TOKEN" -####### Fileset import tags from CSV ####### +# create an import config for tagging datasets +curl -X POST $TAG_ENGINE_URL/create_import_config -d @examples/configs/import/bigquery_dataset_config.json \ + -H "Authorization: Bearer $IAM_TOKEN" + +# trigger the job +curl -i -X POST $TAG_ENGINE_URL/trigger_job \ + -d '{"config_type":"TAG_IMPORT","config_uuid":"7d4618be478211efa28142004e494300"}' \ + -H "Authorization: Bearer $IAM_TOKEN" + +# get the job status +curl -X POST $TAG_ENGINE_URL/get_job_status -d '{"job_uuid":"75052fd2478211efa28142004e494300"}' \ + -H "Authorization: Bearer $IAM_TOKEN" + -# create the fileset import config -curl -X POST $TAG_ENGINE_URL/create_import_config -d @examples/configs/import/sakila_fileset_config.json \ +####### GCS fileset tags from CSV imports ####### + +# create an import config for tagging filesets +curl -X POST $TAG_ENGINE_URL/create_import_config -d @examples/configs/import/fileset_config.json \ -H "Authorization: Bearer $IAM_TOKEN" # trigger job @@ -132,8 +148,8 @@ curl -i -X POST $TAG_ENGINE_URL/trigger_job \ -d '{"config_type":"TAG_IMPORT","config_uuid":"0e674d78eddd11ed8d3d09299afaece0"}' \ -H "Authorization: Bearer $IAM_TOKEN" -# create the fileset column import config -curl -X POST 
$TAG_ENGINE_URL/create_import_config -d @examples/configs/import/sakila_fileset_column_config.json \ +# create an import config for tagging fileset columns +curl -X POST $TAG_ENGINE_URL/create_import_config -d @examples/configs/import/fileset_column_config.json \ -H "Authorization: Bearer $IAM_TOKEN" # trigger job @@ -141,6 +157,28 @@ curl -i -X POST $TAG_ENGINE_URL/trigger_job \ -d '{"config_type":"TAG_IMPORT","config_uuid":"0e674d78eddd11ed8d3d09299afaece0"}' \ -H "Authorization: Bearer $IAM_TOKEN" + +####### Spanner tags from CSV imports ####### + +# create an import config for tagging Spanner tables +curl -X POST $TAG_ENGINE_URL/create_import_config -d @examples/configs/import/spanner_table_config.json \ + -H "Authorization: Bearer $IAM_TOKEN" + +# trigger job +curl -i -X POST $TAG_ENGINE_URL/trigger_job \ + -d '{"config_type":"TAG_IMPORT","config_uuid":"0e674d78eddd11ed8d3d09299afaece0"}' \ + -H "Authorization: Bearer $IAM_TOKEN" + +# create an import config for tagging Spanner table columns +curl -X POST $TAG_ENGINE_URL/create_import_config -d @examples/configs/import/spanner_column_config.json \ + -H "Authorization: Bearer $IAM_TOKEN" + +# trigger job +curl -i -X POST $TAG_ENGINE_URL/trigger_job \ + -d '{"config_type":"TAG_IMPORT","config_uuid":"0e674d78eddd11ed8d3d09299afaece0"}' \ + -H "Authorization: Bearer $IAM_TOKEN" + + ####### Restore tags from metadata export ####### # export the metadata diff --git a/main.py b/main.py index 38e7fe3..c715266 100644 --- a/main.py +++ b/main.py @@ -68,7 +68,20 @@ else: FIRESTORE_DB = '(default)' -BIGQUERY_REGION = config['DEFAULT']['BIGQUERY_REGION'].strip() +if 'BIGQUERY_REGION' in config['DEFAULT']: + BIGQUERY_REGION = config['DEFAULT']['BIGQUERY_REGION'].strip() +else: + BIGQUERY_REGION = None + +if 'SPANNER_REGION' in config['DEFAULT']: + SPANNER_REGION = config['DEFAULT']['SPANNER_REGION'].strip() +else: + SPANNER_REGION = None + +if 'FILESET_REGION' in config['DEFAULT']: + FILESET_REGION = 
config['DEFAULT']['FILESET_REGION'].strip() +else: + FILESET_REGION = None SPLIT_WORK_HANDLER = SERVICE_URL + "/_split_work" RUN_TASK_HANDLER = SERVICE_URL + '/_run_task' @@ -2655,10 +2668,21 @@ def create_import_config(): else: overwrite = True + if 'data_asset_type' in json_request: + data_asset_type = json_request['data_asset_type'] + else: + data_asset_type = None + + if 'data_asset_region' in json_request: + data_asset_region = json_request['data_asset_region'] + else: + data_asset_region = None + tag_history_option, _ = store.read_tag_history_settings() config_uuid = store.write_tag_import_config(tag_creator_sa, template_uuid, template_id, template_project, template_region, \ - metadata_import_location, tag_history_option, overwrite) + data_asset_type, data_asset_region, metadata_import_location, \ + tag_history_option, overwrite) return jsonify(config_uuid=config_uuid, config_type='TAG_IMPORT') @@ -3415,7 +3439,44 @@ def _split_work(): jm.set_job_status(job_uuid, 'ERROR') resp = jsonify(success=False) return resp - + + # infer the data_asset_type if not present in the config + if 'data_asset_type' not in config or config.get('data_asset_type') == None: + if (extracted_tags[0].keys() >= {'dataset'}): + config['data_asset_type'] = 'bigquery' + elif (extracted_tags[0].keys() >= {'entry_group', 'fileset'}): + config['data_asset_type'] = 'fileset' + elif (extracted_tags[0].keys() >= {'instance', 'database'}): + config['data_asset_type'] = 'spanner' + else: + print('Error: unable to determine the data asset type of your config (bigquery, fileset, or spanner). 
Please add data_asset_type to your config and verify the format of your CSV.') + store.update_job_status(config_uuid, config_type, 'ERROR') + jm.set_job_status(job_uuid, 'ERROR') + resp = jsonify(success=False) + return resp + + # save the update to Firestore + store.update_tag_import_config(config_uuid, config.get('data_asset_type'), None, None) + + # infer the data_asset_region if not present in the config + if 'data_asset_region' not in config or config.get('data_asset_region') == None: + if config.get('data_asset_type') == 'bigquery': + config['data_asset_region'] = BIGQUERY_REGION + elif config.get('data_asset_type') == 'fileset': + config['data_asset_region'] = FILESET_REGION + elif config.get('data_asset_type') == 'spanner': + config['data_asset_region'] = SPANNER_REGION + else: + print('Error: unable to determine the data asset region of your config (us-central1, etc.). Please add data_asset_region to your config or add the appropriate default region variable to tagengine.ini.') + store.update_job_status(config_uuid, config_type, 'ERROR') + jm.set_job_status(job_uuid, 'ERROR') + resp = jsonify(success=False) + return resp + + # save the update to Firestore + store.update_tag_import_config(config_uuid, None, config.get('data_asset_region'), None) + + if config_type == 'TAG_RESTORE': bkp_files = list(re.get_resources(config.get('metadata_export_location'), None)) @@ -3558,8 +3619,8 @@ def _run_task(): creation_status = dcc.apply_export_config(config['config_uuid'], config['target_project'], config['target_dataset'], config['target_region'], uri) if config_type == 'TAG_IMPORT': - creation_status = dcc.apply_import_config(job_uuid, config_uuid, tag_extract, \ - config['tag_history'], config['overwrite']) + creation_status = dcc.apply_import_config(job_uuid, config_uuid, config['data_asset_type'], config['data_asset_region'], \ + tag_extract, config['tag_history'], config['overwrite']) if config_type == 'TAG_RESTORE': creation_status = 
dcc.apply_restore_config(job_uuid, config_uuid, tag_extract, \ config['tag_history'], config['overwrite']) @@ -3597,7 +3658,7 @@ def _run_task(): @app.route("/version", methods=['GET']) def version(): - return "Welcome to Tag Engine version 2.3.1\n" + return "Welcome to Tag Engine version 2.3.2\n" ####################### TEST METHOD #################################### diff --git a/tagengine.ini.tpl b/tagengine.ini.tpl index c2ddafa..e49c339 100644 --- a/tagengine.ini.tpl +++ b/tagengine.ini.tpl @@ -1,19 +1,20 @@ [DEFAULT] -TAG_ENGINE_SA = tag-engine@solution-workspace.iam.gserviceaccount.com -TAG_CREATOR_SA = tag-creator@dgtoolkit.iam.gserviceaccount.com -TAG_ENGINE_PROJECT = solution-workspace +TAG_ENGINE_SA = tag-engine@tag-engine-run.iam.gserviceaccount.com +TAG_CREATOR_SA = tag-creator@tag-engine-run.iam.gserviceaccount.com +TAG_ENGINE_PROJECT = tag-engine-run TAG_ENGINE_REGION = us-central1 -FIRESTORE_PROJECT = dgtoolkit -FIRESTORE_DB = tag-engine +FIRESTORE_PROJECT = tag-engine-run +FIRESTORE_DB = (default) INJECTOR_QUEUE = tag-engine-injector-queue WORK_QUEUE = tag-engine-work-queue BIGQUERY_REGION = us-central1 -CLOUD_STORAGE_REGION = us-central1 +FILESET_REGION = us-central1 +SPANNER_REGION = us-central1 ENABLE_AUTH = True OAUTH_CLIENT_CREDENTIALS = te_client_secret.json ENABLE_TAG_HISTORY = True -TAG_HISTORY_PROJECT = dgtoolkit +TAG_HISTORY_PROJECT = tag-engine-run TAG_HISTORY_DATASET = tag_history ENABLE_JOB_METADATA = True -JOB_METADATA_PROJECT = dgtoolkit +JOB_METADATA_PROJECT = tag-engine-run JOB_METADATA_DATASET = job_metadata diff --git a/templates/home.html b/templates/home.html index 926f083..3c7a635 100644 --- a/templates/home.html +++ b/templates/home.html @@ -14,7 +14,7 @@
Oops! Tag Engine was unable to retrieve the tag template below. Please ensure that it exists and that you have permissions to it.

{% endif %} -

Tag Engine version: 2.3.1
+
Tag Engine version: 2.3.2

Tag Engine for Data Catalog