diff --git a/.github/workflows/docker-openmetadata-db.yml b/.github/workflows/docker-openmetadata-db.yml
index a2277cb0bf33..b03d47627e94 100644
--- a/.github/workflows/docker-openmetadata-db.yml
+++ b/.github/workflows/docker-openmetadata-db.yml
@@ -31,7 +31,7 @@ jobs:
steps:
- name: Check trigger type
if: ${{ env.input == '' }}
- run: echo "input=1.4.0-SNAPSHOT" >> $GITHUB_ENV
+ run: echo "input=1.3.4" >> $GITHUB_ENV
- name: Check out the Repo
uses: actions/checkout@v3
diff --git a/.github/workflows/docker-openmetadata-ingestion-base-slim.yml b/.github/workflows/docker-openmetadata-ingestion-base-slim.yml
index 0cf5a8cb3096..6aa1dffb00ad 100644
--- a/.github/workflows/docker-openmetadata-ingestion-base-slim.yml
+++ b/.github/workflows/docker-openmetadata-ingestion-base-slim.yml
@@ -31,7 +31,7 @@ jobs:
steps:
- name: Check trigger type
if: ${{ env.input == '' }}
- run: echo "input=1.4.0-SNAPSHOT" >> $GITHUB_ENV
+ run: echo "input=1.3.4" >> $GITHUB_ENV
- name: Check out the Repo
uses: actions/checkout@v3
diff --git a/.github/workflows/docker-openmetadata-ingestion-base.yml b/.github/workflows/docker-openmetadata-ingestion-base.yml
index cd34badb2482..163d7bd94d43 100644
--- a/.github/workflows/docker-openmetadata-ingestion-base.yml
+++ b/.github/workflows/docker-openmetadata-ingestion-base.yml
@@ -31,7 +31,7 @@ jobs:
steps:
- name: Check trigger type
if: ${{ env.input == '' }}
- run: echo "input=1.4.0-SNAPSHOT" >> $GITHUB_ENV
+ run: echo "input=1.3.4" >> $GITHUB_ENV
- name: Check out the Repo
uses: actions/checkout@v3
diff --git a/.github/workflows/docker-openmetadata-ingestion.yml b/.github/workflows/docker-openmetadata-ingestion.yml
index d2c97c330d2c..11c86b611796 100644
--- a/.github/workflows/docker-openmetadata-ingestion.yml
+++ b/.github/workflows/docker-openmetadata-ingestion.yml
@@ -31,7 +31,7 @@ jobs:
steps:
- name: Check trigger type
if: ${{ env.input == '' }}
- run: echo "input=1.4.0-SNAPSHOT" >> $GITHUB_ENV
+ run: echo "input=1.3.4" >> $GITHUB_ENV
- name: Check out the Repo
uses: actions/checkout@v3
diff --git a/.github/workflows/docker-openmetadata-postgres.yml b/.github/workflows/docker-openmetadata-postgres.yml
index 19ccc3f6077d..b8c5672860f9 100644
--- a/.github/workflows/docker-openmetadata-postgres.yml
+++ b/.github/workflows/docker-openmetadata-postgres.yml
@@ -31,7 +31,7 @@ jobs:
steps:
- name: Check trigger type
if: ${{ env.input == '' }}
- run: echo "input=1.4.0-SNAPSHOT" >> $GITHUB_ENV
+ run: echo "input=1.3.4" >> $GITHUB_ENV
- name: Check out the Repo
uses: actions/checkout@v3
diff --git a/.github/workflows/docker-openmetadata-server.yml b/.github/workflows/docker-openmetadata-server.yml
index 32d36cabfc43..8d7008514748 100644
--- a/.github/workflows/docker-openmetadata-server.yml
+++ b/.github/workflows/docker-openmetadata-server.yml
@@ -64,7 +64,7 @@ jobs:
steps:
- name: Check trigger type
id: check_trigger
- run: echo "DOCKER_RELEASE_TAG=1.4.0-SNAPSHOT" >> $GITHUB_OUTPUT
+ run: echo "DOCKER_RELEASE_TAG=1.3.4" >> $GITHUB_OUTPUT
- name: Download application from Artifact
uses: actions/download-artifact@v2
@@ -129,7 +129,7 @@ jobs:
- name: Check trigger type
id: check_trigger
if: ${{ env.DOCKER_RELEASE_TAG == '' }}
- run: echo "DOCKER_RELEASE_TAG=1.4.0-SNAPSHOT" >> $GITHUB_ENV
+ run: echo "DOCKER_RELEASE_TAG=1.3.4" >> $GITHUB_ENV
- name: Check out the Repo
uses: actions/checkout@v3
diff --git a/bootstrap/sql/migrations/native/1.3.1/mysql/postDataMigrationSQLScript.sql b/bootstrap/sql/migrations/native/1.3.1/mysql/postDataMigrationSQLScript.sql
index e69de29bb2d1..a3693db8f6a8 100644
--- a/bootstrap/sql/migrations/native/1.3.1/mysql/postDataMigrationSQLScript.sql
+++ b/bootstrap/sql/migrations/native/1.3.1/mysql/postDataMigrationSQLScript.sql
@@ -0,0 +1,13 @@
+-- Update the relation between testDefinition and testCase to 0 (CONTAINS)
+UPDATE entity_relationship
+SET relation = 0
+WHERE fromEntity = 'testDefinition' AND toEntity = 'testCase' AND relation != 0;
+
+-- Update the test definition provider
+-- If the test definition has OpenMetadata as a test platform, then the provider is system, else it is user
+UPDATE test_definition
+SET json = CASE
+ WHEN JSON_CONTAINS(json, '"OpenMetadata"', '$.testPlatforms') THEN JSON_INSERT(json,'$.provider','system')
+ ELSE JSON_INSERT(json,'$.provider','user')
+ END
+;
diff --git a/bootstrap/sql/migrations/native/1.3.1/postgres/postDataMigrationSQLScript.sql b/bootstrap/sql/migrations/native/1.3.1/postgres/postDataMigrationSQLScript.sql
index e69de29bb2d1..dca6c0b7e3e3 100644
--- a/bootstrap/sql/migrations/native/1.3.1/postgres/postDataMigrationSQLScript.sql
+++ b/bootstrap/sql/migrations/native/1.3.1/postgres/postDataMigrationSQLScript.sql
@@ -0,0 +1,13 @@
+-- Update the relation between testDefinition and testCase to 0 (CONTAINS)
+UPDATE entity_relationship
+SET relation = 0
+WHERE fromEntity = 'testDefinition' AND toEntity = 'testCase' AND relation != 0;
+
+-- Update the test definition provider
+-- If the test definition has OpenMetadata as a test platform, then the provider is system, else it is user
+UPDATE test_definition
+SET json =
+ case
+ when json->'testPlatforms' @> '"OpenMetadata"' then jsonb_set(json,'{provider}','"system"',true)
+ else jsonb_set(json,'{provider}','"user"', true)
+ end;
diff --git a/bootstrap/sql/migrations/native/1.3.2/mysql/schemaChanges.sql b/bootstrap/sql/migrations/native/1.3.2/mysql/schemaChanges.sql
new file mode 100644
index 000000000000..d8d880adcca7
--- /dev/null
+++ b/bootstrap/sql/migrations/native/1.3.2/mysql/schemaChanges.sql
@@ -0,0 +1,3 @@
+ALTER TABLE test_case ADD COLUMN status VARCHAR(56) GENERATED ALWAYS AS (json ->> '$.testCaseResult.testCaseStatus') STORED NULL;
+ALTER TABLE test_case ADD COLUMN entityLink VARCHAR(512) GENERATED ALWAYS AS (json ->> '$.entityLink') STORED NOT NULL;
+
diff --git a/bootstrap/sql/migrations/native/1.3.2/postgres/schemaChanges.sql b/bootstrap/sql/migrations/native/1.3.2/postgres/schemaChanges.sql
new file mode 100644
index 000000000000..957d395251f9
--- /dev/null
+++ b/bootstrap/sql/migrations/native/1.3.2/postgres/schemaChanges.sql
@@ -0,0 +1,2 @@
+ALTER TABLE test_case ADD COLUMN status VARCHAR(56) GENERATED ALWAYS AS (json -> 'testCaseResult' ->> 'testCaseStatus') STORED NULL;
+ALTER TABLE test_case ADD COLUMN entityLink VARCHAR(512) GENERATED ALWAYS AS (json ->> 'entityLink') STORED NOT NULL;
diff --git a/bootstrap/sql/migrations/native/1.3.3/mysql/schemaChanges.sql b/bootstrap/sql/migrations/native/1.3.3/mysql/schemaChanges.sql
new file mode 100644
index 000000000000..fc06c0e58c4b
--- /dev/null
+++ b/bootstrap/sql/migrations/native/1.3.3/mysql/schemaChanges.sql
@@ -0,0 +1,24 @@
+-- Change scheduleType to scheduleTimeline
+
+UPDATE installed_apps
+SET json = JSON_INSERT(
+ JSON_REMOVE(json, '$.appSchedule.scheduleType'),
+ '$.appSchedule.scheduleTimeline',
+ JSON_EXTRACT(json, '$.appSchedule.scheduleType')
+ );
+delete from apps_extension_time_series;
+
+
+-- Change systemApp to system
+UPDATE installed_apps
+SET json = JSON_INSERT(
+ JSON_REMOVE(json, '$.systemApp'),
+ '$.system',
+ JSON_EXTRACT(json, '$.systemApp')
+ );
+UPDATE apps_marketplace
+SET json = JSON_INSERT(
+ JSON_REMOVE(json, '$.systemApp'),
+ '$.system',
+ JSON_EXTRACT(json, '$.systemApp')
+ );
\ No newline at end of file
diff --git a/bootstrap/sql/migrations/native/1.3.3/postgres/schemaChanges.sql b/bootstrap/sql/migrations/native/1.3.3/postgres/schemaChanges.sql
new file mode 100644
index 000000000000..4173eb4a9687
--- /dev/null
+++ b/bootstrap/sql/migrations/native/1.3.3/postgres/schemaChanges.sql
@@ -0,0 +1,32 @@
+-- Change scheduleType to scheduleTimeline; this failed in 1.3.2, so it is updated here
+UPDATE installed_apps
+SET json = jsonb_set(
+ json::jsonb,
+ '{appSchedule}',
+ jsonb_set(
+ json->'appSchedule',
+ '{scheduleTimeline}',
+ json->'appSchedule'->'scheduleType'
+ ) - 'scheduleType',
+ true
+ )
+WHERE json->'appSchedule'->>'scheduleType' IS NOT NULL;
+
+delete from apps_extension_time_series;
+
+-- Change systemApp to system; this failed in 1.3.2, so it is updated here
+UPDATE installed_apps
+SET json = jsonb_set(
+ json::jsonb,
+ '{system}',
+ json->'systemApp'
+ ) - 'systemApp'
+WHERE jsonb_exists(json::jsonb, 'systemApp') = true;
+
+UPDATE apps_marketplace
+SET json = jsonb_set(
+ json::jsonb,
+ '{system}',
+ json->'systemApp'
+ ) - 'systemApp'
+WHERE jsonb_exists(json::jsonb, 'systemApp') = true;
diff --git a/bootstrap/sql/migrations/native/1.4.0/mysql/postDataMigrationSQLScript.sql b/bootstrap/sql/migrations/native/1.4.0/mysql/postDataMigrationSQLScript.sql
deleted file mode 100644
index a3693db8f6a8..000000000000
--- a/bootstrap/sql/migrations/native/1.4.0/mysql/postDataMigrationSQLScript.sql
+++ /dev/null
@@ -1,13 +0,0 @@
--- Update the relation between testDefinition and testCase to 0 (CONTAINS)
-UPDATE entity_relationship
-SET relation = 0
-WHERE fromEntity = 'testDefinition' AND toEntity = 'testCase' AND relation != 0;
-
--- Update the test definition provider
--- If the test definition has OpenMetadata as a test platform, then the provider is system, else it is user
-UPDATE test_definition
-SET json = CASE
- WHEN JSON_CONTAINS(json, '"OpenMetadata"', '$.testPlatforms') THEN JSON_INSERT(json,'$.provider','system')
- ELSE JSON_INSERT(json,'$.provider','user')
- END
-;
diff --git a/bootstrap/sql/migrations/native/1.4.0/mysql/schemaChanges.sql b/bootstrap/sql/migrations/native/1.4.0/mysql/schemaChanges.sql
deleted file mode 100644
index 06035180588e..000000000000
--- a/bootstrap/sql/migrations/native/1.4.0/mysql/schemaChanges.sql
+++ /dev/null
@@ -1,4 +0,0 @@
--- Add the supportsProfiler field to the MongoDB connection configuration
-UPDATE dbservice_entity
-SET json = JSON_INSERT(json, '$.connection.config.supportsProfiler', TRUE)
-WHERE serviceType = 'MongoDB';
\ No newline at end of file
diff --git a/bootstrap/sql/migrations/native/1.4.0/postgres/postDataMigrationSQLScript.sql b/bootstrap/sql/migrations/native/1.4.0/postgres/postDataMigrationSQLScript.sql
deleted file mode 100644
index dca6c0b7e3e3..000000000000
--- a/bootstrap/sql/migrations/native/1.4.0/postgres/postDataMigrationSQLScript.sql
+++ /dev/null
@@ -1,13 +0,0 @@
--- Update the relation between testDefinition and testCase to 0 (CONTAINS)
-UPDATE entity_relationship
-SET relation = 0
-WHERE fromEntity = 'testDefinition' AND toEntity = 'testCase' AND relation != 0;
-
--- Update the test definition provider
--- If the test definition has OpenMetadata as a test platform, then the provider is system, else it is user
-UPDATE test_definition
-SET json =
- case
- when json->'testPlatforms' @> '"OpenMetadata"' then jsonb_set(json,'{provider}','"system"',true)
- else jsonb_set(json,'{provider}','"user"', true)
- end;
diff --git a/bootstrap/sql/migrations/native/1.4.0/postgres/schemaChanges.sql b/bootstrap/sql/migrations/native/1.4.0/postgres/schemaChanges.sql
deleted file mode 100644
index c8b6830c129e..000000000000
--- a/bootstrap/sql/migrations/native/1.4.0/postgres/schemaChanges.sql
+++ /dev/null
@@ -1,4 +0,0 @@
--- Add the supportsProfiler field to the MongoDB connection configuration
-UPDATE dbservice_entity
-SET json = jsonb_set(json::jsonb, '{connection,config,supportsProfiler}', 'true'::jsonb)
-WHERE serviceType = 'MongoDB';
\ No newline at end of file
diff --git a/common/pom.xml b/common/pom.xml
index 42f9bd40227f..ef17715284fc 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -18,7 +18,7 @@
  <artifactId>platform</artifactId>
  <groupId>org.open-metadata</groupId>
- <version>1.4.0-SNAPSHOT</version>
+ <version>1.3.4</version>
  </parent>
  <modelVersion>4.0.0</modelVersion>
diff --git a/common/src/main/java/org/openmetadata/common/utils/CommonUtil.java b/common/src/main/java/org/openmetadata/common/utils/CommonUtil.java
index 2d2e0fa4fc91..c4e5c691a9ae 100644
--- a/common/src/main/java/org/openmetadata/common/utils/CommonUtil.java
+++ b/common/src/main/java/org/openmetadata/common/utils/CommonUtil.java
@@ -17,6 +17,7 @@
import java.io.File;
import java.io.IOException;
+import java.lang.reflect.Method;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
@@ -207,4 +208,24 @@ public static URI getUri(String uri) {
}
return null;
}
+
+ public static boolean findChildren(List<?> list, String methodName, String fqn) {
+ if (list == null || list.isEmpty()) return false;
+ try {
+ Method getChildren = list.get(0).getClass().getMethod(methodName);
+ Method getFQN = list.get(0).getClass().getMethod("getFullyQualifiedName");
+ return list.stream()
+ .anyMatch(
+ o -> {
+ try {
+ return getFQN.invoke(o).equals(fqn)
+ || findChildren((List<?>) getChildren.invoke(o), methodName, fqn);
+ } catch (Exception e) {
+ return false;
+ }
+ });
+ } catch (Exception e) {
+ return false;
+ }
+ }
}
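
Reviewer note: the new `findChildren` helper walks an arbitrary entity tree via reflection, matching either a node's fully qualified name or any descendant returned by the supplied child getter. A rough Python sketch of the same recursive lookup, using hypothetical objects rather than the generated entity classes:

```python
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class Node:
    # Hypothetical stand-in for an entity with an FQN and nested children
    fully_qualified_name: str
    children: List["Node"] = field(default_factory=list)


def find_children(nodes: Optional[List[Node]], fqn: str) -> bool:
    """Return True if any node, or any of its descendants, matches the FQN."""
    if not nodes:
        return False
    return any(
        node.fully_qualified_name == fqn or find_children(node.children, fqn)
        for node in nodes
    )


tree = [Node("db.schema", [Node("db.schema.table", [Node("db.schema.table.col")])])]
assert find_children(tree, "db.schema.table.col")
```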
diff --git a/conf/openmetadata.yaml b/conf/openmetadata.yaml
index 28fa143d0fb3..4c54958679f9 100644
--- a/conf/openmetadata.yaml
+++ b/conf/openmetadata.yaml
@@ -163,6 +163,7 @@ authorizerConfiguration:
enableSecureSocketConnection : ${AUTHORIZER_ENABLE_SECURE_SOCKET:-false}
authenticationConfiguration:
+ clientType: ${AUTHENTICATION_CLIENT_TYPE:-public}
provider: ${AUTHENTICATION_PROVIDER:-basic}
# This is used by the auth provider to provide the response as either id_token or code
responseType: ${AUTHENTICATION_RESPONSE_TYPE:-id_token}
@@ -174,6 +175,22 @@ authenticationConfiguration:
callbackUrl: ${AUTHENTICATION_CALLBACK_URL:-""}
jwtPrincipalClaims: ${AUTHENTICATION_JWT_PRINCIPAL_CLAIMS:-[email,preferred_username,sub]}
enableSelfSignup : ${AUTHENTICATION_ENABLE_SELF_SIGNUP:-true}
+ oidcConfiguration:
+ id: ${OIDC_CLIENT_ID:-""}
+ type: ${OIDC_TYPE:-""} # google, azure etc.
+ secret: ${OIDC_CLIENT_SECRET:-""}
+ scope: ${OIDC_SCOPE:-"openid email profile"}
+ discoveryUri: ${OIDC_DISCOVERY_URI:-""}
+ useNonce: ${OIDC_USE_NONCE:-true}
+ preferredJwsAlgorithm: ${OIDC_PREFERRED_JWS:-"RS256"}
+ responseType: ${OIDC_RESPONSE_TYPE:-"code"}
+ disablePkce: ${OIDC_DISABLE_PKCE:-true}
+ callbackUrl: ${OIDC_CALLBACK:-"http://localhost:8585/callback"}
+ serverUrl: ${OIDC_SERVER_URL:-"http://localhost:8585"}
+ clientAuthenticationMethod: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"}
+ tenant: ${OIDC_TENANT:-""}
+ maxClockSkew: ${OIDC_MAX_CLOCK_SKEW:-""}
+ customParams: ${OIDC_CUSTOM_PARAMS:-}
samlConfiguration:
debugMode: ${SAML_DEBUG_MODE:-false}
idp:
@@ -263,7 +280,7 @@ eventMonitoringConfiguration:
eventMonitor: ${EVENT_MONITOR:-prometheus} # Possible values are "prometheus", "cloudwatch"
batchSize: ${EVENT_MONITOR_BATCH_SIZE:-10}
pathPattern: ${EVENT_MONITOR_PATH_PATTERN:-["/api/v1/tables/*", "/api/v1/health-check"]}
- latency: ${EVENT_MONITOR_LATENCY:-[]} # For value p99=0.99, p90=0.90, p50=0.50 etc.
+ latency: ${EVENT_MONITOR_LATENCY:-[0.99, 0.90]} # For value p99=0.99, p90=0.90, p50=0.50 etc.
# it will use the default auth provider for AWS services if parameters are not set
# parameters:
# region: ${OM_MONITOR_REGION:-""}
diff --git a/ingestion/src/metadata/__init__.py b/docker/__init__.py
similarity index 100%
rename from ingestion/src/metadata/__init__.py
rename to docker/__init__.py
diff --git a/docker/development/docker-compose-postgres.yml b/docker/development/docker-compose-postgres.yml
index b742b6104a72..50f0dd0a8640 100644
--- a/docker/development/docker-compose-postgres.yml
+++ b/docker/development/docker-compose-postgres.yml
@@ -98,6 +98,23 @@ services:
AUTHENTICATION_CALLBACK_URL: ${AUTHENTICATION_CALLBACK_URL:-""}
AUTHENTICATION_JWT_PRINCIPAL_CLAIMS: ${AUTHENTICATION_JWT_PRINCIPAL_CLAIMS:-[email,preferred_username,sub]}
AUTHENTICATION_ENABLE_SELF_SIGNUP: ${AUTHENTICATION_ENABLE_SELF_SIGNUP:-true}
+ AUTHENTICATION_CLIENT_TYPE: ${AUTHENTICATION_CLIENT_TYPE:-public}
+ # For OIDC Authentication, when the client is confidential
+ OIDC_CLIENT_ID: ${OIDC_CLIENT_ID:-""}
+ OIDC_TYPE: ${OIDC_TYPE:-""} # google, azure etc.
+ OIDC_CLIENT_SECRET: ${OIDC_CLIENT_SECRET:-""}
+ OIDC_SCOPE: ${OIDC_SCOPE:-"openid email profile"}
+ OIDC_DISCOVERY_URI: ${OIDC_DISCOVERY_URI:-""}
+ OIDC_USE_NONCE: ${OIDC_USE_NONCE:-true}
+ OIDC_PREFERRED_JWS: ${OIDC_PREFERRED_JWS:-"RS256"}
+ OIDC_RESPONSE_TYPE: ${OIDC_RESPONSE_TYPE:-"code"}
+ OIDC_DISABLE_PKCE: ${OIDC_DISABLE_PKCE:-true}
+ OIDC_CALLBACK: ${OIDC_CALLBACK:-"http://localhost:8585/callback"}
+ OIDC_SERVER_URL: ${OIDC_SERVER_URL:-"http://localhost:8585"}
+ OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"}
+ OIDC_TENANT: ${OIDC_TENANT:-""}
+ OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""}
+ OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}}
# For SAML Authentication
# SAML_DEBUG_MODE: ${SAML_DEBUG_MODE:-false}
# SAML_IDP_ENTITY_ID: ${SAML_IDP_ENTITY_ID:-""}
@@ -292,6 +309,23 @@ services:
AUTHENTICATION_CALLBACK_URL: ${AUTHENTICATION_CALLBACK_URL:-""}
AUTHENTICATION_JWT_PRINCIPAL_CLAIMS: ${AUTHENTICATION_JWT_PRINCIPAL_CLAIMS:-[email,preferred_username,sub]}
AUTHENTICATION_ENABLE_SELF_SIGNUP : ${AUTHENTICATION_ENABLE_SELF_SIGNUP:-true}
+ AUTHENTICATION_CLIENT_TYPE: ${AUTHENTICATION_CLIENT_TYPE:-public}
+ # For OIDC Authentication, when the client is confidential
+ OIDC_CLIENT_ID: ${OIDC_CLIENT_ID:-""}
+ OIDC_TYPE: ${OIDC_TYPE:-""} # google, azure etc.
+ OIDC_CLIENT_SECRET: ${OIDC_CLIENT_SECRET:-""}
+ OIDC_SCOPE: ${OIDC_SCOPE:-"openid email profile"}
+ OIDC_DISCOVERY_URI: ${OIDC_DISCOVERY_URI:-""}
+ OIDC_USE_NONCE: ${OIDC_USE_NONCE:-true}
+ OIDC_PREFERRED_JWS: ${OIDC_PREFERRED_JWS:-"RS256"}
+ OIDC_RESPONSE_TYPE: ${OIDC_RESPONSE_TYPE:-"code"}
+ OIDC_DISABLE_PKCE: ${OIDC_DISABLE_PKCE:-true}
+ OIDC_CALLBACK: ${OIDC_CALLBACK:-"http://localhost:8585/callback"}
+ OIDC_SERVER_URL: ${OIDC_SERVER_URL:-"http://localhost:8585"}
+ OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"}
+ OIDC_TENANT: ${OIDC_TENANT:-""}
+ OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""}
+ OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}}
# For SAML Authentication
# SAML_DEBUG_MODE: ${SAML_DEBUG_MODE:-false}
# SAML_IDP_ENTITY_ID: ${SAML_IDP_ENTITY_ID:-""}
diff --git a/docker/development/docker-compose.yml b/docker/development/docker-compose.yml
index 09baf7809de8..2a2e91dc149e 100644
--- a/docker/development/docker-compose.yml
+++ b/docker/development/docker-compose.yml
@@ -98,6 +98,23 @@ services:
AUTHENTICATION_CALLBACK_URL: ${AUTHENTICATION_CALLBACK_URL:-""}
AUTHENTICATION_JWT_PRINCIPAL_CLAIMS: ${AUTHENTICATION_JWT_PRINCIPAL_CLAIMS:-[email,preferred_username,sub]}
AUTHENTICATION_ENABLE_SELF_SIGNUP: ${AUTHENTICATION_ENABLE_SELF_SIGNUP:-true}
+ AUTHENTICATION_CLIENT_TYPE: ${AUTHENTICATION_CLIENT_TYPE:-public}
+ # For OIDC Authentication, when the client is confidential
+ OIDC_CLIENT_ID: ${OIDC_CLIENT_ID:-""}
+ OIDC_TYPE: ${OIDC_TYPE:-""} # google, azure etc.
+ OIDC_CLIENT_SECRET: ${OIDC_CLIENT_SECRET:-""}
+ OIDC_SCOPE: ${OIDC_SCOPE:-"openid email profile"}
+ OIDC_DISCOVERY_URI: ${OIDC_DISCOVERY_URI:-""}
+ OIDC_USE_NONCE: ${OIDC_USE_NONCE:-true}
+ OIDC_PREFERRED_JWS: ${OIDC_PREFERRED_JWS:-"RS256"}
+ OIDC_RESPONSE_TYPE: ${OIDC_RESPONSE_TYPE:-"code"}
+ OIDC_DISABLE_PKCE: ${OIDC_DISABLE_PKCE:-true}
+ OIDC_CALLBACK: ${OIDC_CALLBACK:-"http://localhost:8585/callback"}
+ OIDC_SERVER_URL: ${OIDC_SERVER_URL:-"http://localhost:8585"}
+ OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"}
+ OIDC_TENANT: ${OIDC_TENANT:-""}
+ OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""}
+ OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}}
# For SAML Authentication
# SAML_DEBUG_MODE: ${SAML_DEBUG_MODE:-false}
# SAML_IDP_ENTITY_ID: ${SAML_IDP_ENTITY_ID:-""}
@@ -289,6 +306,23 @@ services:
AUTHENTICATION_CALLBACK_URL: ${AUTHENTICATION_CALLBACK_URL:-""}
AUTHENTICATION_JWT_PRINCIPAL_CLAIMS: ${AUTHENTICATION_JWT_PRINCIPAL_CLAIMS:-[email,preferred_username,sub]}
AUTHENTICATION_ENABLE_SELF_SIGNUP : ${AUTHENTICATION_ENABLE_SELF_SIGNUP:-true}
+ AUTHENTICATION_CLIENT_TYPE: ${AUTHENTICATION_CLIENT_TYPE:-public}
+ # For OIDC Authentication, when the client is confidential
+ OIDC_CLIENT_ID: ${OIDC_CLIENT_ID:-""}
+ OIDC_TYPE: ${OIDC_TYPE:-""} # google, azure etc.
+ OIDC_CLIENT_SECRET: ${OIDC_CLIENT_SECRET:-""}
+ OIDC_SCOPE: ${OIDC_SCOPE:-"openid email profile"}
+ OIDC_DISCOVERY_URI: ${OIDC_DISCOVERY_URI:-""}
+ OIDC_USE_NONCE: ${OIDC_USE_NONCE:-true}
+ OIDC_PREFERRED_JWS: ${OIDC_PREFERRED_JWS:-"RS256"}
+ OIDC_RESPONSE_TYPE: ${OIDC_RESPONSE_TYPE:-"code"}
+ OIDC_DISABLE_PKCE: ${OIDC_DISABLE_PKCE:-true}
+ OIDC_CALLBACK: ${OIDC_CALLBACK:-"http://localhost:8585/callback"}
+ OIDC_SERVER_URL: ${OIDC_SERVER_URL:-"http://localhost:8585"}
+ OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"}
+ OIDC_TENANT: ${OIDC_TENANT:-""}
+ OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""}
+ OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}}
# For SAML Authentication
# SAML_DEBUG_MODE: ${SAML_DEBUG_MODE:-false}
# SAML_IDP_ENTITY_ID: ${SAML_IDP_ENTITY_ID:-""}
diff --git a/docker/docker-compose-ingestion/docker-compose-ingestion.yml b/docker/docker-compose-ingestion/docker-compose-ingestion.yml
index 8537698e7d51..a665b001656c 100644
--- a/docker/docker-compose-ingestion/docker-compose-ingestion.yml
+++ b/docker/docker-compose-ingestion/docker-compose-ingestion.yml
@@ -18,7 +18,7 @@ volumes:
services:
ingestion:
container_name: openmetadata_ingestion
- image: docker.getcollate.io/openmetadata/ingestion:1.4.0-SNAPSHOT
+ image: docker.getcollate.io/openmetadata/ingestion:1.3.4
environment:
AIRFLOW__API__AUTH_BACKENDS: "airflow.api.auth.backend.basic_auth,airflow.api.auth.backend.session"
AIRFLOW__CORE__EXECUTOR: LocalExecutor
diff --git a/docker/docker-compose-openmetadata/docker-compose-openmetadata.yml b/docker/docker-compose-openmetadata/docker-compose-openmetadata.yml
index 2ac1cf9bb9d1..94cb770caa1a 100644
--- a/docker/docker-compose-openmetadata/docker-compose-openmetadata.yml
+++ b/docker/docker-compose-openmetadata/docker-compose-openmetadata.yml
@@ -14,7 +14,7 @@ services:
execute-migrate-all:
container_name: execute_migrate_all
command: "./bootstrap/openmetadata-ops.sh migrate"
- image: docker.getcollate.io/openmetadata/server:1.4.0-SNAPSHOT
+ image: docker.getcollate.io/openmetadata/server:1.3.4
environment:
OPENMETADATA_CLUSTER_NAME: ${OPENMETADATA_CLUSTER_NAME:-openmetadata}
SERVER_PORT: ${SERVER_PORT:-8585}
@@ -42,6 +42,23 @@ services:
AUTHENTICATION_CALLBACK_URL: ${AUTHENTICATION_CALLBACK_URL:-""}
AUTHENTICATION_JWT_PRINCIPAL_CLAIMS: ${AUTHENTICATION_JWT_PRINCIPAL_CLAIMS:-[email,preferred_username,sub]}
AUTHENTICATION_ENABLE_SELF_SIGNUP: ${AUTHENTICATION_ENABLE_SELF_SIGNUP:-true}
+ AUTHENTICATION_CLIENT_TYPE: ${AUTHENTICATION_CLIENT_TYPE:-public}
+ # For OIDC Authentication, when the client is confidential
+ OIDC_CLIENT_ID: ${OIDC_CLIENT_ID:-""}
+ OIDC_TYPE: ${OIDC_TYPE:-""} # google, azure etc.
+ OIDC_CLIENT_SECRET: ${OIDC_CLIENT_SECRET:-""}
+ OIDC_SCOPE: ${OIDC_SCOPE:-"openid email profile"}
+ OIDC_DISCOVERY_URI: ${OIDC_DISCOVERY_URI:-""}
+ OIDC_USE_NONCE: ${OIDC_USE_NONCE:-true}
+ OIDC_PREFERRED_JWS: ${OIDC_PREFERRED_JWS:-"RS256"}
+ OIDC_RESPONSE_TYPE: ${OIDC_RESPONSE_TYPE:-"code"}
+ OIDC_DISABLE_PKCE: ${OIDC_DISABLE_PKCE:-true}
+ OIDC_CALLBACK: ${OIDC_CALLBACK:-"http://localhost:8585/callback"}
+ OIDC_SERVER_URL: ${OIDC_SERVER_URL:-"http://localhost:8585"}
+ OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"}
+ OIDC_TENANT: ${OIDC_TENANT:-""}
+ OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""}
+ OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}}
# For SAML Authentication
# SAML_DEBUG_MODE: ${SAML_DEBUG_MODE:-false}
# SAML_IDP_ENTITY_ID: ${SAML_IDP_ENTITY_ID:-""}
@@ -207,7 +224,7 @@ services:
openmetadata-server:
container_name: openmetadata_server
restart: always
- image: docker.getcollate.io/openmetadata/server:1.4.0-SNAPSHOT
+ image: docker.getcollate.io/openmetadata/server:1.3.4
environment:
OPENMETADATA_CLUSTER_NAME: ${OPENMETADATA_CLUSTER_NAME:-openmetadata}
SERVER_PORT: ${SERVER_PORT:-8585}
@@ -232,6 +249,23 @@ services:
AUTHENTICATION_CALLBACK_URL: ${AUTHENTICATION_CALLBACK_URL:-""}
AUTHENTICATION_JWT_PRINCIPAL_CLAIMS: ${AUTHENTICATION_JWT_PRINCIPAL_CLAIMS:-[email,preferred_username,sub]}
AUTHENTICATION_ENABLE_SELF_SIGNUP: ${AUTHENTICATION_ENABLE_SELF_SIGNUP:-true}
+ AUTHENTICATION_CLIENT_TYPE: ${AUTHENTICATION_CLIENT_TYPE:-public}
+ # For OIDC Authentication, when the client is confidential
+ OIDC_CLIENT_ID: ${OIDC_CLIENT_ID:-""}
+ OIDC_TYPE: ${OIDC_TYPE:-""} # google, azure etc.
+ OIDC_CLIENT_SECRET: ${OIDC_CLIENT_SECRET:-""}
+ OIDC_SCOPE: ${OIDC_SCOPE:-"openid email profile"}
+ OIDC_DISCOVERY_URI: ${OIDC_DISCOVERY_URI:-""}
+ OIDC_USE_NONCE: ${OIDC_USE_NONCE:-true}
+ OIDC_PREFERRED_JWS: ${OIDC_PREFERRED_JWS:-"RS256"}
+ OIDC_RESPONSE_TYPE: ${OIDC_RESPONSE_TYPE:-"code"}
+ OIDC_DISABLE_PKCE: ${OIDC_DISABLE_PKCE:-true}
+ OIDC_CALLBACK: ${OIDC_CALLBACK:-"http://localhost:8585/callback"}
+ OIDC_SERVER_URL: ${OIDC_SERVER_URL:-"http://localhost:8585"}
+ OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"}
+ OIDC_TENANT: ${OIDC_TENANT:-""}
+ OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""}
+ OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}}
# For SAML Authentication
# SAML_DEBUG_MODE: ${SAML_DEBUG_MODE:-false}
# SAML_IDP_ENTITY_ID: ${SAML_IDP_ENTITY_ID:-""}
diff --git a/docker/docker-compose-quickstart/Dockerfile b/docker/docker-compose-quickstart/Dockerfile
index 72b2064227e7..14a6d7eef6f7 100644
--- a/docker/docker-compose-quickstart/Dockerfile
+++ b/docker/docker-compose-quickstart/Dockerfile
@@ -11,7 +11,7 @@
# Build stage
FROM alpine:3.19 AS build
-ARG RI_VERSION="1.3.0-SNAPSHOT"
+ARG RI_VERSION="1.3.4"
ENV RELEASE_URL="https://github.com/open-metadata/OpenMetadata/releases/download/${RI_VERSION}-release/openmetadata-${RI_VERSION}.tar.gz"
RUN mkdir -p /opt/openmetadata && \
@@ -21,7 +21,7 @@ RUN mkdir -p /opt/openmetadata && \
# Final stage
FROM alpine:3.19
-ARG RI_VERSION="1.3.0-SNAPSHOT"
+ARG RI_VERSION="1.3.4"
ARG BUILD_DATE
ARG COMMIT_ID
LABEL maintainer="OpenMetadata"
diff --git a/docker/docker-compose-quickstart/docker-compose-postgres.yml b/docker/docker-compose-quickstart/docker-compose-postgres.yml
index 9cd0d99a546f..73e4fbe9251d 100644
--- a/docker/docker-compose-quickstart/docker-compose-postgres.yml
+++ b/docker/docker-compose-quickstart/docker-compose-postgres.yml
@@ -18,7 +18,7 @@ volumes:
services:
postgresql:
container_name: openmetadata_postgresql
- image: docker.getcollate.io/openmetadata/postgresql:1.4.0-SNAPSHOT
+ image: docker.getcollate.io/openmetadata/postgresql:1.3.4
restart: always
command: "--work_mem=10MB"
environment:
@@ -61,7 +61,7 @@ services:
execute-migrate-all:
container_name: execute_migrate_all
- image: docker.getcollate.io/openmetadata/server:1.4.0-SNAPSHOT
+ image: docker.getcollate.io/openmetadata/server:1.3.4
command: "./bootstrap/openmetadata-ops.sh migrate"
environment:
OPENMETADATA_CLUSTER_NAME: ${OPENMETADATA_CLUSTER_NAME:-openmetadata}
@@ -90,6 +90,23 @@ services:
AUTHENTICATION_CALLBACK_URL: ${AUTHENTICATION_CALLBACK_URL:-""}
AUTHENTICATION_JWT_PRINCIPAL_CLAIMS: ${AUTHENTICATION_JWT_PRINCIPAL_CLAIMS:-[email,preferred_username,sub]}
AUTHENTICATION_ENABLE_SELF_SIGNUP: ${AUTHENTICATION_ENABLE_SELF_SIGNUP:-true}
+ AUTHENTICATION_CLIENT_TYPE: ${AUTHENTICATION_CLIENT_TYPE:-public}
+ # For OIDC Authentication, when the client is confidential
+ OIDC_CLIENT_ID: ${OIDC_CLIENT_ID:-""}
+ OIDC_TYPE: ${OIDC_TYPE:-""} # google, azure etc.
+ OIDC_CLIENT_SECRET: ${OIDC_CLIENT_SECRET:-""}
+ OIDC_SCOPE: ${OIDC_SCOPE:-"openid email profile"}
+ OIDC_DISCOVERY_URI: ${OIDC_DISCOVERY_URI:-""}
+ OIDC_USE_NONCE: ${OIDC_USE_NONCE:-true}
+ OIDC_PREFERRED_JWS: ${OIDC_PREFERRED_JWS:-"RS256"}
+ OIDC_RESPONSE_TYPE: ${OIDC_RESPONSE_TYPE:-"code"}
+ OIDC_DISABLE_PKCE: ${OIDC_DISABLE_PKCE:-true}
+ OIDC_CALLBACK: ${OIDC_CALLBACK:-"http://localhost:8585/callback"}
+ OIDC_SERVER_URL: ${OIDC_SERVER_URL:-"http://localhost:8585"}
+ OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"}
+ OIDC_TENANT: ${OIDC_TENANT:-""}
+ OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""}
+ OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}}
# For SAML Authentication
# SAML_DEBUG_MODE: ${SAML_DEBUG_MODE:-false}
# SAML_IDP_ENTITY_ID: ${SAML_IDP_ENTITY_ID:-""}
@@ -255,7 +272,7 @@ services:
openmetadata-server:
container_name: openmetadata_server
restart: always
- image: docker.getcollate.io/openmetadata/server:1.4.0-SNAPSHOT
+ image: docker.getcollate.io/openmetadata/server:1.3.4
environment:
OPENMETADATA_CLUSTER_NAME: ${OPENMETADATA_CLUSTER_NAME:-openmetadata}
SERVER_PORT: ${SERVER_PORT:-8585}
@@ -280,6 +297,23 @@ services:
AUTHENTICATION_CALLBACK_URL: ${AUTHENTICATION_CALLBACK_URL:-""}
AUTHENTICATION_JWT_PRINCIPAL_CLAIMS: ${AUTHENTICATION_JWT_PRINCIPAL_CLAIMS:-[email,preferred_username,sub]}
AUTHENTICATION_ENABLE_SELF_SIGNUP: ${AUTHENTICATION_ENABLE_SELF_SIGNUP:-true}
+ AUTHENTICATION_CLIENT_TYPE: ${AUTHENTICATION_CLIENT_TYPE:-public}
+ # For OIDC Authentication, when the client is confidential
+ OIDC_CLIENT_ID: ${OIDC_CLIENT_ID:-""}
+ OIDC_TYPE: ${OIDC_TYPE:-""} # google, azure etc.
+ OIDC_CLIENT_SECRET: ${OIDC_CLIENT_SECRET:-""}
+ OIDC_SCOPE: ${OIDC_SCOPE:-"openid email profile"}
+ OIDC_DISCOVERY_URI: ${OIDC_DISCOVERY_URI:-""}
+ OIDC_USE_NONCE: ${OIDC_USE_NONCE:-true}
+ OIDC_PREFERRED_JWS: ${OIDC_PREFERRED_JWS:-"RS256"}
+ OIDC_RESPONSE_TYPE: ${OIDC_RESPONSE_TYPE:-"code"}
+ OIDC_DISABLE_PKCE: ${OIDC_DISABLE_PKCE:-true}
+ OIDC_CALLBACK: ${OIDC_CALLBACK:-"http://localhost:8585/callback"}
+ OIDC_SERVER_URL: ${OIDC_SERVER_URL:-"http://localhost:8585"}
+ OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"}
+ OIDC_TENANT: ${OIDC_TENANT:-""}
+ OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""}
+ OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}}
# For SAML Authentication
# SAML_DEBUG_MODE: ${SAML_DEBUG_MODE:-false}
# SAML_IDP_ENTITY_ID: ${SAML_IDP_ENTITY_ID:-""}
@@ -444,7 +478,7 @@ services:
ingestion:
container_name: openmetadata_ingestion
- image: docker.getcollate.io/openmetadata/ingestion:1.4.0-SNAPSHOT
+ image: docker.getcollate.io/openmetadata/ingestion:1.3.4
depends_on:
elasticsearch:
condition: service_started
diff --git a/docker/docker-compose-quickstart/docker-compose.yml b/docker/docker-compose-quickstart/docker-compose.yml
index c0647108af6c..298c9e9155e7 100644
--- a/docker/docker-compose-quickstart/docker-compose.yml
+++ b/docker/docker-compose-quickstart/docker-compose.yml
@@ -18,7 +18,7 @@ volumes:
services:
mysql:
container_name: openmetadata_mysql
- image: docker.getcollate.io/openmetadata/db:1.4.0-SNAPSHOT
+ image: docker.getcollate.io/openmetadata/db:1.3.4
command: "--sort_buffer_size=10M"
restart: always
environment:
@@ -59,7 +59,7 @@ services:
execute-migrate-all:
container_name: execute_migrate_all
- image: docker.getcollate.io/openmetadata/server:1.4.0-SNAPSHOT
+ image: docker.getcollate.io/openmetadata/server:1.3.4
command: "./bootstrap/openmetadata-ops.sh migrate"
environment:
OPENMETADATA_CLUSTER_NAME: ${OPENMETADATA_CLUSTER_NAME:-openmetadata}
@@ -88,6 +88,23 @@ services:
AUTHENTICATION_CALLBACK_URL: ${AUTHENTICATION_CALLBACK_URL:-""}
AUTHENTICATION_JWT_PRINCIPAL_CLAIMS: ${AUTHENTICATION_JWT_PRINCIPAL_CLAIMS:-[email,preferred_username,sub]}
AUTHENTICATION_ENABLE_SELF_SIGNUP: ${AUTHENTICATION_ENABLE_SELF_SIGNUP:-true}
+ AUTHENTICATION_CLIENT_TYPE: ${AUTHENTICATION_CLIENT_TYPE:-public}
+ # For OIDC Authentication, when the client is confidential
+ OIDC_CLIENT_ID: ${OIDC_CLIENT_ID:-""}
+ OIDC_TYPE: ${OIDC_TYPE:-""} # google, azure etc.
+ OIDC_CLIENT_SECRET: ${OIDC_CLIENT_SECRET:-""}
+ OIDC_SCOPE: ${OIDC_SCOPE:-"openid email profile"}
+ OIDC_DISCOVERY_URI: ${OIDC_DISCOVERY_URI:-""}
+ OIDC_USE_NONCE: ${OIDC_USE_NONCE:-true}
+ OIDC_PREFERRED_JWS: ${OIDC_PREFERRED_JWS:-"RS256"}
+ OIDC_RESPONSE_TYPE: ${OIDC_RESPONSE_TYPE:-"code"}
+ OIDC_DISABLE_PKCE: ${OIDC_DISABLE_PKCE:-true}
+ OIDC_CALLBACK: ${OIDC_CALLBACK:-"http://localhost:8585/callback"}
+ OIDC_SERVER_URL: ${OIDC_SERVER_URL:-"http://localhost:8585"}
+ OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"}
+ OIDC_TENANT: ${OIDC_TENANT:-""}
+ OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""}
+ OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}}
# For SAML Authentication
# SAML_DEBUG_MODE: ${SAML_DEBUG_MODE:-false}
# SAML_IDP_ENTITY_ID: ${SAML_IDP_ENTITY_ID:-""}
@@ -253,7 +270,7 @@ services:
openmetadata-server:
container_name: openmetadata_server
restart: always
- image: docker.getcollate.io/openmetadata/server:1.4.0-SNAPSHOT
+ image: docker.getcollate.io/openmetadata/server:1.3.4
environment:
OPENMETADATA_CLUSTER_NAME: ${OPENMETADATA_CLUSTER_NAME:-openmetadata}
SERVER_PORT: ${SERVER_PORT:-8585}
@@ -278,6 +295,23 @@ services:
AUTHENTICATION_CALLBACK_URL: ${AUTHENTICATION_CALLBACK_URL:-""}
AUTHENTICATION_JWT_PRINCIPAL_CLAIMS: ${AUTHENTICATION_JWT_PRINCIPAL_CLAIMS:-[email,preferred_username,sub]}
AUTHENTICATION_ENABLE_SELF_SIGNUP: ${AUTHENTICATION_ENABLE_SELF_SIGNUP:-true}
+ AUTHENTICATION_CLIENT_TYPE: ${AUTHENTICATION_CLIENT_TYPE:-public}
+ # For OIDC Authentication, when the client is confidential
+ OIDC_CLIENT_ID: ${OIDC_CLIENT_ID:-""}
+ OIDC_TYPE: ${OIDC_TYPE:-""} # google, azure etc.
+ OIDC_CLIENT_SECRET: ${OIDC_CLIENT_SECRET:-""}
+ OIDC_SCOPE: ${OIDC_SCOPE:-"openid email profile"}
+ OIDC_DISCOVERY_URI: ${OIDC_DISCOVERY_URI:-""}
+ OIDC_USE_NONCE: ${OIDC_USE_NONCE:-true}
+ OIDC_PREFERRED_JWS: ${OIDC_PREFERRED_JWS:-"RS256"}
+ OIDC_RESPONSE_TYPE: ${OIDC_RESPONSE_TYPE:-"code"}
+ OIDC_DISABLE_PKCE: ${OIDC_DISABLE_PKCE:-true}
+ OIDC_CALLBACK: ${OIDC_CALLBACK:-"http://localhost:8585/callback"}
+ OIDC_SERVER_URL: ${OIDC_SERVER_URL:-"http://localhost:8585"}
+ OIDC_CLIENT_AUTH_METHOD: ${OIDC_CLIENT_AUTH_METHOD:-"client_secret_post"}
+ OIDC_TENANT: ${OIDC_TENANT:-""}
+ OIDC_MAX_CLOCK_SKEW: ${OIDC_MAX_CLOCK_SKEW:-""}
+ OIDC_CUSTOM_PARAMS: ${OIDC_CUSTOM_PARAMS:-{}}
# For SAML Authentication
# SAML_DEBUG_MODE: ${SAML_DEBUG_MODE:-false}
# SAML_IDP_ENTITY_ID: ${SAML_IDP_ENTITY_ID:-""}
@@ -442,7 +476,7 @@ services:
ingestion:
container_name: openmetadata_ingestion
- image: docker.getcollate.io/openmetadata/ingestion:1.4.0-SNAPSHOT
+ image: docker.getcollate.io/openmetadata/ingestion:1.3.4
depends_on:
elasticsearch:
condition: service_started
diff --git a/docker/validate_compose.py b/docker/validate_compose.py
index 812d5e49596c..c90b46b311ea 100644
--- a/docker/validate_compose.py
+++ b/docker/validate_compose.py
@@ -23,11 +23,9 @@ def get_last_run_info() -> Tuple[str, str]:
while retries < max_retries:
log_ansi_encoded_string(message="Waiting for DAG Run data...")
time.sleep(5)
- res = requests.get(
+ runs = requests.get(
"http://localhost:8080/api/v1/dags/sample_data/dagRuns", auth=BASIC_AUTH, timeout=REQUESTS_TIMEOUT
- )
- res.raise_for_status()
- runs = res.json()
+ ).json()
dag_runs = runs.get("dag_runs")
if dag_runs[0].get("dag_run_id"):
return dag_runs[0].get("dag_run_id"), "success"
diff --git a/ingestion/Dockerfile b/ingestion/Dockerfile
index a895e68bafc9..134599c6bdd7 100644
--- a/ingestion/Dockerfile
+++ b/ingestion/Dockerfile
@@ -81,7 +81,7 @@ ARG INGESTION_DEPENDENCY="all"
ENV PIP_NO_CACHE_DIR=1
# Make pip silent
ENV PIP_QUIET=1
-ARG RI_VERSION="1.3.0.0.dev0"
+ARG RI_VERSION="1.3.4.0"
RUN pip install --upgrade pip
RUN pip install "openmetadata-managed-apis~=${RI_VERSION}" --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.7.3/constraints-3.10.txt"
RUN pip install "openmetadata-ingestion[${INGESTION_DEPENDENCY}]~=${RI_VERSION}"
diff --git a/ingestion/operators/docker/Dockerfile b/ingestion/operators/docker/Dockerfile
index 2221394cecf3..eec6233eae38 100644
--- a/ingestion/operators/docker/Dockerfile
+++ b/ingestion/operators/docker/Dockerfile
@@ -87,7 +87,7 @@ ENV PIP_QUIET=1
RUN pip install --upgrade pip
ARG INGESTION_DEPENDENCY="all"
-ARG RI_VERSION="1.3.0.0.dev0"
+ARG RI_VERSION="1.3.4.0"
RUN pip install --upgrade pip
RUN pip install "openmetadata-ingestion[airflow]~=${RI_VERSION}"
RUN pip install "openmetadata-ingestion[${INGESTION_DEPENDENCY}]~=${RI_VERSION}"
diff --git a/ingestion/pyproject.toml b/ingestion/pyproject.toml
index 9a9be4820adf..3bd4441bfd6e 100644
--- a/ingestion/pyproject.toml
+++ b/ingestion/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
# since it helps us organize and isolate version management
[project]
name = "openmetadata-ingestion"
-version = "1.4.0.0.dev0"
+version = "1.3.4.0"
dynamic = ["readme", "dependencies", "optional-dependencies"]
authors = [
{name = "OpenMetadata Committers"}
diff --git a/ingestion/setup.py b/ingestion/setup.py
index 72e9bad4c93f..41f7d1c970b5 100644
--- a/ingestion/setup.py
+++ b/ingestion/setup.py
@@ -163,7 +163,13 @@
},
"db2": {"ibm-db-sa~=0.3"},
"db2-ibmi": {"sqlalchemy-ibmi~=0.9.3"},
- "databricks": {VERSIONS["sqlalchemy-databricks"], VERSIONS["databricks-sdk"]},
+ "databricks": {
+ VERSIONS["sqlalchemy-databricks"],
+ VERSIONS["databricks-sdk"],
+ "ndg-httpsclient~=0.5.1",
+ "pyOpenSSL~=24.1.0",
+ "pyasn1~=0.6.0",
+ },
"datalake-azure": {
VERSIONS["azure-storage-blob"],
VERSIONS["azure-identity"],
@@ -313,7 +319,6 @@
VERSIONS["snowflake"],
VERSIONS["elasticsearch8"],
VERSIONS["giturlparse"],
- "testcontainers==3.7.1",
}
e2e_test = {
diff --git a/ingestion/src/metadata/clients/azure_client.py b/ingestion/src/metadata/clients/azure_client.py
new file mode 100644
index 000000000000..f80cc0ad5e6a
--- /dev/null
+++ b/ingestion/src/metadata/clients/azure_client.py
@@ -0,0 +1,85 @@
+# Copyright 2021 Collate
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Module containing Azure Client
+"""
+
+from metadata.generated.schema.security.credentials.azureCredentials import (
+ AzureCredentials,
+)
+from metadata.utils.logger import utils_logger
+
+logger = utils_logger()
+
+
+class AzureClient:
+ """
+ AzureClient based on AzureCredentials.
+ """
+
+ def __init__(self, credentials: "AzureCredentials"):
+ self.credentials = credentials
+ if not isinstance(credentials, AzureCredentials):
+ self.credentials = AzureCredentials.parse_obj(credentials)
+
+ def create_client(
+ self,
+ ):
+ from azure.identity import ClientSecretCredential, DefaultAzureCredential
+
+ try:
+ if (
+ getattr(self.credentials, "tenantId", None)
+ and getattr(self.credentials, "clientId", None)
+ and getattr(self.credentials, "clientSecret", None)
+ ):
+ logger.info("Using Client Secret Credentials")
+ return ClientSecretCredential(
+ tenant_id=self.credentials.tenantId,
+ client_id=self.credentials.clientId,
+ client_secret=self.credentials.clientSecret.get_secret_value(),
+ )
+ else:
+ logger.info("Using Default Azure Credentials")
+ return DefaultAzureCredential()
+ except Exception as e:
+ logger.error(f"Error creating Azure Client: {e}")
+ raise e
+
+ def create_blob_client(self):
+ from azure.storage.blob import BlobServiceClient
+
+ try:
+ logger.info("Creating Blob Service Client")
+ if self.credentials.accountName:
+ return BlobServiceClient(
+ account_url=f"https://{self.credentials.accountName}.blob.core.windows.net/",
+ credential=self.create_client(),
+ )
+ raise ValueError("Account Name is required to create Blob Service Client")
+ except Exception as e:
+ logger.error(f"Error creating Blob Service Client: {e}")
+ raise e
+
+ def create_secret_client(self):
+ from azure.keyvault.secrets import SecretClient
+
+ try:
+ if self.credentials.vaultName:
+ logger.info("Creating Secret Client")
+ return SecretClient(
+ vault_url=f"https://{self.credentials.vaultName}.vault.azure.net/",
+ credential=self.create_client(),
+ )
+ raise ValueError("Vault Name is required to create a Secret Client")
+ except Exception as e:
+ logger.error(f"Error creating Secret Client: {e}")
+ raise e
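
Reviewer note: the new `AzureClient` wrapper picks `ClientSecretCredential` when tenantId, clientId, and clientSecret are all present, and falls back to `DefaultAzureCredential` otherwise. A hedged usage sketch — it assumes `azure-identity`/`azure-storage-blob` are installed and all credential values below are placeholders:

```python
from metadata.clients.azure_client import AzureClient
from metadata.generated.schema.security.credentials.azureCredentials import (
    AzureCredentials,
)

# Placeholder values; with tenantId/clientId/clientSecret all set, create_client()
# returns a ClientSecretCredential, otherwise it falls back to
# DefaultAzureCredential (environment, managed identity, CLI, ...).
credentials = AzureCredentials(
    clientId="00000000-0000-0000-0000-000000000000",
    clientSecret="<client-secret>",
    tenantId="00000000-0000-0000-0000-000000000000",
    accountName="mystorageaccount",
)

blob_service = AzureClient(credentials).create_blob_client()
print([container.name for container in blob_service.list_containers()])
```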
diff --git a/ingestion/src/metadata/data_quality/source/test_suite.py b/ingestion/src/metadata/data_quality/source/test_suite.py
index cdb0bdd37a8b..a60639db569c 100644
--- a/ingestion/src/metadata/data_quality/source/test_suite.py
+++ b/ingestion/src/metadata/data_quality/source/test_suite.py
@@ -83,11 +83,11 @@ def _get_test_cases_from_test_suite(
) -> Optional[List[TestCase]]:
"""Return test cases if the test suite exists and has them"""
if test_suite:
- test_cases = self.metadata.list_entities(
+ test_cases = self.metadata.list_all_entities(
entity=TestCase,
fields=["testSuite", "entityLink", "testDefinition"],
params={"testSuiteId": test_suite.id.__root__},
- ).entities
+ )
test_cases = cast(List[TestCase], test_cases) # satisfy type checker
return test_cases
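
Reviewer note: switching from `list_entities(...).entities` to `list_all_entities(...)` means the runner is expected to walk every page of test cases for a suite rather than only the first one. A rough, self-contained sketch of the pagination pattern this avoids, with a hypothetical client (not the ometa API):

```python
from typing import Iterator, List, Optional, Tuple


class FakeClient:
    """Hypothetical paginated client: each call returns (items, next_cursor)."""

    def __init__(self, items: List[str], page_size: int = 2):
        self.items, self.page_size = items, page_size

    def list_page(self, after: int = 0) -> Tuple[List[str], Optional[int]]:
        end = after + self.page_size
        return self.items[after:end], (end if end < len(self.items) else None)


def list_all(client: FakeClient) -> Iterator[str]:
    # Keep following the cursor until the server reports no further page.
    after: Optional[int] = 0
    while after is not None:
        page, after = client.list_page(after)
        yield from page


assert list(list_all(FakeClient(["tc1", "tc2", "tc3", "tc4", "tc5"]))) == [
    "tc1", "tc2", "tc3", "tc4", "tc5",
]
```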
diff --git a/ingestion/src/metadata/examples/workflows/datalake_azure.yaml b/ingestion/src/metadata/examples/workflows/datalake_azure_client_secret.yaml
similarity index 100%
rename from ingestion/src/metadata/examples/workflows/datalake_azure.yaml
rename to ingestion/src/metadata/examples/workflows/datalake_azure_client_secret.yaml
diff --git a/ingestion/src/metadata/examples/workflows/datalake_azure_default.yaml b/ingestion/src/metadata/examples/workflows/datalake_azure_default.yaml
new file mode 100644
index 000000000000..2a4f248232e3
--- /dev/null
+++ b/ingestion/src/metadata/examples/workflows/datalake_azure_default.yaml
@@ -0,0 +1,29 @@
+source:
+ type: datalake
+ serviceName: local_datalake4
+ serviceConnection:
+ config:
+ type: Datalake
+ configSource:
+ securityConfig:
+ clientId: clientId
+ accountName: accountName
+ bucketName: bucket name
+ prefix: prefix
+ sourceConfig:
+ config:
+ type: DatabaseMetadata
+ tableFilterPattern:
+ includes:
+ - ''
+sink:
+ type: metadata-rest
+ config: {}
+workflowConfig:
+# loggerLevel: INFO # DEBUG, INFO, WARN or ERROR
+ openMetadataServerConfig:
+ hostPort: http://localhost:8585/api
+ authProvider: openmetadata
+ securityConfig:
+ jwtToken: "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
+
\ No newline at end of file
diff --git a/ingestion/src/metadata/examples/workflows/dbt.yaml b/ingestion/src/metadata/examples/workflows/dbt.yaml
index c5266ec890a6..7d5f1e95ee72 100644
--- a/ingestion/src/metadata/examples/workflows/dbt.yaml
+++ b/ingestion/src/metadata/examples/workflows/dbt.yaml
@@ -5,21 +5,28 @@ source:
config:
type: DBT
# For DBT, choose one of Cloud, Local, HTTP, S3 or GCS configurations
- # dbtConfigSource:
- # # For cloud
- # dbtCloudAuthToken: token
- # dbtCloudAccountId: ID
- # dbtCloudJobId: JOB ID
- # dbtCloudUrl: https://cloud.getdbt.com
+ # For cloud
+ dbtConfigSource:
+ dbtConfigType: cloud
+ dbtCloudAuthToken: token
+ dbtCloudAccountId: ID
+ dbtCloudJobId: JOB ID
+ dbtCloudUrl: https://cloud.getdbt.com
# # For Local
+ # dbtConfigSource:
+ # dbtConfigType: local
# dbtCatalogFilePath: path-to-catalog.json
# dbtManifestFilePath: path-to-manifest.json
# dbtRunResultsFilePath: path-to-run_results.json
# # For HTTP
+ # dbtConfigSource:
+ # dbtConfigType: http
# dbtCatalogHttpPath: http://path-to-catalog.json
# dbtManifestHttpPath: http://path-to-manifest.json
# dbtRunResultsHttpPath: http://path-to-run_results.json
# # For S3
+ # dbtConfigSource:
+ # dbtConfigType: s3
# dbtSecurityConfig: # These are modeled after all AWS credentials
# awsAccessKeyId: KEY
# awsSecretAccessKey: SECRET
@@ -28,6 +35,8 @@ source:
# dbtBucketName: bucket_name
# dbtObjectPrefix: "main_dir/dbt_files"
# # For GCS
+ # dbtConfigSource:
+ # dbtConfigType: gcs
# dbtSecurityConfig: # These are modeled after all GCS credentials
# gcpConfig:
# type: My Type
@@ -47,6 +56,8 @@ source:
# dbtBucketName: bucket_name
# dbtObjectPrefix: "main_dir/dbt_files"
# # For Azure
+ # dbtConfigSource:
+ # dbtConfigType: azure
# dbtSecurityConfig: # These are modeled after all Azure credentials
# clientId: clientId
# clientSecret: clientSecret
diff --git a/ingestion/src/metadata/great_expectations/action.py b/ingestion/src/metadata/great_expectations/action.py
index 7a475cc6a10c..76fc5797ad8b 100644
--- a/ingestion/src/metadata/great_expectations/action.py
+++ b/ingestion/src/metadata/great_expectations/action.py
@@ -15,6 +15,7 @@
This subpackage needs to be used in Great Expectations
checkpoints actions.
"""
+import logging
import traceback
from datetime import datetime, timezone
from typing import Dict, List, Optional, Union, cast
@@ -72,9 +73,10 @@
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.utils import fqn
from metadata.utils.entity_link import get_entity_link
-from metadata.utils.logger import great_expectations_logger
-logger = great_expectations_logger()
+logger = logging.getLogger(
+ "great_expectations.validation_operators.validation_operators.openmetadata"
+)
class OpenMetadataValidationAction(ValidationAction):
@@ -107,7 +109,7 @@ def __init__(
self.config_file_path = config_file_path
self.ometa_conn = self._create_ometa_connection()
- def _run( # pylint: disable=unused-argument,arguments-renamed
+ def _run( # pylint: disable=unused-argument
self,
validation_result_suite: ExpectationSuiteValidationResult,
validation_result_suite_identifier: Union[
@@ -124,6 +126,7 @@ def _run( # pylint: disable=unused-argument,arguments-renamed
validation_result_suite: result suite returned when checkpoint is ran
validation_result_suite_identifier: type of result suite
data_asset:
+ payload:
expectation_suite_identifier: type of expectation suite
checkpoint_identifier: identifier for the checkpoint
"""
@@ -428,7 +431,7 @@ def _handle_test_case(
test_case_fqn=test_case.fullyQualifiedName.__root__,
)
- logger.info(
+ logger.debug(
f"Test case result for {test_case.fullyQualifiedName.__root__} successfully ingested"
)
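
Reviewer note: the action now logs under `great_expectations.validation_operators.validation_operators.openmetadata` instead of a dedicated OpenMetadata logger, so Great Expectations' own logging configuration controls its verbosity. A small illustration of how a nested child logger inherits the parent's effective level:

```python
import logging

# Configure only the Great Expectations parent logger...
logging.basicConfig(level=logging.WARNING)
logging.getLogger("great_expectations").setLevel(logging.DEBUG)

# ...and the nested child used by the checkpoint action inherits it.
child = logging.getLogger(
    "great_expectations.validation_operators.validation_operators.openmetadata"
)
child.debug("Test case result ingested")  # emitted, because the parent allows DEBUG
```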
diff --git a/ingestion/src/metadata/ingestion/api/parser.py b/ingestion/src/metadata/ingestion/api/parser.py
index c05d4dbdb21d..7808b281efc5 100644
--- a/ingestion/src/metadata/ingestion/api/parser.py
+++ b/ingestion/src/metadata/ingestion/api/parser.py
@@ -69,6 +69,28 @@
DatabaseServiceQueryUsagePipeline,
DatabaseUsageConfigType,
)
+from metadata.generated.schema.metadataIngestion.dbtconfig.dbtAzureConfig import (
+ DbtAzureConfig,
+)
+from metadata.generated.schema.metadataIngestion.dbtconfig.dbtCloudConfig import (
+ DbtCloudConfig,
+)
+from metadata.generated.schema.metadataIngestion.dbtconfig.dbtGCSConfig import (
+ DbtGcsConfig,
+)
+from metadata.generated.schema.metadataIngestion.dbtconfig.dbtHttpConfig import (
+ DbtHttpConfig,
+)
+from metadata.generated.schema.metadataIngestion.dbtconfig.dbtLocalConfig import (
+ DbtLocalConfig,
+)
+from metadata.generated.schema.metadataIngestion.dbtconfig.dbtS3Config import (
+ DbtS3Config,
+)
+from metadata.generated.schema.metadataIngestion.dbtPipeline import (
+ DbtConfigType,
+ DbtPipeline,
+)
from metadata.generated.schema.metadataIngestion.messagingServiceMetadataPipeline import (
MessagingMetadataConfigType,
MessagingServiceMetadataPipeline,
@@ -125,6 +147,16 @@
DatabaseMetadataConfigType.DatabaseMetadata.value: DatabaseServiceMetadataPipeline,
StorageMetadataConfigType.StorageMetadata.value: StorageServiceMetadataPipeline,
SearchMetadataConfigType.SearchMetadata.value: SearchServiceMetadataPipeline,
+ DbtConfigType.DBT.value: DbtPipeline,
+}
+
+DBT_CONFIG_TYPE_MAP = {
+ "cloud": DbtCloudConfig,
+ "local": DbtLocalConfig,
+ "http": DbtHttpConfig,
+ "s3": DbtS3Config,
+ "gcs": DbtGcsConfig,
+ "azure": DbtAzureConfig,
}
@@ -171,6 +203,7 @@ def get_source_config_class(
Type[PipelineServiceMetadataPipeline],
Type[MlModelServiceMetadataPipeline],
Type[DatabaseServiceMetadataPipeline],
+ Type[DbtPipeline],
]:
"""
Return the source config type for a source string
@@ -179,7 +212,7 @@ def get_source_config_class(
"""
source_config_class = SOURCE_CONFIG_CLASS_MAP.get(source_config_type)
- if source_config_type:
+ if source_config_class:
return source_config_class
raise ValueError(f"Cannot find the service type of {source_config_type}")
@@ -266,6 +299,27 @@ def _unsafe_parse_config(config: dict, cls: Type[T], message: str) -> None:
raise err
+def _unsafe_parse_dbt_config(config: dict, cls: Type[T], message: str) -> None:
+ """
+ Given a config dictionary and the class it should match,
+ try to parse it or log the given message
+ """
+ logger.debug(f"Parsing message: [{message}]")
+ try:
+ # Parse the oneOf config types of dbt to check
+ dbt_config_type = config["dbtConfigSource"]["dbtConfigType"]
+ dbt_config_class = DBT_CONFIG_TYPE_MAP.get(dbt_config_type)
+ dbt_config_class.parse_obj(config["dbtConfigSource"])
+
+ # Parse the entire dbtPipeline object
+ cls.parse_obj(config)
+ except ValidationError as err:
+ logger.debug(
+ f"The supported properties for {cls.__name__} are {list(cls.__fields__.keys())}"
+ )
+ raise err
+
+
def _parse_inner_connection(config_dict: dict, source_type: str) -> None:
"""
Parse the inner connection of the flagged connectors
@@ -291,32 +345,35 @@ def parse_service_connection(config_dict: dict) -> None:
:param config_dict: JSON configuration
"""
# Unsafe access to the keys. Allow a KeyError if the config is not well formatted
- source_type = config_dict["source"]["serviceConnection"]["config"].get("type")
- if source_type is None:
- raise InvalidWorkflowException("Missing type in the serviceConnection config")
+ if config_dict["source"].get("serviceConnection"):
+ source_type = config_dict["source"]["serviceConnection"]["config"].get("type")
+ if source_type is None:
+ raise InvalidWorkflowException(
+ "Missing type in the serviceConnection config"
+ )
- logger.debug(
- f"Error parsing the Workflow Configuration for {source_type} ingestion"
- )
+ logger.debug(
+ f"Error parsing the Workflow Configuration for {source_type} ingestion"
+ )
- service_type = get_service_type(source_type)
- connection_class = get_connection_class(source_type, service_type)
+ service_type = get_service_type(source_type)
+ connection_class = get_connection_class(source_type, service_type)
- if source_type in HAS_INNER_CONNECTION:
- # We will first parse the inner `connection` configuration
- _parse_inner_connection(
- config_dict["source"]["serviceConnection"]["config"]["connection"][
- "config"
- ]["connection"],
- source_type,
- )
+ if source_type in HAS_INNER_CONNECTION:
+ # We will first parse the inner `connection` configuration
+ _parse_inner_connection(
+ config_dict["source"]["serviceConnection"]["config"]["connection"][
+ "config"
+ ]["connection"],
+ source_type,
+ )
- # Parse the service connection dictionary with the scoped class
- _unsafe_parse_config(
- config=config_dict["source"]["serviceConnection"]["config"],
- cls=connection_class,
- message="Error parsing the service connection",
- )
+ # Parse the service connection dictionary with the scoped class
+ _unsafe_parse_config(
+ config=config_dict["source"]["serviceConnection"]["config"],
+ cls=connection_class,
+ message="Error parsing the service connection",
+ )
def parse_source_config(config_dict: dict) -> None:
@@ -334,6 +391,13 @@ def parse_source_config(config_dict: dict) -> None:
source_config_class = get_source_config_class(source_config_type)
+ if source_config_class == DbtPipeline:
+ _unsafe_parse_dbt_config(
+ config=config_dict["source"]["sourceConfig"]["config"],
+ cls=source_config_class,
+ message="Error parsing the dbt source config",
+ )
+
_unsafe_parse_config(
config=config_dict["source"]["sourceConfig"]["config"],
cls=source_config_class,
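
Reviewer note: dbt source configs are now validated in two passes — the concrete `dbtConfigSource` model selected via `DBT_CONFIG_TYPE_MAP` by its `dbtConfigType`, then the full `DbtPipeline` object — so validation errors point at the right fields. A minimal, self-contained sketch of the same dispatch pattern with hypothetical pydantic models (not the generated schema classes):

```python
from pydantic import BaseModel, ValidationError


class CloudConfig(BaseModel):
    dbtConfigType: str
    dbtCloudAuthToken: str
    dbtCloudAccountId: str


class LocalConfig(BaseModel):
    dbtConfigType: str
    dbtCatalogFilePath: str
    dbtManifestFilePath: str


CONFIG_TYPE_MAP = {"cloud": CloudConfig, "local": LocalConfig}


def parse_dbt_source(config: dict) -> BaseModel:
    # Pick the concrete model by the declared dbtConfigType, then validate
    # against that model only, so the error names the missing fields.
    config_cls = CONFIG_TYPE_MAP[config["dbtConfigSource"]["dbtConfigType"]]
    return config_cls.parse_obj(config["dbtConfigSource"])


parse_dbt_source(
    {"dbtConfigSource": {"dbtConfigType": "cloud", "dbtCloudAuthToken": "t", "dbtCloudAccountId": "1"}}
)
try:
    parse_dbt_source({"dbtConfigSource": {"dbtConfigType": "local"}})
except ValidationError as err:
    print(err)  # missing dbtCatalogFilePath / dbtManifestFilePath
```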
diff --git a/ingestion/src/metadata/ingestion/api/topology_runner.py b/ingestion/src/metadata/ingestion/api/topology_runner.py
index 2e3c52a94db4..398043cec204 100644
--- a/ingestion/src/metadata/ingestion/api/topology_runner.py
+++ b/ingestion/src/metadata/ingestion/api/topology_runner.py
@@ -265,11 +265,12 @@ def yield_and_update_context(
if entity:
same_fingerprint = True
- create_entity_request_hash = generate_source_hash(
- create_request=entity_request.right,
- )
+ create_entity_request_hash = None
if hasattr(entity_request.right, "sourceHash"):
+ create_entity_request_hash = generate_source_hash(
+ create_request=entity_request.right,
+ )
entity_request.right.sourceHash = create_entity_request_hash
if entity is None and stage.use_cache:
diff --git a/ingestion/src/metadata/ingestion/lineage/parser.py b/ingestion/src/metadata/ingestion/lineage/parser.py
index 5d855784523d..368dd738abab 100644
--- a/ingestion/src/metadata/ingestion/lineage/parser.py
+++ b/ingestion/src/metadata/ingestion/lineage/parser.py
@@ -217,6 +217,11 @@ def get_comparison_elements(
"""
aliases = self.table_aliases
values = identifier.value.split(".")
+
+ if len(values) > 4:
+ logger.debug(f"Invalid comparison element from identifier: {identifier}")
+ return None, None
+
database_name, schema_name, table_or_alias, column_name = (
[None] * (4 - len(values))
) + values
@@ -307,29 +312,39 @@ def stateful_add_joins_from_statement(
comparisons.append(sub)
for comparison in comparisons:
- if "." not in comparison.left.value or "." not in comparison.right.value:
- logger.debug(f"Ignoring comparison {comparison}")
- continue
-
- table_left, column_left = self.get_comparison_elements(
- identifier=comparison.left
- )
- table_right, column_right = self.get_comparison_elements(
- identifier=comparison.right
- )
+ try:
+ if (
+ "." not in comparison.left.value
+ or "." not in comparison.right.value
+ ):
+ logger.debug(f"Ignoring comparison {comparison}")
+ continue
+
+ table_left, column_left = self.get_comparison_elements(
+ identifier=comparison.left
+ )
+ table_right, column_right = self.get_comparison_elements(
+ identifier=comparison.right
+ )
- if not table_left or not table_right:
- logger.warning(f"Cannot find ingredients from {comparison}")
- continue
+ if not table_left or not table_right:
+ logger.warning(
+ f"Can't extract table names when parsing JOIN information from {comparison}"
+ )
+ logger.debug(f"Query: {sql_statement}")
+ continue
- left_table_column = TableColumn(table=table_left, column=column_left)
- right_table_column = TableColumn(table=table_right, column=column_right)
+ left_table_column = TableColumn(table=table_left, column=column_left)
+ right_table_column = TableColumn(table=table_right, column=column_right)
- # We just send the info once, from Left -> Right.
- # The backend will prepare the symmetric information.
- self.stateful_add_table_joins(
- join_data, left_table_column, right_table_column
- )
+ # We just send the info once, from Left -> Right.
+ # The backend will prepare the symmetric information.
+ self.stateful_add_table_joins(
+ join_data, left_table_column, right_table_column
+ )
+ except Exception as exc:
+ logger.debug(f"Cannot process comparison {comparison}: {exc}")
+ logger.debug(traceback.format_exc())
@cached_property
def table_joins(self) -> Dict[str, List[TableColumnJoin]]:
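
Reviewer note: join extraction now pads dotted identifiers to a fixed `(database, schema, table, column)` shape, bails out instead of raising when an identifier has more than four parts, and wraps each comparison in try/except so one bad comparison no longer aborts the whole statement. The padding trick in isolation:

```python
from typing import Optional, Tuple


def split_identifier(value: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    """Left-pad a dotted identifier to (database, schema, table, column)."""
    values = value.split(".")
    if len(values) > 4:
        # More parts than the expected shape: give up on this identifier.
        return None, None, None, None
    database, schema, table, column = ([None] * (4 - len(values))) + values
    return database, schema, table, column


assert split_identifier("col") == (None, None, None, "col")
assert split_identifier("tbl.col") == (None, None, "tbl", "col")
assert split_identifier("db.sch.tbl.col") == ("db", "sch", "tbl", "col")
assert split_identifier("a.b.c.d.e") == (None, None, None, None)
```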
diff --git a/ingestion/src/metadata/ingestion/models/custom_properties.py b/ingestion/src/metadata/ingestion/models/custom_properties.py
index 6a62c1dbc053..c287f64f122c 100644
--- a/ingestion/src/metadata/ingestion/models/custom_properties.py
+++ b/ingestion/src/metadata/ingestion/models/custom_properties.py
@@ -41,7 +41,6 @@ class CustomPropertyDataTypes(Enum):
class OMetaCustomProperties(BaseModel):
entity_type: Type[T]
- custom_property_type: Optional[CustomPropertyDataTypes]
createCustomPropertyRequest: CreateCustomPropertyRequest
diff --git a/ingestion/src/metadata/ingestion/models/patch_request.py b/ingestion/src/metadata/ingestion/models/patch_request.py
index 513ea5be3ac6..862c412b432e 100644
--- a/ingestion/src/metadata/ingestion/models/patch_request.py
+++ b/ingestion/src/metadata/ingestion/models/patch_request.py
@@ -19,6 +19,7 @@
from metadata.ingestion.api.models import Entity, T
from metadata.ingestion.ometa.mixins.patch_mixin_utils import PatchOperation
+from metadata.ingestion.ometa.utils import model_str
class PatchRequest(BaseModel):
@@ -138,12 +139,15 @@ class PatchedEntity(BaseModel):
RESTRICT_UPDATE_LIST = ["description", "tags", "owner"]
+ARRAY_ENTITY_FIELDS = ["columns", "tasks", "fields"]
+
def build_patch(
source: T,
destination: T,
allowed_fields: Optional[Dict] = None,
restrict_update_fields: Optional[List] = None,
+ array_entity_fields: Optional[List] = None,
) -> Optional[jsonpatch.JsonPatch]:
"""
Given an Entity type and Source entity and Destination entity,
@@ -163,6 +167,13 @@ def build_patch(
source = _remove_change_description(source)
destination = _remove_change_description(destination)
+ if array_entity_fields:
+ _sort_array_entity_fields(
+ source=source,
+ destination=destination,
+ array_entity_fields=array_entity_fields,
+ )
+
# Get the difference between source and destination
if allowed_fields:
patch = jsonpatch.make_patch(
@@ -192,20 +203,61 @@ def build_patch(
# for a user editable fields like descriptions, tags we only want to support "add" operation in patch
# we will remove the other operations for replace, remove from here
if restrict_update_fields:
- patch.patch = [
- patch_ops
- for patch_ops in patch.patch
+ patch_ops_list = []
+ for patch_ops in patch.patch or []:
if _determine_restricted_operation(
- patch_ops=patch_ops,
- restrict_update_fields=restrict_update_fields,
- )
- ]
-
+ patch_ops=patch_ops, restrict_update_fields=restrict_update_fields
+ ):
+ if (
+ patch_ops.get("op") == PatchOperation.REPLACE.value
+ and patch_ops.get("value") is None
+ ):
+ patch_ops["op"] = PatchOperation.REMOVE.value
+ del patch_ops["value"]
+ patch_ops_list.append(patch_ops)
+ patch.patch = patch_ops_list
return patch
+def _sort_array_entity_fields(
+ source: T,
+ destination: T,
+ array_entity_fields: Optional[List] = None,
+):
+ """
+ Sort the array entity fields to make sure the order is consistent
+ """
+ for field in array_entity_fields or []:
+ if hasattr(destination, field) and hasattr(source, field):
+ destination_attributes = getattr(destination, field)
+ source_attributes = getattr(source, field)
+
+ # Create a dictionary of destination attributes for easy lookup
+ destination_dict = {
+ model_str(attr.name): attr for attr in destination_attributes
+ }
+
+ updated_attributes = []
+ for source_attr in source_attributes or []:
+ # Update the destination attribute with the source attribute
+ destination_attr = destination_dict.get(model_str(source_attr.name))
+ if destination_attr:
+ updated_attributes.append(
+ source_attr.copy(update=destination_attr.__dict__)
+ )
+ # Remove the updated attribute from the destination dictionary
+ del destination_dict[model_str(source_attr.name)]
+ else:
+ updated_attributes.append(None)
+
+ # Combine the updated attributes with the remaining destination attributes
+ final_attributes = updated_attributes + list(destination_dict.values())
+ setattr(destination, field, final_attributes)
+
+
def _determine_restricted_operation(
- patch_ops: Dict, restrict_update_fields: Optional[List] = None
+ patch_ops: Dict,
+ restrict_update_fields: Optional[List] = None,
) -> bool:
"""
Only retain add operation for restrict_update_fields fields
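Note: the array-field sorting introduced above exists to keep the jsonpatch output small when list elements arrive in a different order. A minimal standalone sketch (plain dicts and made-up column names, not the OpenMetadata models):

# Why element order matters for jsonpatch.make_patch; data is illustrative only.
import jsonpatch

source = {"columns": [{"name": "id"}, {"name": "email", "description": "old"}]}
destination = {"columns": [{"name": "email", "description": "new"}, {"name": "id"}]}

# Diffing as-is produces operations for every shuffled element.
print(jsonpatch.make_patch(source, destination).patch)

# Aligning destination["columns"] to the source order first shrinks the patch
# to the single description change, mirroring what _sort_array_entity_fields does.
order = {col["name"]: idx for idx, col in enumerate(source["columns"])}
destination["columns"].sort(key=lambda col: order.get(col["name"], len(order)))
print(jsonpatch.make_patch(source, destination).patch)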
diff --git a/ingestion/src/metadata/ingestion/ometa/mixins/custom_property_mixin.py b/ingestion/src/metadata/ingestion/ometa/mixins/custom_property_mixin.py
index a60aea192466..5b0fe01115c0 100644
--- a/ingestion/src/metadata/ingestion/ometa/mixins/custom_property_mixin.py
+++ b/ingestion/src/metadata/ingestion/ometa/mixins/custom_property_mixin.py
@@ -15,7 +15,8 @@
"""
from typing import Dict
-from metadata.generated.schema.api.data.createCustomProperty import PropertyType
+from metadata.generated.schema.type.customProperty import PropertyType
+from metadata.generated.schema.type.entityReference import EntityReference
from metadata.ingestion.models.custom_properties import (
CustomPropertyDataTypes,
CustomPropertyType,
@@ -54,16 +55,6 @@ def create_or_update_custom_property(
f"/metadata/types/name/{entity_type}?category=field"
)
- # Get the data type of the custom property
- if not ometa_custom_property.createCustomPropertyRequest.propertyType:
- custom_property_type = self.get_custom_property_type(
- data_type=ometa_custom_property.custom_property_type
- )
- property_type = PropertyType(id=custom_property_type.id, type="type")
- ometa_custom_property.createCustomPropertyRequest.propertyType = (
- property_type
- )
-
resp = self.client.put(
f"/metadata/types/{entity_schema.get('id')}",
data=ometa_custom_property.createCustomPropertyRequest.json(),
@@ -78,3 +69,12 @@ def get_custom_property_type(
"""
resp = self.client.get(f"/metadata/types/name/{data_type.value}?category=field")
return CustomPropertyType(**resp)
+
+ def get_property_type_ref(self, data_type: CustomPropertyDataTypes) -> PropertyType:
+ """
+ Get the PropertyType for custom properties
+ """
+ custom_property_type = self.get_custom_property_type(data_type=data_type)
+ return PropertyType(
+ __root__=EntityReference(id=custom_property_type.id, type="type")
+ )
diff --git a/ingestion/src/metadata/ingestion/ometa/mixins/patch_mixin.py b/ingestion/src/metadata/ingestion/ometa/mixins/patch_mixin.py
index 3e627597d8f0..542861294825 100644
--- a/ingestion/src/metadata/ingestion/ometa/mixins/patch_mixin.py
+++ b/ingestion/src/metadata/ingestion/ometa/mixins/patch_mixin.py
@@ -46,7 +46,7 @@
)
from metadata.ingestion.ometa.utils import model_str
from metadata.utils.deprecation import deprecated
-from metadata.utils.logger import ometa_logger
+from metadata.utils.logger import get_log_name, ometa_logger
logger = ometa_logger()
@@ -119,6 +119,7 @@ def patch(
destination: T,
allowed_fields: Optional[Dict] = None,
restrict_update_fields: Optional[List] = None,
+ array_entity_fields: Optional[List] = None,
) -> Optional[T]:
"""
Given an Entity type and Source entity and Destination entity,
@@ -140,6 +141,7 @@ def patch(
destination=destination,
allowed_fields=allowed_fields,
restrict_update_fields=restrict_update_fields,
+ array_entity_fields=array_entity_fields,
)
if not patch:
@@ -153,9 +155,7 @@ def patch(
except Exception as exc:
logger.debug(traceback.format_exc())
- logger.error(
- f"Error trying to PATCH {entity.__name__} [{source.id.__root__}]: {exc}"
- )
+ logger.error(f"Error trying to PATCH {get_log_name(source)}: {exc}")
return None
diff --git a/ingestion/src/metadata/ingestion/ometa/routes.py b/ingestion/src/metadata/ingestion/ometa/routes.py
index f625459a179f..c92dda4fc3d5 100644
--- a/ingestion/src/metadata/ingestion/ometa/routes.py
+++ b/ingestion/src/metadata/ingestion/ometa/routes.py
@@ -90,6 +90,16 @@
from metadata.generated.schema.api.tests.createTestSuite import CreateTestSuiteRequest
from metadata.generated.schema.dataInsight.dataInsightChart import DataInsightChart
from metadata.generated.schema.dataInsight.kpi.kpi import Kpi
+from metadata.generated.schema.entity.applications.app import App
+from metadata.generated.schema.entity.applications.createAppRequest import (
+ CreateAppRequest,
+)
+from metadata.generated.schema.entity.applications.marketplace.appMarketPlaceDefinition import (
+ AppMarketPlaceDefinition,
+)
+from metadata.generated.schema.entity.applications.marketplace.createAppMarketPlaceDefinitionReq import (
+ CreateAppMarketPlaceDefinitionRequest,
+)
from metadata.generated.schema.entity.automations.workflow import Workflow
from metadata.generated.schema.entity.bot import Bot
from metadata.generated.schema.entity.classification.classification import (
@@ -232,4 +242,9 @@
# Suggestions
Suggestion.__name__: "/suggestions",
CreateSuggestionRequest.__name__: "/suggestions",
+ # Apps
+ App.__name__: "/apps",
+ CreateAppRequest.__name__: "/apps",
+ AppMarketPlaceDefinition.__name__: "/apps/marketplace",
+ CreateAppMarketPlaceDefinitionRequest.__name__: "/apps/marketplace",
}
diff --git a/ingestion/src/metadata/ingestion/sink/metadata_rest.py b/ingestion/src/metadata/ingestion/sink/metadata_rest.py
index cdff7d632dc8..e6d6e3c2014e 100644
--- a/ingestion/src/metadata/ingestion/sink/metadata_rest.py
+++ b/ingestion/src/metadata/ingestion/sink/metadata_rest.py
@@ -62,6 +62,7 @@
from metadata.ingestion.models.ometa_topic_data import OMetaTopicSampleData
from metadata.ingestion.models.patch_request import (
ALLOWED_COMMON_PATCH_FIELDS,
+ ARRAY_ENTITY_FIELDS,
RESTRICT_UPDATE_LIST,
PatchedEntity,
PatchRequest,
@@ -179,6 +180,7 @@ def patch_entity(self, record: PatchRequest) -> Either[Entity]:
destination=record.new_entity,
allowed_fields=ALLOWED_COMMON_PATCH_FIELDS,
restrict_update_fields=RESTRICT_UPDATE_LIST,
+ array_entity_fields=ARRAY_ENTITY_FIELDS,
)
patched_entity = PatchedEntity(new_entity=entity) if entity else None
return Either(right=patched_entity)
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/looker/utils.py b/ingestion/src/metadata/ingestion/source/dashboard/looker/utils.py
index a2e1656992d0..d4b237f75464 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/looker/utils.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/looker/utils.py
@@ -54,14 +54,16 @@ def _clone_repo(
return
url = None
+ allow_unsafe_protocols = False
if isinstance(credential, GitHubCredentials):
url = f"https://x-oauth-basic:{credential.token.__root__.get_secret_value()}@github.com/{repo_name}.git"
elif isinstance(credential, BitBucketCredentials):
- url = f"https://x-token-auth::{credential.token.__root__.get_secret_value()}@bitbucket.or/{repo_name}.git"
+ url = f"https://x-token-auth:{credential.token.__root__.get_secret_value()}@bitbucket.org/{repo_name}.git"
+ allow_unsafe_protocols = True
assert url is not None
- Repo.clone_from(url, path)
+ Repo.clone_from(url, path, allow_unsafe_protocols=allow_unsafe_protocols)
logger.info(f"repo {repo_name} cloned to {path}")
except Exception as exc:
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/metabase/client.py b/ingestion/src/metadata/ingestion/source/dashboard/metabase/client.py
index dad59df5e4a6..9a0bf81d83fd 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/metabase/client.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/metabase/client.py
@@ -124,6 +124,10 @@ def get_dashboard_details(
try:
resp_dashboard = self.client.get(f"/dashboard/{dashboard_id}")
if resp_dashboard:
+ # Small hack needed to support Metabase versions older than 0.48
+ # https://www.metabase.com/releases/metabase-48#fyi--breaking-changes
+ if "ordered_cards" in resp_dashboard:
+ resp_dashboard["dashcards"] = resp_dashboard["ordered_cards"]
return MetabaseDashboardDetails(**resp_dashboard)
except Exception:
logger.debug(traceback.format_exc())
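Note: a tiny illustration of the compatibility shim above, using a hypothetical pre-0.48 Metabase payload (a real response carries many more keys):

# Older Metabase (<0.48) returns "ordered_cards"; copying it to "dashcards"
# lets a single response model cover both API versions.
resp_dashboard = {"id": 1, "ordered_cards": [{"card": {"name": "Orders by month"}}]}
if "ordered_cards" in resp_dashboard:
    resp_dashboard["dashcards"] = resp_dashboard["ordered_cards"]
print(resp_dashboard["dashcards"][0]["card"]["name"])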
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/metabase/metadata.py b/ingestion/src/metadata/ingestion/source/dashboard/metabase/metadata.py
index 677d883acec4..7ed7020bf276 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/metabase/metadata.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/metabase/metadata.py
@@ -178,7 +178,7 @@ def yield_dashboard_chart(
Returns:
Iterable[CreateChartRequest]
"""
- charts = dashboard_details.ordered_cards
+ charts = dashboard_details.dashcards
for chart in charts:
try:
chart_details = chart.card
@@ -225,7 +225,7 @@ def yield_dashboard_lineage_details(
if not db_service_name:
return
chart_list, dashboard_name = (
- dashboard_details.ordered_cards,
+ dashboard_details.dashcards,
str(dashboard_details.id),
)
for chart in chart_list:
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/metabase/models.py b/ingestion/src/metadata/ingestion/source/dashboard/metabase/models.py
index 7605ed31574e..fce0d030adc7 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/metabase/models.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/metabase/models.py
@@ -67,7 +67,7 @@ class MetabaseChart(BaseModel):
display: Optional[str]
-class OrderedCard(BaseModel):
+class DashCard(BaseModel):
card: MetabaseChart
@@ -77,7 +77,7 @@ class MetabaseDashboardDetails(BaseModel):
"""
description: Optional[str]
- ordered_cards: List[OrderedCard]
+ dashcards: List[DashCard]
name: Optional[str]
id: int
collection_id: Optional[str]
diff --git a/ingestion/src/metadata/ingestion/source/dashboard/powerbi/client.py b/ingestion/src/metadata/ingestion/source/dashboard/powerbi/client.py
index 900147135924..e72cead1d795 100644
--- a/ingestion/src/metadata/ingestion/source/dashboard/powerbi/client.py
+++ b/ingestion/src/metadata/ingestion/source/dashboard/powerbi/client.py
@@ -19,6 +19,9 @@
import msal
+from metadata.generated.schema.entity.services.connections.dashboard.powerBIConnection import (
+ PowerBIConnection,
+)
from metadata.ingestion.api.steps import InvalidSourceException
from metadata.ingestion.ometa.client import REST, ClientConfig
from metadata.ingestion.source.dashboard.powerbi.models import (
@@ -52,7 +55,7 @@ class PowerBiApiClient:
client: REST
- def __init__(self, config):
+ def __init__(self, config: PowerBIConnection):
self.config = config
self.msal_client = msal.ConfidentialClientApplication(
client_id=self.config.clientId,
diff --git a/ingestion/src/metadata/ingestion/source/database/azuresql/connection.py b/ingestion/src/metadata/ingestion/source/database/azuresql/connection.py
index 1cb201d7b099..9fd23b2fa19e 100644
--- a/ingestion/src/metadata/ingestion/source/database/azuresql/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/azuresql/connection.py
@@ -15,12 +15,13 @@
from typing import Optional, Union
from urllib.parse import quote_plus
-from sqlalchemy.engine import Engine
+from sqlalchemy.engine import URL, Engine
from metadata.generated.schema.entity.automations.workflow import (
Workflow as AutomationWorkflow,
)
from metadata.generated.schema.entity.services.connections.database.azureSQLConnection import (
+ Authentication,
AzureSQLConnection,
)
from metadata.generated.schema.entity.services.connections.database.mssqlConnection import (
@@ -40,13 +41,29 @@ def get_connection_url(connection: Union[AzureSQLConnection, MssqlConnection]) -
Build the connection URL
"""
+ if connection.authenticationMode:
+ connection_string = f"Driver={connection.driver};Server={connection.hostPort};Database={connection.database};"
+ connection_string += f"Uid={connection.username};"
+ if (
+ connection.authenticationMode.authentication
+ == Authentication.ActiveDirectoryPassword
+ ):
+ connection_string += f"Pwd={connection.password.get_secret_value()};"
+
+ connection_string += f"Encrypt={'yes' if connection.authenticationMode.encrypt else 'no'};TrustServerCertificate={'yes' if connection.authenticationMode.trustServerCertificate else 'no'};"
+ connection_string += f"Connection Timeout={connection.authenticationMode.connectionTimeout or 30};Authentication={connection.authenticationMode.authentication.value};"
+
+ connection_url = URL.create(
+ "mssql+pyodbc", query={"odbc_connect": connection_string}
+ )
+ return connection_url
url = f"{connection.scheme.value}://"
if connection.username:
url += f"{quote_plus(connection.username)}"
url += (
f":{quote_plus(connection.password.get_secret_value())}"
- if connection
+ if connection.password
else ""
)
url += "@"
@@ -54,12 +71,13 @@ def get_connection_url(connection: Union[AzureSQLConnection, MssqlConnection]) -
url += f"{connection.hostPort}"
url += f"/{quote_plus(connection.database)}" if connection.database else ""
url += f"?driver={quote_plus(connection.driver)}"
+
options = get_connection_options_dict(connection)
if options:
if not connection.database:
url += "/"
params = "&".join(
- f"{key}={quote_plus(value)}" for (key, value) in options.items() if value
+ f"{key}={quote_plus(value)}" for key, value in options.items() if value
)
url = f"{url}?{params}"
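Note: a rough standalone sketch of the odbc_connect branch added above; the driver, server, and database values are placeholders, not a working endpoint:

# SQLAlchemy URL-encodes the raw ODBC string passed via the odbc_connect query key.
from sqlalchemy.engine import URL

odbc_str = (
    "Driver={ODBC Driver 18 for SQL Server};"
    "Server=myserver.database.windows.net;"
    "Database=mydb;"
    "Authentication=ActiveDirectoryPassword;"
)
url = URL.create("mssql+pyodbc", query={"odbc_connect": odbc_str})
print(url.render_as_string(hide_password=False))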
diff --git a/ingestion/src/metadata/ingestion/source/database/bigquery/metadata.py b/ingestion/src/metadata/ingestion/source/database/bigquery/metadata.py
index 28fc26659481..ab090e6e6216 100644
--- a/ingestion/src/metadata/ingestion/source/database/bigquery/metadata.py
+++ b/ingestion/src/metadata/ingestion/source/database/bigquery/metadata.py
@@ -251,7 +251,9 @@ def _test_connection(self) -> None:
test_connection_fn(
self.metadata, inspector_details.engine, self.service_connection
)
- if os.environ[GOOGLE_CREDENTIALS]:
+        # GOOGLE_CREDENTIALS may not have been set;
+        # use dict `get` to avoid a KeyError
+ if os.environ.get(GOOGLE_CREDENTIALS):
self.temp_credentials_file_path.append(os.environ[GOOGLE_CREDENTIALS])
def query_table_names_and_types(
@@ -442,7 +444,8 @@ def set_inspector(self, database_name: str):
inspector_details = get_inspector_details(
database_name=database_name, service_connection=self.service_connection
)
- self.temp_credentials_file_path.append(os.environ[GOOGLE_CREDENTIALS])
+ if os.environ.get(GOOGLE_CREDENTIALS):
+ self.temp_credentials_file_path.append(os.environ[GOOGLE_CREDENTIALS])
self.client = inspector_details.client
self.engine = inspector_details.engine
self.inspector = inspector_details.inspector
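Note: the os.environ change above avoids a KeyError when GOOGLE_CREDENTIALS was never exported; a trivial standalone check:

import os

os.environ.pop("GOOGLE_CREDENTIALS", None)     # simulate the variable being unset
print(os.environ.get("GOOGLE_CREDENTIALS"))    # None, no exception raised
try:
    os.environ["GOOGLE_CREDENTIALS"]           # the old pattern
except KeyError as err:
    print("KeyError:", err)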
diff --git a/ingestion/src/metadata/ingestion/source/database/datalake/connection.py b/ingestion/src/metadata/ingestion/source/database/datalake/connection.py
index 56e5315da5ba..c5be85eb5a13 100644
--- a/ingestion/src/metadata/ingestion/source/database/datalake/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/datalake/connection.py
@@ -20,6 +20,7 @@
from google.cloud import storage
+from metadata.clients.azure_client import AzureClient
from metadata.generated.schema.entity.automations.workflow import (
Workflow as AutomationWorkflow,
)
@@ -88,22 +89,9 @@ def _(config: GCSConfig):
@get_datalake_client.register
def _(config: AzureConfig):
- from azure.identity import ClientSecretCredential
- from azure.storage.blob import BlobServiceClient
try:
- credentials = ClientSecretCredential(
- config.securityConfig.tenantId,
- config.securityConfig.clientId,
- config.securityConfig.clientSecret.get_secret_value(),
- )
-
- azure_client = BlobServiceClient(
- f"https://{config.securityConfig.accountName}.blob.core.windows.net/",
- credential=credentials,
- )
- return azure_client
-
+ return AzureClient(config.securityConfig).create_blob_client()
except Exception as exc:
raise RuntimeError(
f"Unknown error connecting with {config.securityConfig}: {exc}."
diff --git a/ingestion/src/metadata/ingestion/source/database/datalake/metadata.py b/ingestion/src/metadata/ingestion/source/database/datalake/metadata.py
index a32a2a9aa7ec..727898801832 100644
--- a/ingestion/src/metadata/ingestion/source/database/datalake/metadata.py
+++ b/ingestion/src/metadata/ingestion/source/database/datalake/metadata.py
@@ -407,7 +407,7 @@ def yield_table(
schema_name = self.context.database_schema
try:
table_constraints = None
- data_frame = fetch_dataframe(
+ data_frame, raw_data = fetch_dataframe(
config_source=self.config_source,
client=self.client,
file_fqn=DatalakeTableSchemaWrapper(
@@ -415,10 +415,11 @@ def yield_table(
bucket_name=schema_name,
file_extension=table_extension,
),
+ fetch_raw_data=True,
)
if data_frame:
column_parser = DataFrameColumnParser.create(
- data_frame[0], table_extension
+ data_frame[0], table_extension, raw_data=raw_data
)
columns = column_parser.get_columns()
else:
diff --git a/ingestion/src/metadata/ingestion/source/database/dbt/dbt_config.py b/ingestion/src/metadata/ingestion/source/database/dbt/dbt_config.py
index c130494c73c6..79e9bc1ec2e8 100644
--- a/ingestion/src/metadata/ingestion/source/database/dbt/dbt_config.py
+++ b/ingestion/src/metadata/ingestion/source/database/dbt/dbt_config.py
@@ -20,6 +20,7 @@
import requests
from metadata.clients.aws_client import AWSClient
+from metadata.clients.azure_client import AzureClient
from metadata.generated.schema.metadataIngestion.dbtconfig.dbtAzureConfig import (
DbtAzureConfig,
)
@@ -172,7 +173,7 @@ def _(config: DbtCloudConfig): # pylint: disable=too-many-locals
params_data["job_definition_id"] = job_id
response = client.get(f"/accounts/{account_id}/runs", data=params_data)
- if not response and not response.get("data"):
+ if not response or not response.get("data"):
raise DBTConfigException(
"Unable to get the dbt job runs information.\n"
"Please check if the auth token is correct and has the necessary scopes to fetch dbt runs"
@@ -357,21 +358,8 @@ def _(config: DbtGcsConfig):
def _(config: DbtAzureConfig):
try:
bucket_name, prefix = get_dbt_prefix_config(config)
- from azure.identity import ( # pylint: disable=import-outside-toplevel
- ClientSecretCredential,
- )
- from azure.storage.blob import ( # pylint: disable=import-outside-toplevel
- BlobServiceClient,
- )
- client = BlobServiceClient(
- f"https://{config.dbtSecurityConfig.accountName}.blob.core.windows.net/",
- credential=ClientSecretCredential(
- config.dbtSecurityConfig.tenantId,
- config.dbtSecurityConfig.clientId,
- config.dbtSecurityConfig.clientSecret.get_secret_value(),
- ),
- )
+ client = AzureClient(config.dbtSecurityConfig).create_blob_client()
if not bucket_name:
container_dicts = client.list_containers()
diff --git a/ingestion/src/metadata/ingestion/source/database/mssql/lineage.py b/ingestion/src/metadata/ingestion/source/database/mssql/lineage.py
index 0f22eddf2993..4a112563d354 100644
--- a/ingestion/src/metadata/ingestion/source/database/mssql/lineage.py
+++ b/ingestion/src/metadata/ingestion/source/database/mssql/lineage.py
@@ -27,5 +27,6 @@ class MssqlLineageSource(MssqlQueryParserSource, LineageSource):
OR lower(t.text) LIKE '%%merge%%'
)
AND lower(t.text) NOT LIKE '%%create%%procedure%%'
+ AND lower(t.text) NOT LIKE '%%create%%function%%'
AND lower(t.text) NOT LIKE '%%declare%%'
"""
diff --git a/ingestion/src/metadata/ingestion/source/database/mssql/usage.py b/ingestion/src/metadata/ingestion/source/database/mssql/usage.py
index acb131e931f5..5a9aabee2a95 100644
--- a/ingestion/src/metadata/ingestion/source/database/mssql/usage.py
+++ b/ingestion/src/metadata/ingestion/source/database/mssql/usage.py
@@ -19,4 +19,8 @@
class MssqlUsageSource(MssqlQueryParserSource, UsageSource):
sql_stmt = MSSQL_SQL_STATEMENT
- filters = "" # No filtering in the queries
+ filters = """
+ AND lower(t.text) NOT LIKE '%%create%%procedure%%'
+ AND lower(t.text) NOT LIKE '%%create%%function%%'
+ AND lower(t.text) NOT LIKE '%%declare%%'
+ """
diff --git a/ingestion/src/metadata/ingestion/source/database/mysql/connection.py b/ingestion/src/metadata/ingestion/source/database/mysql/connection.py
index dca28eefbaef..f5e6e61d5f40 100644
--- a/ingestion/src/metadata/ingestion/source/database/mysql/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/mysql/connection.py
@@ -16,9 +16,13 @@
from sqlalchemy.engine import Engine
+from metadata.clients.azure_client import AzureClient
from metadata.generated.schema.entity.automations.workflow import (
Workflow as AutomationWorkflow,
)
+from metadata.generated.schema.entity.services.connections.database.common.basicAuth import (
+ BasicAuth,
+)
from metadata.generated.schema.entity.services.connections.database.mysqlConnection import (
MysqlConnection,
)
@@ -38,6 +42,16 @@ def get_connection(connection: MysqlConnection) -> Engine:
"""
Create connection
"""
+ if hasattr(connection.authType, "azureConfig"):
+ azure_client = AzureClient(connection.authType.azureConfig).create_client()
+ if not connection.authType.azureConfig.scopes:
+ raise ValueError(
+ "Azure Scopes are missing, please refer https://learn.microsoft.com/en-gb/azure/mysql/flexible-server/how-to-azure-ad#2---retrieve-microsoft-entra-access-token and fetch the resource associated with it, for e.g. https://ossrdbms-aad.database.windows.net/.default"
+ )
+ access_token_obj = azure_client.get_token(
+ *connection.authType.azureConfig.scopes.split(",")
+ )
+ connection.authType = BasicAuth(password=access_token_obj.token)
if connection.sslCA or connection.sslCert or connection.sslKey:
if not connection.connectionOptions:
connection.connectionOptions = init_empty_connection_options()
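Note: a minimal sketch of the token flow behind the azureConfig branch above, using azure-identity directly instead of the repo's AzureClient wrapper; the scope shown is the documented resource for Azure Database flexible servers:

# Acquire a short-lived Microsoft Entra token and use it as the database password.
# Requires some Azure credential source (env vars, managed identity, CLI login).
from azure.identity import DefaultAzureCredential

credential = DefaultAzureCredential()
access_token = credential.get_token(
    "https://ossrdbms-aad.database.windows.net/.default"
)
print(access_token.token[:16], "...")   # opaque bearer token, expires quickly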
diff --git a/ingestion/src/metadata/ingestion/source/database/oracle/connection.py b/ingestion/src/metadata/ingestion/source/database/oracle/connection.py
index 5a98d7a5197e..324aa5def4f7 100644
--- a/ingestion/src/metadata/ingestion/source/database/oracle/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/oracle/connection.py
@@ -38,6 +38,7 @@
)
from metadata.ingestion.connections.test_connections import test_connection_db_common
from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.ingestion.source.database.oracle.queries import CHECK_ACCESS_TO_DBA
from metadata.utils.logger import ingestion_logger
CX_ORACLE_LIB_VERSION = "8.3.0"
@@ -136,9 +137,13 @@ def test_connection(
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
"""
+
+ test_conn_queries = {"CheckAccess": CHECK_ACCESS_TO_DBA}
+
test_connection_db_common(
metadata=metadata,
engine=engine,
service_connection=service_connection,
automation_workflow=automation_workflow,
+ queries=test_conn_queries,
)
diff --git a/ingestion/src/metadata/ingestion/source/database/oracle/queries.py b/ingestion/src/metadata/ingestion/source/database/oracle/queries.py
index d69be9a2d137..3540946aa2e2 100644
--- a/ingestion/src/metadata/ingestion/source/database/oracle/queries.py
+++ b/ingestion/src/metadata/ingestion/source/database/oracle/queries.py
@@ -88,7 +88,7 @@
type = 'PROCEDURE' and owner = '{schema}'
"""
)
-
+CHECK_ACCESS_TO_DBA = "SELECT table_name FROM DBA_TABLES where ROWNUM < 2"
ORACLE_GET_STORED_PROCEDURE_QUERIES = textwrap.dedent(
"""
WITH SP_HISTORY AS (SELECT
diff --git a/ingestion/src/metadata/ingestion/source/database/postgres/connection.py b/ingestion/src/metadata/ingestion/source/database/postgres/connection.py
index 2b34896cd0e2..3427fdb0e5f1 100644
--- a/ingestion/src/metadata/ingestion/source/database/postgres/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/postgres/connection.py
@@ -17,9 +17,13 @@
from sqlalchemy.engine import Engine
+from metadata.clients.azure_client import AzureClient
from metadata.generated.schema.entity.automations.workflow import (
Workflow as AutomationWorkflow,
)
+from metadata.generated.schema.entity.services.connections.database.common.basicAuth import (
+ BasicAuth,
+)
from metadata.generated.schema.entity.services.connections.database.postgresConnection import (
PostgresConnection,
SslMode,
@@ -46,6 +50,17 @@ def get_connection(connection: PostgresConnection) -> Engine:
"""
Create connection
"""
+
+ if hasattr(connection.authType, "azureConfig"):
+ azure_client = AzureClient(connection.authType.azureConfig).create_client()
+ if not connection.authType.azureConfig.scopes:
+ raise ValueError(
+ "Azure Scopes are missing, please refer https://learn.microsoft.com/en-gb/azure/postgresql/flexible-server/how-to-configure-sign-in-azure-ad-authentication#retrieve-the-microsoft-entra-access-token and fetch the resource associated with it, for e.g. https://ossrdbms-aad.database.windows.net/.default"
+ )
+ access_token_obj = azure_client.get_token(
+ *connection.authType.azureConfig.scopes.split(",")
+ )
+ connection.authType = BasicAuth(password=access_token_obj.token)
if connection.sslMode:
if not connection.connectionArguments:
connection.connectionArguments = init_empty_connection_arguments()
diff --git a/ingestion/src/metadata/ingestion/source/database/stored_procedures_mixin.py b/ingestion/src/metadata/ingestion/source/database/stored_procedures_mixin.py
index 199f01d3efbf..057fdf5aee74 100644
--- a/ingestion/src/metadata/ingestion/source/database/stored_procedures_mixin.py
+++ b/ingestion/src/metadata/ingestion/source/database/stored_procedures_mixin.py
@@ -138,7 +138,7 @@ def is_lineage_query(query_type: str, query_text: str) -> bool:
return True
if query_type == "INSERT" and re.search(
- "^.*insert.*into.*select.*$", query_text, re.IGNORECASE
+ "^.*insert.*into.*select.*$", query_text.replace("\n", " "), re.IGNORECASE
):
return True
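Note: the newline replacement added above matters because "." in a Python regex does not match "\n" by default; a quick standalone check with a made-up query:

import re

query_text = "INSERT INTO target_table\nSELECT * FROM source_table"
pattern = "^.*insert.*into.*select.*$"

print(bool(re.search(pattern, query_text, re.IGNORECASE)))                     # False
print(bool(re.search(pattern, query_text.replace("\n", " "), re.IGNORECASE)))  # True
print(bool(re.search(pattern, query_text, re.IGNORECASE | re.DOTALL)))         # True (alternative)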
diff --git a/ingestion/src/metadata/ingestion/source/database/unitycatalog/connection.py b/ingestion/src/metadata/ingestion/source/database/unitycatalog/connection.py
index 1035a0fc398a..2ba24d3ac328 100644
--- a/ingestion/src/metadata/ingestion/source/database/unitycatalog/connection.py
+++ b/ingestion/src/metadata/ingestion/source/database/unitycatalog/connection.py
@@ -68,16 +68,20 @@ def get_catalogs(connection: WorkspaceClient, table_obj: DatabricksTable):
break
def get_schemas(connection: WorkspaceClient, table_obj: DatabricksTable):
- for schema in connection.schemas.list(catalog_name=table_obj.catalog_name):
- table_obj.schema_name = schema.name
- break
+ for catalog in connection.catalogs.list():
+ for schema in connection.schemas.list(catalog_name=catalog.name):
+ if schema.name:
+ table_obj.schema_name = schema.name
+ table_obj.catalog_name = catalog.name
+ return
def get_tables(connection: WorkspaceClient, table_obj: DatabricksTable):
- for table in connection.tables.list(
- catalog_name=table_obj.catalog_name, schema_name=table_obj.schema_name
- ):
- table_obj.name = table.name
- break
+ if table_obj.catalog_name and table_obj.schema_name:
+ for table in connection.tables.list(
+ catalog_name=table_obj.catalog_name, schema_name=table_obj.schema_name
+ ):
+ table_obj.name = table.name
+ break
test_fn = {
"CheckAccess": connection.catalogs.list,
diff --git a/ingestion/src/metadata/ingestion/source/pipeline/dagster/metadata.py b/ingestion/src/metadata/ingestion/source/pipeline/dagster/metadata.py
index eeec4d1d5c5f..5fb0d36793d4 100644
--- a/ingestion/src/metadata/ingestion/source/pipeline/dagster/metadata.py
+++ b/ingestion/src/metadata/ingestion/source/pipeline/dagster/metadata.py
@@ -214,7 +214,9 @@ def yield_pipeline_status(
service_name=self.context.pipeline_service,
pipeline_name=self.context.pipeline,
)
- pipeline_entity = self.metadata.get_by_name(entity=Pipeline, fqn=pipeline_fqn)
+ pipeline_entity = self.metadata.get_by_name(
+ entity=Pipeline, fqn=pipeline_fqn, fields=["tasks"]
+ )
for task in pipeline_entity.tasks or []:
try:
runs = self.client.get_task_runs(
diff --git a/ingestion/src/metadata/ingestion/source/storage/storage_service.py b/ingestion/src/metadata/ingestion/source/storage/storage_service.py
index 2c3f9a4dad18..6b3e2eba33df 100644
--- a/ingestion/src/metadata/ingestion/source/storage/storage_service.py
+++ b/ingestion/src/metadata/ingestion/source/storage/storage_service.py
@@ -260,7 +260,7 @@ def extract_column_definitions(
metadata_entry: MetadataEntry,
) -> List[Column]:
"""Extract Column related metadata from s3"""
- data_structure_details = fetch_dataframe(
+ data_structure_details, raw_data = fetch_dataframe(
config_source=config_source,
client=client,
file_fqn=DatalakeTableSchemaWrapper(
@@ -269,10 +269,13 @@ def extract_column_definitions(
file_extension=SupportedTypes(metadata_entry.structureFormat),
separator=metadata_entry.separator,
),
+ fetch_raw_data=True,
)
columns = []
column_parser = DataFrameColumnParser.create(
- data_structure_details, SupportedTypes(metadata_entry.structureFormat)
+ data_structure_details,
+ SupportedTypes(metadata_entry.structureFormat),
+ raw_data=raw_data,
)
columns = column_parser.get_columns()
return columns
diff --git a/ingestion/src/metadata/parsers/json_schema_parser.py b/ingestion/src/metadata/parsers/json_schema_parser.py
index 818fc27fbce3..f56edce4fc61 100644
--- a/ingestion/src/metadata/parsers/json_schema_parser.py
+++ b/ingestion/src/metadata/parsers/json_schema_parser.py
@@ -18,6 +18,8 @@
from enum import Enum
from typing import List, Optional
+from pydantic.main import ModelMetaclass
+
from metadata.generated.schema.type.schema import FieldModel
from metadata.utils.logger import ingestion_logger
@@ -36,20 +38,25 @@ class JsonSchemaDataTypes(Enum):
NULL = "null"
RECORD = "object"
ARRAY = "array"
+ UNKNOWN = "unknown"
-def parse_json_schema(schema_text: str) -> Optional[List[FieldModel]]:
+def parse_json_schema(
+ schema_text: str, cls: ModelMetaclass = FieldModel
+) -> Optional[List[FieldModel]]:
"""
Method to parse the jsonschema
"""
try:
json_schema_data = json.loads(schema_text)
field_models = [
- FieldModel(
+ cls(
name=json_schema_data.get("title", "default"),
dataType=JsonSchemaDataTypes(json_schema_data.get("type")).name,
description=json_schema_data.get("description"),
- children=get_json_schema_fields(json_schema_data.get("properties")),
+ children=get_json_schema_fields(
+ json_schema_data.get("properties", {}), cls=cls
+ ),
)
]
return field_models
@@ -59,7 +66,9 @@ def parse_json_schema(schema_text: str) -> Optional[List[FieldModel]]:
return None
-def get_json_schema_fields(properties) -> Optional[List[FieldModel]]:
+def get_json_schema_fields(
+ properties, cls: ModelMetaclass = FieldModel
+) -> Optional[List[FieldModel]]:
"""
Recursively convert the parsed schema into required models
"""
@@ -67,9 +76,10 @@ def get_json_schema_fields(properties) -> Optional[List[FieldModel]]:
for key, value in properties.items():
try:
field_models.append(
- FieldModel(
- name=value.get("title", key),
- dataType=JsonSchemaDataTypes(value.get("type")).name,
+ cls(
+ name=key,
+ displayName=value.get("title"),
+ dataType=JsonSchemaDataTypes(value.get("type", "unknown")).name,
description=value.get("description"),
children=get_json_schema_fields(value.get("properties"))
if value.get("type") == "object"
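Note: a toy, plain-dict version of the recursive walk above, independent of the FieldModel/ModelMetaclass wiring; the schema content is invented:

import json

schema_text = json.dumps({
    "title": "Customer",
    "type": "object",
    "description": "A customer record",
    "properties": {
        "id": {"type": "integer"},
        "address": {"type": "object", "properties": {"city": {"type": "string"}}},
    },
})

def walk(properties):
    # Mirror the shape of get_json_schema_fields with plain dicts.
    fields = []
    for key, value in (properties or {}).items():
        fields.append({
            "name": key,
            "displayName": value.get("title"),
            "dataType": value.get("type", "unknown").upper(),
            "children": walk(value.get("properties")) if value.get("type") == "object" else None,
        })
    return fields

data = json.loads(schema_text)
print(json.dumps(walk(data.get("properties", {})), indent=2))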
diff --git a/ingestion/src/metadata/pii/scanners/ner_scanner.py b/ingestion/src/metadata/pii/scanners/ner_scanner.py
index 6ce29a0740fa..c177a0af7673 100644
--- a/ingestion/src/metadata/pii/scanners/ner_scanner.py
+++ b/ingestion/src/metadata/pii/scanners/ner_scanner.py
@@ -21,7 +21,7 @@
from metadata.generated.schema.entity.classification.tag import Tag
from metadata.pii.constants import PII, SPACY_EN_MODEL
-from metadata.pii.models import TagAndConfidence, TagType
+from metadata.pii.models import TagAndConfidence
from metadata.pii.ner import NEREntity
from metadata.utils import fqn
from metadata.utils.logger import pii_logger
@@ -119,13 +119,15 @@ def scan(self, sample_data_rows: List[Any]) -> Optional[TagAndConfidence]:
if entities_score:
label, score = self.get_highest_score_label(entities_score)
- tag_type = NEREntity.__members__.get(label, TagType.NONSENSITIVE).value
+ tag_type = NEREntity.__members__.get(label)
+ if not tag_type:
+ return None
return TagAndConfidence(
tag_fqn=fqn.build(
metadata=None,
entity_type=Tag,
classification_name=PII,
- tag_name=tag_type,
+ tag_name=tag_type.value,
),
confidence=score,
)
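Note: the early return added above relies on Enum.__members__.get() yielding None for labels the NER model emits but the PII mapping does not define; a toy stand-in enum:

from enum import Enum

class ToyEntity(Enum):                 # illustrative, not the real NEREntity
    EMAIL_ADDRESS = "Sensitive"
    DATE_TIME = "NonSensitive"

print(ToyEntity.__members__.get("EMAIL_ADDRESS"))   # ToyEntity.EMAIL_ADDRESS
print(ToyEntity.__members__.get("CARDINAL"))        # None -> no tag is emitted
print(ToyEntity.EMAIL_ADDRESS.value)                # "Sensitive" -> used as tag_name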
diff --git a/ingestion/src/metadata/profiler/__init__.py b/ingestion/src/metadata/profiler/__init__.py
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/ingestion/src/metadata/profiler/adaptors/__init__.py b/ingestion/src/metadata/profiler/adaptors/__init__.py
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/ingestion/src/metadata/profiler/adaptors/adaptor_factory.py b/ingestion/src/metadata/profiler/adaptors/adaptor_factory.py
deleted file mode 100644
index fb06b9969bb2..000000000000
--- a/ingestion/src/metadata/profiler/adaptors/adaptor_factory.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# Copyright 2024 Collate
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-factory for NoSQL adaptors that are used in the NoSQLProfiler.
-"""
-
-from metadata.generated.schema.entity.services.connections.database.mongoDBConnection import (
- MongoDBConnection,
-)
-from metadata.profiler.adaptors.mongodb import MongoDB
-from metadata.profiler.factory import Factory
-from metadata.utils.logger import profiler_logger
-
-logger = profiler_logger()
-
-
-class NoSQLAdaptorFactory(Factory):
- def create(self, interface_type: str, *args, **kwargs) -> any:
- logger.debug(f"Creating NoSQL client for {interface_type}")
- client_class = self._interface_type.get(interface_type)
- if not client_class:
- raise ValueError(f"Unknown NoSQL source: {interface_type}")
- logger.debug(f"Using NoSQL client constructor: {client_class.__name__}")
- return client_class(*args, **kwargs)
-
-
-adaptors = profilers = {
- MongoDBConnection.__name__: MongoDB,
-}
-factory = NoSQLAdaptorFactory()
-factory.register_many(adaptors)
diff --git a/ingestion/src/metadata/profiler/adaptors/mongodb.py b/ingestion/src/metadata/profiler/adaptors/mongodb.py
deleted file mode 100644
index 22194535bb91..000000000000
--- a/ingestion/src/metadata/profiler/adaptors/mongodb.py
+++ /dev/null
@@ -1,175 +0,0 @@
-# Copyright 2024 Collate
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-MongoDB adaptor for the NoSQL profiler.
-"""
-import json
-from enum import Enum
-from typing import TYPE_CHECKING, Dict, List, Optional, Union
-
-from pydantic import BaseModel, Field
-from pymongo.command_cursor import CommandCursor
-from pymongo.cursor import Cursor
-
-from metadata.generated.schema.entity.data.table import Column, Table
-from metadata.profiler.adaptors.nosql_adaptor import NoSQLAdaptor
-from metadata.utils.sqa_like_column import SQALikeColumn
-
-if TYPE_CHECKING:
- from pymongo import MongoClient
-else:
- MongoClient = None # pylint: disable=invalid-name
-
-
-class AggregationFunction(Enum):
- SUM = "$sum"
- MEAN = "$avg"
- COUNT = "$count"
- MAX = "$max"
- MIN = "$min"
-
-
-class Executable(BaseModel):
- def to_executable(self, client: MongoClient) -> Union[CommandCursor, Cursor]:
- raise NotImplementedError
-
-
-class Query(Executable):
- database: str
- collection: str
- filter: dict = Field(default_factory=dict)
- limit: Optional[int] = None
-
- def to_executable(self, client: MongoClient) -> Cursor:
- db = client[self.database]
- collection = db[self.collection]
- query = collection.find(self.filter)
- if self.limit:
- query = query.limit(self.limit)
- return query
-
-
-class Aggregation(Executable):
- database: str
- collection: str
- column: str
- aggregations: List[AggregationFunction]
-
- def to_executable(self, client: MongoClient) -> CommandCursor:
- db = client[self.database]
- collection = db[self.collection]
- return collection.aggregate(
- [
- {
- "$group": {
- "_id": None,
- **{
- a.name.lower(): {a.value: f"${self.column}"}
- for a in self.aggregations
- },
- }
- }
- ]
- )
-
-
-class MongoDB(NoSQLAdaptor):
- """A MongoDB client that serves as an adaptor for profiling data assets on MongoDB"""
-
- def __init__(self, client: MongoClient):
- self.client = client
-
- def item_count(self, table: Table) -> int:
- db = self.client[table.databaseSchema.name]
- collection = db[table.name.__root__]
- return collection.count_documents({})
-
- def scan(
- self, table: Table, columns: List[Column], limit: int
- ) -> List[Dict[str, any]]:
- return self.execute(
- Query(
- database=table.databaseSchema.name,
- collection=table.name.__root__,
- limit=limit,
- )
- )
-
- def query(
- self, table: Table, columns: List[Column], query: any, limit: int
- ) -> List[Dict[str, any]]:
- try:
- json_query = json.loads(query)
- except json.JSONDecodeError:
- raise ValueError("Invalid JSON query")
- return self.execute(
- Query(
- database=table.databaseSchema.name,
- collection=table.name.__root__,
- filter=json_query,
- )
- )
-
- def get_aggregates(
- self,
- table: Table,
- column: SQALikeColumn,
- aggregate_functions: List[AggregationFunction],
- ) -> Dict[str, Union[int, float]]:
- """
- Get the aggregate functions for a column in a table
- Returns:
- Dict[str, Union[int, float]]: A dictionary of the aggregate functions
- Example:
- {
- "sum": 100,
- "avg": 50,
- "count": 2,
- "max": 75,
- "min": 25
- }
- """
- row = self.execute(
- Aggregation(
- database=table.databaseSchema.name,
- collection=table.name.__root__,
- column=column.name,
- aggregations=aggregate_functions,
- )
- )[0]
- return {k: v for k, v in row.items() if k != "_id"}
-
- def sum(self, table: Table, column: SQALikeColumn) -> AggregationFunction:
- return AggregationFunction.SUM
-
- def mean(self, table: Table, column: SQALikeColumn) -> AggregationFunction:
- return AggregationFunction.MEAN
-
- def max(self, table: Table, column: SQALikeColumn) -> AggregationFunction:
- return AggregationFunction.MAX
-
- def min(self, table: Table, column: SQALikeColumn) -> AggregationFunction:
- return AggregationFunction.MIN
-
- def execute(self, query: Executable) -> List[Dict[str, any]]:
- records = list(query.to_executable(self.client))
- result = []
- for r in records:
- result.append({c: self._json_safe(r.get(c)) for c in r})
- return result
-
- @staticmethod
- def _json_safe(data: any):
- try:
- json.dumps(data)
- return data
- except Exception: # noqa
- return str(data)
diff --git a/ingestion/src/metadata/profiler/adaptors/nosql_adaptor.py b/ingestion/src/metadata/profiler/adaptors/nosql_adaptor.py
deleted file mode 100644
index 4a78100c5686..000000000000
--- a/ingestion/src/metadata/profiler/adaptors/nosql_adaptor.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# Copyright 2024 Collate
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-NoSQL adaptor for the NoSQL profiler.
-"""
-from abc import ABC, abstractmethod
-from typing import Dict, List, Union
-
-from metadata.generated.schema.entity.data.table import Column, Table
-from metadata.utils.sqa_like_column import SQALikeColumn
-
-
-class NoSQLAdaptor(ABC):
- """
- NoSQL adaptor for the NoSQL profiler. This class implememts the required methods for retreiving data from a NoSQL
- database.
- """
-
- @abstractmethod
- def item_count(self, table: Table) -> int:
- raise NotImplementedError
-
- @abstractmethod
- def scan(
- self, table: Table, columns: List[Column], limit: int
- ) -> List[Dict[str, any]]:
- pass
-
- def query(
- self, table: Table, columns: List[Column], query: any, limit: int
- ) -> List[Dict[str, any]]:
- raise NotImplementedError
-
- def get_aggregates(
- self, table: Table, column: SQALikeColumn, aggregate_functions: List[any]
- ) -> Dict[str, Union[int, float]]:
- raise NotImplementedError
-
- def sum(
- self, table: Table, column: Column # pylint: disable=unused-argument
- ) -> any:
- return None
-
- def mean(
- self, table: Table, column: Column # pylint: disable=unused-argument
- ) -> any:
- return None
-
- def max(
- self, table: Table, column: Column # pylint: disable=unused-argument
- ) -> any:
- return None
-
- def min(
- self, table: Table, column: Column # pylint: disable=unused-argument
- ) -> any:
- return None
diff --git a/ingestion/src/metadata/profiler/api/models.py b/ingestion/src/metadata/profiler/api/models.py
index 499f9b4f149d..961d6fcd1280 100644
--- a/ingestion/src/metadata/profiler/api/models.py
+++ b/ingestion/src/metadata/profiler/api/models.py
@@ -122,7 +122,7 @@ def __str__(self):
class ThreadPoolMetrics(ConfigModel):
- """A container for all metrics to be computed on the same thread."""
+ """thread pool metric"""
metrics: Union[List[Union[Type[Metric], CustomMetric]], Type[Metric]]
metric_type: MetricTypes
diff --git a/ingestion/src/metadata/profiler/factory.py b/ingestion/src/metadata/profiler/factory.py
deleted file mode 100644
index fa89590401b7..000000000000
--- a/ingestion/src/metadata/profiler/factory.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# Copyright 2021 Collate
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Factory class for creating profiler interface objects
-"""
-from abc import ABC, abstractmethod
-
-
-class Factory(ABC):
- """Creational factory for interface objects"""
-
- def __init__(self):
- self._interface_type = {}
-
- def register(self, interface_type: str, interface_class):
- """Register a new interface"""
- self._interface_type[interface_type] = interface_class
-
- def register_many(self, interface_dict):
- """
- Registers multiple profiler interfaces at once.
-
- Args:
- interface_dict: A dictionary mapping connection class names (strings) to their
- corresponding profiler interface classes.
- """
- for interface_type, interface_class in interface_dict.items():
- self.register(interface_type, interface_class)
-
- @abstractmethod
- def create(self, interface_type: str, *args, **kwargs) -> any:
- pass
diff --git a/ingestion/src/metadata/profiler/interface/nosql/profiler_interface.py b/ingestion/src/metadata/profiler/interface/nosql/profiler_interface.py
deleted file mode 100644
index 852f88a70e29..000000000000
--- a/ingestion/src/metadata/profiler/interface/nosql/profiler_interface.py
+++ /dev/null
@@ -1,231 +0,0 @@
-# Copyright 2021 Collate
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# pylint: disable=arguments-differ
-
-"""
-Interfaces with database for all database engine
-supporting sqlalchemy abstraction layer
-"""
-import traceback
-from collections import defaultdict
-from datetime import datetime, timezone
-from typing import Dict, List, Optional, Type
-
-from sqlalchemy import Column
-
-from metadata.generated.schema.entity.data.table import TableData
-from metadata.generated.schema.tests.customMetric import CustomMetric
-from metadata.profiler.adaptors.adaptor_factory import factory
-from metadata.profiler.adaptors.nosql_adaptor import NoSQLAdaptor
-from metadata.profiler.api.models import ThreadPoolMetrics
-from metadata.profiler.interface.profiler_interface import ProfilerInterface
-from metadata.profiler.metrics.core import Metric, MetricTypes
-from metadata.profiler.metrics.registry import Metrics
-from metadata.profiler.processor.sampler.nosql.sampler import NoSQLSampler
-from metadata.utils.logger import profiler_interface_registry_logger
-from metadata.utils.sqa_like_column import SQALikeColumn
-
-logger = profiler_interface_registry_logger()
-
-
-class NoSQLProfilerInterface(ProfilerInterface):
- """
- Interface to interact with registry supporting
- sqlalchemy.
- """
-
- # pylint: disable=too-many-arguments
-
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
- self.sampler = self._get_sampler()
-
- def _compute_table_metrics(
- self,
- metrics: List[Type[Metric]],
- runner: NoSQLAdaptor,
- *args,
- **kwargs,
- ):
- result = {}
- for metric in metrics:
- try:
- fn = metric().nosql_fn(runner)
- result[metric.name()] = fn(self.table)
- except Exception as exc:
- logger.debug(
- f"{traceback.format_exc()}\n"
- f"Error trying to compute metric {metric} for {self.table.fullyQualifiedName}: {exc}"
- )
- raise RuntimeError(
- f"Error trying to compute metric {metric.name()} for {self.table.fullyQualifiedName}: {exc}"
- )
- return result
-
- def _compute_static_metrics(
- self,
- metrics: List[Metrics],
- runner: NoSQLAdaptor,
- column: SQALikeColumn,
- *args,
- **kwargs,
- ) -> Dict[str, any]:
- try:
- aggs = [metric(column).nosql_fn(runner)(self.table) for metric in metrics]
- filtered = [agg for agg in aggs if agg is not None]
- if not filtered:
- return {}
- row = runner.get_aggregates(self.table, column, filtered)
- return dict(row)
- except Exception as exc:
- logger.debug(
- f"{traceback.format_exc()}\n"
- f"Error trying to compute metrics for {self.table.fullyQualifiedName}: {exc}"
- )
- raise RuntimeError(
- f"Error trying to compute metris for {self.table.fullyQualifiedName}: {exc}"
- )
-
- def _compute_query_metrics(
- self,
- metric: Metrics,
- runner,
- *args,
- **kwargs,
- ):
- return None
-
- def _compute_window_metrics(
- self,
- metrics: List[Metrics],
- runner,
- *args,
- **kwargs,
- ):
- return None
-
- def _compute_system_metrics(
- self,
- metrics: Metrics,
- runner: List,
- *args,
- **kwargs,
- ):
- return None
-
- def _compute_custom_metrics(
- self, metrics: List[CustomMetric], runner, *args, **kwargs
- ):
- return None
-
- def compute_metrics(
- self,
- client: NoSQLAdaptor,
- metric_func: ThreadPoolMetrics,
- ):
- """Run metrics in processor worker"""
- logger.debug(f"Running profiler for {metric_func.table}")
- try:
- row = self._get_metric_fn[metric_func.metric_type.value](
- metric_func.metrics,
- client,
- column=metric_func.column,
- )
- except Exception as exc:
- name = f"{metric_func.column if metric_func.column is not None else metric_func.table}"
- error = f"{name} metric_type.value: {exc}"
- logger.error(error)
- self.status.failed_profiler(error, traceback.format_exc())
- row = None
- if metric_func.column is not None:
- column = metric_func.column.name
- self.status.scanned(f"{metric_func.table.name.__root__}.{column}")
- else:
- self.status.scanned(metric_func.table.name.__root__)
- column = None
- return row, column, metric_func.metric_type.value
-
- def fetch_sample_data(self, table, columns: List[SQALikeColumn]) -> TableData:
- return self.sampler.fetch_sample_data(columns)
-
- def _get_sampler(self) -> NoSQLSampler:
- """Get NoSQL sampler from config"""
- from metadata.profiler.processor.sampler.sampler_factory import ( # pylint: disable=import-outside-toplevel
- sampler_factory_,
- )
-
- return sampler_factory_.create(
- self.service_connection_config.__class__.__name__,
- table=self.table,
- client=factory.create(
- self.service_connection_config.__class__.__name__, self.connection
- ),
- profile_sample_config=self.profile_sample_config,
- partition_details=self.partition_details,
- profile_sample_query=self.profile_query,
- )
-
- def get_composed_metrics(
- self, column: Column, metric: Metrics, column_results: Dict
- ):
- return None
-
- def get_hybrid_metrics(
- self, column: Column, metric: Metrics, column_results: Dict, **kwargs
- ):
- return None
-
- def get_all_metrics(
- self,
- metric_funcs: List[ThreadPoolMetrics],
- ):
- """get all profiler metrics"""
- profile_results = {"table": {}, "columns": defaultdict(dict)}
- runner = factory.create(
- self.service_connection_config.__class__.__name__, self.connection
- )
- metric_list = [
- self.compute_metrics(runner, metric_func) for metric_func in metric_funcs
- ]
- for metric_result in metric_list:
- profile, column, metric_type = metric_result
- if profile:
- if metric_type == MetricTypes.Table.value:
- profile_results["table"].update(profile)
- if metric_type == MetricTypes.System.value:
- profile_results["system"] = profile
- elif metric_type == MetricTypes.Custom.value and column is None:
- profile_results["table"].update(profile)
- else:
- profile_results["columns"][column].update(
- {
- "name": column,
- "timestamp": int(
- datetime.now(tz=timezone.utc).timestamp() * 1000
- ),
- **profile,
- }
- )
- return profile_results
-
- @property
- def table(self):
- """OM Table entity"""
- return self.table_entity
-
- def get_columns(self) -> List[Optional[SQALikeColumn]]:
- return [
- SQALikeColumn(name=c.name.__root__, type=c.dataType)
- for c in self.table.columns
- ]
-
- def close(self):
- self.connection.close()
diff --git a/ingestion/src/metadata/profiler/interface/profiler_interface.py b/ingestion/src/metadata/profiler/interface/profiler_interface.py
index 8dc5f330aba3..e1881d1806ec 100644
--- a/ingestion/src/metadata/profiler/interface/profiler_interface.py
+++ b/ingestion/src/metadata/profiler/interface/profiler_interface.py
@@ -33,7 +33,7 @@
TableData,
)
from metadata.generated.schema.entity.services.connections.connectionBasicType import (
- SampleDataStorageConfig,
+ DataStorageConfig,
)
from metadata.generated.schema.entity.services.connections.database.datalakeConnection import (
DatalakeConnection,
@@ -93,7 +93,7 @@ def __init__(
service_connection_config: Union[DatabaseConnection, DatalakeConnection],
ometa_client: OpenMetadata,
entity: Table,
- storage_config: SampleDataStorageConfig,
+ storage_config: DataStorageConfig,
profile_sample_config: Optional[ProfileSampleConfig],
source_config: DatabaseServiceProfilerPipeline,
sample_query: Optional[str],
@@ -248,7 +248,7 @@ def _get_sample_storage_config(
DatabaseProfilerConfig,
DatabaseAndSchemaConfig,
]
- ):
+ ) -> Optional[DataStorageConfig]:
if (
config
and config.sampleDataStorageConfig
@@ -264,7 +264,7 @@ def get_storage_config_for_table(
database_profiler_config: Optional[DatabaseProfilerConfig],
db_service: Optional[DatabaseService],
profiler_config: ProfilerProcessorConfig,
- ) -> Optional[SampleDataStorageConfig]:
+ ) -> Optional[DataStorageConfig]:
"""Get config for a specific entity
Args:
@@ -425,12 +425,8 @@ def _compute_static_metrics(
runner,
*args,
**kwargs,
- ) -> Dict[str, Any]:
- """Get metrics
- Return:
- Dict[str, Any]: dict of metrics tio be merged into the final column profile. Keys need to be compatible with
- the `metadata.generated.schema.entity.data.table.ColumnProfile` schema.
- """
+ ):
+ """Get metrics"""
raise NotImplementedError
@abstractmethod
diff --git a/ingestion/src/metadata/profiler/interface/profiler_interface_factory.py b/ingestion/src/metadata/profiler/interface/profiler_interface_factory.py
index 3a03a921a9d2..2733d0a8c4cb 100644
--- a/ingestion/src/metadata/profiler/interface/profiler_interface_factory.py
+++ b/ingestion/src/metadata/profiler/interface/profiler_interface_factory.py
@@ -30,9 +30,6 @@
from metadata.generated.schema.entity.services.connections.database.mariaDBConnection import (
MariaDBConnection,
)
-from metadata.generated.schema.entity.services.connections.database.mongoDBConnection import (
- MongoDBConnection,
-)
from metadata.generated.schema.entity.services.connections.database.singleStoreConnection import (
SingleStoreConnection,
)
@@ -46,8 +43,6 @@
UnityCatalogConnection,
)
from metadata.generated.schema.entity.services.databaseService import DatabaseConnection
-from metadata.profiler.factory import Factory
-from metadata.profiler.interface.nosql.profiler_interface import NoSQLProfilerInterface
from metadata.profiler.interface.pandas.profiler_interface import (
PandasProfilerInterface,
)
@@ -81,7 +76,27 @@
)
-class ProfilerInterfaceFactory(Factory):
+class ProfilerInterfaceFactory:
+ """Creational factory for profiler interface objects"""
+
+ def __init__(self):
+ self._interface_type = {}
+
+ def register(self, interface_type: str, interface_class):
+ """Register a new interface"""
+ self._interface_type[interface_type] = interface_class
+
+ def register_many(self, interface_dict):
+ """
+ Registers multiple profiler interfaces at once.
+
+ Args:
+ interface_dict: A dictionary mapping connection class names (strings) to their
+ corresponding profiler interface classes.
+ """
+ for interface_type, interface_class in interface_dict.items():
+ self.register(interface_type, interface_class)
+
def create(self, interface_type: str, *args, **kwargs):
"""Create interface object based on interface type"""
interface_class = self._interface_type.get(interface_type)
@@ -103,6 +118,6 @@ def create(self, interface_type: str, *args, **kwargs):
UnityCatalogConnection.__name__: UnityCatalogProfilerInterface,
DatabricksConnection.__name__: DatabricksProfilerInterface,
Db2Connection.__name__: DB2ProfilerInterface,
- MongoDBConnection.__name__: NoSQLProfilerInterface,
}
+
profiler_interface_factory.register_many(profilers)
diff --git a/ingestion/src/metadata/profiler/interface/sqlalchemy/profiler_interface.py b/ingestion/src/metadata/profiler/interface/sqlalchemy/profiler_interface.py
index 0a4441595dfd..d99adbf53896 100644
--- a/ingestion/src/metadata/profiler/interface/sqlalchemy/profiler_interface.py
+++ b/ingestion/src/metadata/profiler/interface/sqlalchemy/profiler_interface.py
@@ -189,6 +189,7 @@ def _compute_table_metrics(
runner=runner,
metrics=metrics,
conn_config=self.service_connection_config,
+ entity=self.table_entity,
)
row = table_metric_computer.compute()
if row:
diff --git a/ingestion/src/metadata/profiler/metrics/core.py b/ingestion/src/metadata/profiler/metrics/core.py
index 70e387a7daeb..9cc219777a53 100644
--- a/ingestion/src/metadata/profiler/metrics/core.py
+++ b/ingestion/src/metadata/profiler/metrics/core.py
@@ -18,14 +18,11 @@
from abc import ABC, abstractmethod
from enum import Enum
from functools import wraps
-from typing import Any, Callable, Dict, Optional, Tuple, TypeVar
+from typing import Any, Dict, Optional, Tuple, TypeVar
from sqlalchemy import Column
from sqlalchemy.orm import DeclarativeMeta, Session
-from metadata.generated.schema.entity.data.table import Table
-from metadata.profiler.adaptors.nosql_adaptor import NoSQLAdaptor
-
# When creating complex metrics, use inherit_cache = CACHE
CACHE = True
@@ -90,9 +87,6 @@ def _new_init(self, *args, **kw):
return inner
-T = TypeVar("T")
-
-
class Metric(ABC):
"""
Parent class metric
@@ -159,13 +153,6 @@ def metric_type(self):
"""
return self.col.type.python_type if self.col else None
- def nosql_fn(self, client: NoSQLAdaptor) -> Callable[[Table], Optional[T]]:
- """
- Return the function to be used for NoSQL clients to calculate the metric.
- By default, returns a "do nothing" function that returns None.
- """
- return lambda table: None
-
TMetric = TypeVar("TMetric", bound=Metric)
diff --git a/ingestion/src/metadata/profiler/metrics/static/max.py b/ingestion/src/metadata/profiler/metrics/static/max.py
index 65f9ec7181c7..4eb7b933f4c9 100644
--- a/ingestion/src/metadata/profiler/metrics/static/max.py
+++ b/ingestion/src/metadata/profiler/metrics/static/max.py
@@ -12,16 +12,14 @@
"""
Max Metric definition
"""
-from functools import partial
-from typing import Callable, Optional
+# pylint: disable=duplicate-code
+
from sqlalchemy import TIME, column
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.functions import GenericFunction
-from metadata.generated.schema.entity.data.table import Table
-from metadata.profiler.adaptors.nosql_adaptor import NoSQLAdaptor
-from metadata.profiler.metrics.core import CACHE, StaticMetric, T, _label
+from metadata.profiler.metrics.core import CACHE, StaticMetric, _label
from metadata.profiler.orm.functions.length import LenFn
from metadata.profiler.orm.registry import (
FLOAT_SET,
@@ -31,8 +29,6 @@
is_quantifiable,
)
-# pylint: disable=duplicate-code
-
class MaxFn(GenericFunction):
name = __qualname__
@@ -100,9 +96,3 @@ def df_fn(self, dfs=None):
max_ = max((df[self.col.name].max() for df in dfs))
return int(max_.timestamp() * 1000)
return 0
-
- def nosql_fn(self, adaptor: NoSQLAdaptor) -> Callable[[Table], Optional[T]]:
- """nosql function"""
- if is_quantifiable(self.col.type):
- return partial(adaptor.max, column=self.col)
- return lambda table: None
diff --git a/ingestion/src/metadata/profiler/metrics/static/mean.py b/ingestion/src/metadata/profiler/metrics/static/mean.py
index fe53306643d7..aaa5d78783eb 100644
--- a/ingestion/src/metadata/profiler/metrics/static/mean.py
+++ b/ingestion/src/metadata/profiler/metrics/static/mean.py
@@ -12,16 +12,16 @@
"""
AVG Metric definition
"""
-from functools import partial
-from typing import Callable, List, Optional, cast
+# pylint: disable=duplicate-code
+
+
+from typing import List, cast
from sqlalchemy import column, func
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.functions import GenericFunction
-from metadata.generated.schema.entity.data.table import Table
-from metadata.profiler.adaptors.nosql_adaptor import NoSQLAdaptor
-from metadata.profiler.metrics.core import CACHE, StaticMetric, T, _label
+from metadata.profiler.metrics.core import CACHE, StaticMetric, _label
from metadata.profiler.orm.functions.length import LenFn
from metadata.profiler.orm.registry import (
FLOAT_SET,
@@ -32,9 +32,6 @@
)
from metadata.utils.logger import profiler_logger
-# pylint: disable=duplicate-code
-
-
logger = profiler_logger()
@@ -145,9 +142,3 @@ def df_fn(self, dfs=None):
f"Don't know how to process type {self.col.type} when computing MEAN"
)
return None
-
- def nosql_fn(self, adaptor: NoSQLAdaptor) -> Callable[[Table], Optional[T]]:
- """nosql function"""
- if is_quantifiable(self.col.type):
- return partial(adaptor.mean, column=self.col)
- return lambda table: None
diff --git a/ingestion/src/metadata/profiler/metrics/static/min.py b/ingestion/src/metadata/profiler/metrics/static/min.py
index 5731348708c7..d6e212a34055 100644
--- a/ingestion/src/metadata/profiler/metrics/static/min.py
+++ b/ingestion/src/metadata/profiler/metrics/static/min.py
@@ -12,16 +12,13 @@
"""
Min Metric definition
"""
-from functools import partial
-from typing import Callable, Optional
+# pylint: disable=duplicate-code
from sqlalchemy import TIME, column
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.functions import GenericFunction
-from metadata.generated.schema.entity.data.table import Table
-from metadata.profiler.adaptors.nosql_adaptor import NoSQLAdaptor
-from metadata.profiler.metrics.core import CACHE, StaticMetric, T, _label
+from metadata.profiler.metrics.core import CACHE, StaticMetric, _label
from metadata.profiler.orm.functions.length import LenFn
from metadata.profiler.orm.registry import (
FLOAT_SET,
@@ -31,8 +28,6 @@
is_quantifiable,
)
-# pylint: disable=duplicate-code
-
class MinFn(GenericFunction):
name = __qualname__
@@ -101,9 +96,3 @@ def df_fn(self, dfs=None):
min_ = min((df[self.col.name].min() for df in dfs))
return int(min_.timestamp() * 1000)
return 0
-
- def nosql_fn(self, adaptor: NoSQLAdaptor) -> Callable[[Table], Optional[T]]:
- """nosql function"""
- if is_quantifiable(self.col.type):
- return partial(adaptor.min, column=self.col)
- return lambda table: None
diff --git a/ingestion/src/metadata/profiler/metrics/static/row_count.py b/ingestion/src/metadata/profiler/metrics/static/row_count.py
index c3f70f9d152e..6891ab43b021 100644
--- a/ingestion/src/metadata/profiler/metrics/static/row_count.py
+++ b/ingestion/src/metadata/profiler/metrics/static/row_count.py
@@ -12,12 +12,11 @@
"""
Table Count Metric definition
"""
-from typing import Callable
+# pylint: disable=duplicate-code
+
from sqlalchemy import func
-from metadata.generated.schema.entity.data.table import Table
-from metadata.profiler.adaptors.nosql_adaptor import NoSQLAdaptor
from metadata.profiler.metrics.core import StaticMetric, _label
@@ -51,7 +50,3 @@ def fn(self):
def df_fn(self, dfs=None):
"""pandas function"""
return sum(len(df.index) for df in dfs)
-
- @classmethod
- def nosql_fn(cls, client: NoSQLAdaptor) -> Callable[[Table], int]:
- return client.item_count
diff --git a/ingestion/src/metadata/profiler/metrics/static/sum.py b/ingestion/src/metadata/profiler/metrics/static/sum.py
index b118ca1458db..dec3bbbb4b9a 100644
--- a/ingestion/src/metadata/profiler/metrics/static/sum.py
+++ b/ingestion/src/metadata/profiler/metrics/static/sum.py
@@ -12,20 +12,15 @@
"""
SUM Metric definition
"""
-from functools import partial
-from typing import Callable, Optional
+# pylint: disable=duplicate-code
from sqlalchemy import column
-from metadata.generated.schema.entity.data.table import Table
-from metadata.profiler.adaptors.nosql_adaptor import NoSQLAdaptor
-from metadata.profiler.metrics.core import StaticMetric, T, _label
+from metadata.profiler.metrics.core import StaticMetric, _label
from metadata.profiler.orm.functions.length import LenFn
from metadata.profiler.orm.functions.sum import SumFn
from metadata.profiler.orm.registry import is_concatenable, is_quantifiable
-# pylint: disable=duplicate-code
-
class Sum(StaticMetric):
"""
@@ -57,9 +52,3 @@ def df_fn(self, dfs=None):
if is_quantifiable(self.col.type):
return sum(df[self.col.name].sum() for df in dfs)
return None
-
- def nosql_fn(self, adaptor: NoSQLAdaptor) -> Callable[[Table], Optional[T]]:
- """nosql function"""
- if is_quantifiable(self.col.type):
- return partial(adaptor.sum, column=self.col)
- return lambda table: None
diff --git a/ingestion/src/metadata/profiler/orm/functions/sum.py b/ingestion/src/metadata/profiler/orm/functions/sum.py
index 545f3bbcbef6..0e8623947a24 100644
--- a/ingestion/src/metadata/profiler/orm/functions/sum.py
+++ b/ingestion/src/metadata/profiler/orm/functions/sum.py
@@ -40,6 +40,7 @@ def _(element, compiler, **kw):
@compiles(SumFn, Dialects.BigQuery)
+@compiles(SumFn, Dialects.Postgres)
def _(element, compiler, **kw):
"""Handle case where column type is INTEGER but SUM returns a NUMBER"""
proc = compiler.process(element.clauses, **kw)
diff --git a/ingestion/src/metadata/profiler/orm/functions/table_metric_computer.py b/ingestion/src/metadata/profiler/orm/functions/table_metric_computer.py
index 8d88b0d85d47..f2c8ab0bde3f 100644
--- a/ingestion/src/metadata/profiler/orm/functions/table_metric_computer.py
+++ b/ingestion/src/metadata/profiler/orm/functions/table_metric_computer.py
@@ -22,6 +22,8 @@
from sqlalchemy.sql.expression import ColumnOperators, and_, cte
from sqlalchemy.types import String
+from metadata.generated.schema.entity.data.table import Table as OMTable
+from metadata.generated.schema.entity.data.table import TableType
from metadata.profiler.metrics.registry import Metrics
from metadata.profiler.orm.registry import Dialects
from metadata.profiler.processor.runner import QueryRunner
@@ -31,7 +33,7 @@
COLUMN_COUNT = "columnCount"
COLUMN_NAMES = "columnNames"
-ROW_COUNT = "rowCount"
+ROW_COUNT = Metrics.ROW_COUNT().name()
SIZE_IN_BYTES = "sizeInBytes"
CREATE_DATETIME = "createDateTime"
@@ -43,13 +45,16 @@
class AbstractTableMetricComputer(ABC):
"""Base table computer"""
- def __init__(self, runner: QueryRunner, metrics: List[Metrics], conn_config):
+ def __init__(
+ self, runner: QueryRunner, metrics: List[Metrics], conn_config, entity: OMTable
+ ):
"""Instantiate base table computer"""
self._runner = runner
self._metrics = metrics
self._conn_config = conn_config
self._database = self._runner._session.get_bind().url.database
self._table = self._runner.table
+ self._entity = entity
@property
def database(self):
@@ -141,16 +146,6 @@ def compute(self):
class BaseTableMetricComputer(AbstractTableMetricComputer):
"""Base table computer"""
- def _check_and_return(self, res):
- """Check if the result is None and return the result or fallback
-
- Args:
- res (object): result
- """
- if res.rowCount is None:
- return super().compute()
- return res
-
def compute(self):
"""Default compute behavior for table metrics"""
return self.runner.select_first_from_table(
@@ -236,7 +231,9 @@ def compute(self):
)
res = self.runner._session.execute(query).first()
- if res.rowCount is None:
+ if res.rowCount is None or (
+ res.rowCount == 0 and self._entity.tableType == TableType.View
+ ):
# if we don't have any row count, fallback to the base logic
return super().compute()
return res
@@ -263,7 +260,9 @@ def compute(self):
)
res = self.runner._session.execute(query).first()
- if res.rowCount is None:
+ if res.rowCount is None or (
+ res.rowCount == 0 and self._entity.tableType == TableType.View
+ ):
# if we don't have any row count, fallback to the base logic
return super().compute()
return res
@@ -307,7 +306,9 @@ def table_storage(self):
)
res = self.runner._session.execute(query).first()
- if res.rowCount is None:
+ if res.rowCount is None or (
+ res.rowCount == 0 and self._entity.tableType == TableType.View
+ ):
# if we don't have any row count, fallback to the base logic
return super().compute()
return res
@@ -336,7 +337,9 @@ def tables(self):
where_clause,
)
res = self.runner._session.execute(query).first()
- if res.rowCount is None:
+ if res.rowCount is None or (
+ res.rowCount == 0 and self._entity.tableType == TableType.View
+ ):
# if we don't have any row count, fallback to the base logic
return super().compute()
return res
@@ -363,9 +366,16 @@ def compute(self):
)
res = self.runner._session.execute(query).first()
- if res.rowCount is None:
+ if res.rowCount is None or (
+ res.rowCount == 0 and self._entity.tableType == TableType.View
+ ):
# if we don't have any row count, fallback to the base logic
return super().compute()
+ res = res._asdict()
+ # The InnoDB row count is an estimate, so we patch the row count with a COUNT(*) query
+ # https://dev.mysql.com/doc/refman/8.3/en/information-schema-innodb-tablestats-table.html
+ row_count = self.runner.select_first_from_table(Metrics.ROW_COUNT().fn())
+ res.update({ROW_COUNT: row_count.rowCount})
return res
@@ -390,7 +400,9 @@ def compute(self):
columns, self._build_table("svv_table_info", "pg_catalog"), where_clause
)
res = self.runner._session.execute(query).first()
- if res.rowCount is None:
+ if res.rowCount is None or (
+ res.rowCount == 0 and self._entity.tableType == TableType.View
+ ):
# if we don't have any row count, fallback to the base logic
return super().compute()
return res
@@ -400,9 +412,15 @@ class TableMetricComputer:
"""Table Metric Construct"""
def __init__(
- self, dialect: str, runner: QueryRunner, metrics: List[Metrics], conn_config
+ self,
+ dialect: str,
+ runner: QueryRunner,
+ metrics: List[Metrics],
+ conn_config,
+ entity: OMTable,
):
"""Instantiate table metric computer with a dialect computer"""
+ self._entity = entity
self._dialect = dialect
self._runner = runner
self._metrics = metrics
@@ -413,6 +431,7 @@ def __init__(
runner=self._runner,
metrics=self._metrics,
conn_config=self._conn_config,
+ entity=self._entity,
)
)
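
The same view fallback condition is repeated across the dialect-specific computers above; a hypothetical helper like the one below (not part of the diff) captures that check in one place:

    from metadata.generated.schema.entity.data.table import Table as OMTable, TableType

    def should_fallback_to_base_compute(res, entity: OMTable) -> bool:
        """Fall back to the base COUNT(*) logic when the system tables report no
        row count, or report 0 rows for a view, which the change treats as unreliable."""
        return res.rowCount is None or (
            res.rowCount == 0 and entity.tableType == TableType.View
        )
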
diff --git a/ingestion/src/metadata/profiler/processor/core.py b/ingestion/src/metadata/profiler/processor/core.py
index 5081b43304d7..ff0fd7cc0559 100644
--- a/ingestion/src/metadata/profiler/processor/core.py
+++ b/ingestion/src/metadata/profiler/processor/core.py
@@ -196,10 +196,7 @@ def _check_profile_and_handle(
CreateTableProfileRequest:
"""
for attrs, val in profile.tableProfile:
- if (
- attrs not in {"timestamp", "profileSample", "profileSampleType"}
- and val is not None
- ):
+ if attrs not in {"timestamp", "profileSample", "profileSampleType"} and val:
return
for col_element in profile.columnProfile:
diff --git a/ingestion/src/metadata/profiler/processor/sample_data_handler.py b/ingestion/src/metadata/profiler/processor/sample_data_handler.py
index 33bd65aca60f..f029d2836c98 100644
--- a/ingestion/src/metadata/profiler/processor/sample_data_handler.py
+++ b/ingestion/src/metadata/profiler/processor/sample_data_handler.py
@@ -17,8 +17,13 @@
from functools import singledispatch
from io import BytesIO
+from pydantic.json import ENCODERS_BY_TYPE
+
from metadata.clients.aws_client import AWSClient
from metadata.generated.schema.entity.data.table import Table, TableData
+from metadata.generated.schema.entity.services.connections.connectionBasicType import (
+ DataStorageConfig,
+)
from metadata.generated.schema.security.credentials.awsCredentials import AWSCredentials
from metadata.profiler.interface.profiler_interface import ProfilerInterface
from metadata.utils.helpers import clean_uri
@@ -27,15 +32,45 @@
logger = profiler_logger()
-def _get_object_key(table: Table, prefix: str, overwrite_data: bool) -> str:
+class PathPatternException(Exception):
+ """
+ Exception raised when the file path pattern is invalid
+ """
+
+
+def validate_path_pattern(file_path_format: str) -> None:
+ if not (
+ "{service_name}" in file_path_format
+ and "{database_name}" in file_path_format
+ and "{database_schema_name}" in file_path_format
+ and "{table_name}" in file_path_format
+ and file_path_format.endswith(".parquet")
+ ):
+ raise PathPatternException(
+ "Please provide a valid path pattern, "
+ "the pattern should include these components {service_name}, "
+ "{database_name}, {database_schema_name}, {table_name} and "
+ "it should end with extension .parquet"
+ )
+
+
+def _get_object_key(
+ table: Table, prefix: str, overwrite_data: bool, file_path_format: str
+) -> str:
+ validate_path_pattern(file_path_format)
+ file_name = file_path_format.format(
+ service_name=table.service.name,
+ database_name=table.database.name,
+ database_schema_name=table.databaseSchema.name,
+ table_name=table.name.__root__,
+ )
if not overwrite_data:
- file_name = f"sample_data_{datetime.now().strftime('%Y_%m_%d')}.parquet"
- else:
- file_name = "sample_data.parquet"
- path = str(table.fullyQualifiedName.__root__).replace(".", "/")
+ file_name = file_name.replace(
+ ".parquet", f"_{datetime.now().strftime('%Y_%m_%d')}.parquet"
+ )
if prefix:
- return f"{clean_uri(prefix)}/{path}/{file_name}"
- return f"{path}/{file_name}"
+ return f"{clean_uri(prefix)}/{file_name}"
+ return file_name
def upload_sample_data(data: TableData, profiler_interface: ProfilerInterface) -> None:
@@ -45,9 +80,10 @@ def upload_sample_data(data: TableData, profiler_interface: ProfilerInterface) -
import pandas as pd # pylint: disable=import-outside-toplevel
try:
- sample_storage_config = profiler_interface.storage_config
+ sample_storage_config: DataStorageConfig = profiler_interface.storage_config
if not sample_storage_config:
return
+ ENCODERS_BY_TYPE[bytes] = lambda v: v.decode("utf-8", "ignore")
deserialized_data = json.loads(data.json())
df = pd.DataFrame(
data=deserialized_data.get("rows", []),
@@ -59,6 +95,7 @@ def upload_sample_data(data: TableData, profiler_interface: ProfilerInterface) -
table=profiler_interface.table_entity,
prefix=sample_storage_config.prefix,
overwrite_data=sample_storage_config.overwriteData,
+ file_path_format=sample_storage_config.filePathPattern,
)
upload_to_storage(
sample_storage_config.storageConfig,
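
A rough illustration of the new filePathPattern handling: the standalone function below mirrors the formatting logic in _get_object_key under simplified assumptions (the pattern string and names are hypothetical, and prefix handling skips clean_uri). The pattern satisfies validate_path_pattern because it contains all four placeholders and ends with .parquet:

    from datetime import datetime

    # Hypothetical pattern accepted by validate_path_pattern
    pattern = "{service_name}/{database_name}/{database_schema_name}/{table_name}/sample_data.parquet"

    def build_key(prefix: str, overwrite_data: bool) -> str:
        # Format the pattern, then date-stamp the file name when not overwriting
        file_name = pattern.format(
            service_name="my_service",
            database_name="my_db",
            database_schema_name="my_schema",
            table_name="my_table",
        )
        if not overwrite_data:
            file_name = file_name.replace(
                ".parquet", f"_{datetime.now().strftime('%Y_%m_%d')}.parquet"
            )
        return f"{prefix}/{file_name}" if prefix else file_name

    # e.g. "samples/my_service/my_db/my_schema/my_table/sample_data_<date>.parquet"
    print(build_key("samples", overwrite_data=False))
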
diff --git a/ingestion/src/metadata/profiler/processor/sampler/nosql/sampler.py b/ingestion/src/metadata/profiler/processor/sampler/nosql/sampler.py
deleted file mode 100644
index 333d5ae712a8..000000000000
--- a/ingestion/src/metadata/profiler/processor/sampler/nosql/sampler.py
+++ /dev/null
@@ -1,72 +0,0 @@
-from typing import Dict, List, Optional, Tuple
-
-from metadata.generated.schema.entity.data.table import ProfileSampleType, TableData
-from metadata.profiler.adaptors.nosql_adaptor import NoSQLAdaptor
-from metadata.profiler.processor.sampler.sampler_interface import SamplerInterface
-from metadata.utils.constants import SAMPLE_DATA_DEFAULT_COUNT
-from metadata.utils.sqa_like_column import SQALikeColumn
-
-
-class NoSQLSampler(SamplerInterface):
- client: NoSQLAdaptor
-
- def _rdn_sample_from_user_query(self) -> List[Dict[str, any]]:
- """
- Get random sample from user query
- """
- limit = self._get_limit()
- return self.client.query(
- self.table, self.table.columns, self._profile_sample_query, limit
- )
-
- def _fetch_sample_data_from_user_query(self) -> TableData:
- """
- Fetch sample data based on a user query. Assuming the enging has one (example: MongoDB)
- If the engine does not support a custom query, an error will be raised.
- """
- records = self._rdn_sample_from_user_query()
- columns = [
- SQALikeColumn(name=column.name.__root__, type=column.dataType)
- for column in self.table.columns
- ]
- rows, cols = self.transpose_records(records, columns)
- return TableData(rows=rows, columns=[c.name for c in cols])
-
- def random_sample(self):
- pass
-
- def fetch_sample_data(self, columns: List[SQALikeColumn]) -> TableData:
- if self._profile_sample_query:
- return self._fetch_sample_data_from_user_query()
- return self._fetch_sample_data(columns)
-
- def _fetch_sample_data(self, columns: List[SQALikeColumn]):
- """
- returns sampled ometa dataframes
- """
- limit = self._get_limit()
- records = self.client.scan(self.table, self.table.columns, limit)
- rows, cols = self.transpose_records(records, columns)
- return TableData(rows=rows, columns=[col.name for col in cols])
-
- def _get_limit(self) -> Optional[int]:
- num_rows = self.client.item_count(self.table)
- if self.profile_sample_type == ProfileSampleType.PERCENTAGE:
- limit = num_rows * (self.profile_sample / 100)
- elif self.profile_sample_type == ProfileSampleType.ROWS:
- limit = self.profile_sample
- else:
- limit = SAMPLE_DATA_DEFAULT_COUNT
- return limit
-
- @staticmethod
- def transpose_records(
- records: List[Dict[str, any]], columns: List[SQALikeColumn]
- ) -> Tuple[List[List[any]], List[SQALikeColumn]]:
- rows = []
- for record in records:
- row = []
- for column in columns:
- row.append(record.get(column.name))
- rows.append(row)
- return rows, columns
diff --git a/ingestion/src/metadata/profiler/processor/sampler/sampler_factory.py b/ingestion/src/metadata/profiler/processor/sampler/sampler_factory.py
index 88584f4eb24e..e7c0f25e7e5a 100644
--- a/ingestion/src/metadata/profiler/processor/sampler/sampler_factory.py
+++ b/ingestion/src/metadata/profiler/processor/sampler/sampler_factory.py
@@ -21,14 +21,10 @@
from metadata.generated.schema.entity.services.connections.database.datalakeConnection import (
DatalakeConnection,
)
-from metadata.generated.schema.entity.services.connections.database.mongoDBConnection import (
- MongoDBConnection,
-)
from metadata.generated.schema.entity.services.connections.database.trinoConnection import (
TrinoConnection,
)
from metadata.generated.schema.entity.services.databaseService import DatabaseConnection
-from metadata.profiler.processor.sampler.nosql.sampler import NoSQLSampler
from metadata.profiler.processor.sampler.pandas.sampler import DatalakeSampler
from metadata.profiler.processor.sampler.sqlalchemy.bigquery.sampler import (
BigQuerySampler,
@@ -63,4 +59,3 @@ def create(
sampler_factory_.register(BigQueryConnection.__name__, BigQuerySampler)
sampler_factory_.register(DatalakeConnection.__name__, DatalakeSampler)
sampler_factory_.register(TrinoConnection.__name__, TrinoSampler)
-sampler_factory_.register(MongoDBConnection.__name__, NoSQLSampler)
diff --git a/ingestion/src/metadata/profiler/processor/sampler/sampler_interface.py b/ingestion/src/metadata/profiler/processor/sampler/sampler_interface.py
index daba85fcebcc..8711affa2c49 100644
--- a/ingestion/src/metadata/profiler/processor/sampler/sampler_interface.py
+++ b/ingestion/src/metadata/profiler/processor/sampler/sampler_interface.py
@@ -17,7 +17,7 @@
from sqlalchemy import Column
-from metadata.generated.schema.entity.data.table import Table, TableData
+from metadata.generated.schema.entity.data.table import TableData
from metadata.profiler.api.models import ProfileSampleConfig
from metadata.utils.constants import SAMPLE_DATA_DEFAULT_COUNT
from metadata.utils.sqa_like_column import SQALikeColumn
@@ -29,7 +29,7 @@ class SamplerInterface(ABC):
def __init__(
self,
client,
- table: Table,
+ table,
profile_sample_config: Optional[ProfileSampleConfig] = None,
partition_details: Optional[Dict] = None,
profile_sample_query: Optional[str] = None,
diff --git a/ingestion/src/metadata/profiler/source/databricks/profiler_source.py b/ingestion/src/metadata/profiler/source/databricks/profiler_source.py
new file mode 100644
index 000000000000..009bd1d6d774
--- /dev/null
+++ b/ingestion/src/metadata/profiler/source/databricks/profiler_source.py
@@ -0,0 +1,36 @@
+"""Extend the ProfilerSource class to add support for Databricks is_disconnect SQA method"""
+
+from metadata.generated.schema.entity.services.databaseService import DatabaseService
+from metadata.generated.schema.metadataIngestion.workflow import (
+ OpenMetadataWorkflowConfig,
+)
+from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.profiler.source.base.profiler_source import ProfilerSource
+
+
+def is_disconnect(self, e, connection, cursor):
+ """is_disconnect method for the Databricks dialect"""
+ if "Invalid SessionHandle: SessionHandle" in str(e):
+ return True
+ return False
+
+
+class DataBricksProfilerSource(ProfilerSource):
+ """Databricks Profiler source"""
+
+ def __init__(
+ self,
+ config: OpenMetadataWorkflowConfig,
+ database: DatabaseService,
+ ometa_client: OpenMetadata,
+ ):
+ super().__init__(config, database, ometa_client)
+ self.set_is_disconnect()
+
+ def set_is_disconnect(self):
+ """Set the is_disconnect method for the Databricks dialect"""
+ from databricks.sqlalchemy import (
+ DatabricksDialect, # pylint: disable=import-outside-toplevel
+ )
+
+ DatabricksDialect.is_disconnect = is_disconnect
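
A small sketch of what the monkey patch above does at runtime; the fake dialect and error text are illustrative, but the substring check matches the is_disconnect function added in this file:

    from metadata.profiler.source.databricks.profiler_source import is_disconnect

    class _FakeDialect:  # stands in for databricks.sqlalchemy.DatabricksDialect
        pass

    # Same assignment as set_is_disconnect(): expired-session errors are now
    # reported as disconnects so SQLAlchemy can invalidate the connection and retry
    _FakeDialect.is_disconnect = is_disconnect

    dialect = _FakeDialect()
    assert dialect.is_disconnect(Exception("Invalid SessionHandle: SessionHandle(...)"), None, None) is True
    assert dialect.is_disconnect(Exception("some other error"), None, None) is False
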
diff --git a/ingestion/src/metadata/profiler/source/metadata.py b/ingestion/src/metadata/profiler/source/metadata.py
index d14ce2ed4fd2..b266c066ddb6 100644
--- a/ingestion/src/metadata/profiler/source/metadata.py
+++ b/ingestion/src/metadata/profiler/source/metadata.py
@@ -43,6 +43,10 @@
logger = profiler_logger()
+TABLE_FIELDS = ["tableProfilerConfig", "columns", "customMetrics"]
+TAGS_FIELD = ["tags"]
+
+
class ProfilerSourceAndEntity(BaseModel):
"""Return class for the OpenMetadata Profiler Source"""
@@ -273,7 +277,9 @@ def get_table_entities(self, database):
"""
tables = self.metadata.list_all_entities(
entity=Table,
- fields=["tableProfilerConfig", "columns", "customMetrics"],
+ fields=TABLE_FIELDS
+ if not self.source_config.processPiiSensitive
+ else TABLE_FIELDS + TAGS_FIELD,
params={
"service": self.config.source.serviceName,
"database": fqn.build(
diff --git a/ingestion/src/metadata/profiler/source/profiler_source_factory.py b/ingestion/src/metadata/profiler/source/profiler_source_factory.py
index 6fd2973c0bd3..0e616354e8d9 100644
--- a/ingestion/src/metadata/profiler/source/profiler_source_factory.py
+++ b/ingestion/src/metadata/profiler/source/profiler_source_factory.py
@@ -16,8 +16,12 @@
from metadata.generated.schema.entity.services.connections.database.bigQueryConnection import (
BigqueryType,
)
+from metadata.generated.schema.entity.services.connections.database.databricksConnection import (
+ DatabricksType,
+)
from metadata.profiler.source.base.profiler_source import ProfilerSource
from metadata.profiler.source.bigquery.profiler_source import BigQueryProfilerSource
+from metadata.profiler.source.databricks.profiler_source import DataBricksProfilerSource
class ProfilerSourceFactory:
@@ -44,3 +48,7 @@ def create(self, source_type: str, *args, **kwargs) -> ProfilerSource:
BigqueryType.BigQuery.value.lower(),
BigQueryProfilerSource,
)
+profiler_source_factory.register_source(
+ DatabricksType.Databricks.value.lower(),
+ DataBricksProfilerSource,
+)
diff --git a/ingestion/src/metadata/readers/dataframe/json.py b/ingestion/src/metadata/readers/dataframe/json.py
index c2c16f26bc8f..20be18c0a8b4 100644
--- a/ingestion/src/metadata/readers/dataframe/json.py
+++ b/ingestion/src/metadata/readers/dataframe/json.py
@@ -16,7 +16,7 @@
import io
import json
import zipfile
-from typing import List, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
from metadata.readers.dataframe.base import DataFrameReader
from metadata.readers.dataframe.common import dataframe_to_chunks
@@ -47,7 +47,7 @@ class JSONDataFrameReader(DataFrameReader):
@staticmethod
def read_from_json(
key: str, json_text: bytes, decode: bool = False, **__
- ) -> List["DataFrame"]:
+ ) -> Tuple[List["DataFrame"], Optional[Dict[str, Any]]]:
"""
Decompress a JSON file (if needed) and read its contents
as a dataframe.
@@ -60,20 +60,25 @@ def read_from_json(
import pandas as pd
json_text = _get_json_text(key=key, text=json_text, decode=decode)
+ raw_data = None
try:
data = json.loads(json_text)
+ if isinstance(data, dict) and data.get("$schema"):
+ raw_data = json_text
except json.decoder.JSONDecodeError:
logger.debug("Failed to read as JSON object. Trying to read as JSON Lines")
data = [json.loads(json_obj) for json_obj in json_text.strip().split("\n")]
# if we get a scalar value (e.g. {"a":"b"}) then we need to specify the index
data = data if not isinstance(data, dict) else [data]
- return dataframe_to_chunks(pd.DataFrame.from_records(data))
+ return dataframe_to_chunks(pd.DataFrame.from_records(data)), raw_data
def _read(self, *, key: str, bucket_name: str, **kwargs) -> DatalakeColumnWrapper:
text = self.reader.read(key, bucket_name=bucket_name)
+ dataframes, raw_data = self.read_from_json(
+ key=key, json_text=text, decode=True, **kwargs
+ )
return DatalakeColumnWrapper(
- dataframes=self.read_from_json(
- key=key, json_text=text, decode=True, **kwargs
- )
+ dataframes=dataframes,
+ raw_data=raw_data,
)
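
A brief sketch of how a caller now unpacks the two-value return of read_from_json; the payload below is made up, but raw_data is only populated when the document parses as an object with a top-level "$schema" key, as in the change above:

    from metadata.readers.dataframe.json import JSONDataFrameReader

    # Hypothetical JSON Schema payload, so raw_data comes back non-empty
    payload = b'{"$schema": "http://json-schema.org/draft-07/schema#", "properties": {"id": {"type": "integer"}}}'

    dataframes, raw_data = JSONDataFrameReader.read_from_json(
        key="schema.json", json_text=payload, decode=False
    )
    if raw_data is not None:
        # The raw text is kept so downstream parsers can derive column types
        # from the schema instead of inferring them from the dataframe
        print("got a JSON schema document")
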
diff --git a/ingestion/src/metadata/readers/dataframe/models.py b/ingestion/src/metadata/readers/dataframe/models.py
index 765e6c1ae783..67678b90e4c9 100644
--- a/ingestion/src/metadata/readers/dataframe/models.py
+++ b/ingestion/src/metadata/readers/dataframe/models.py
@@ -29,6 +29,7 @@ class DatalakeColumnWrapper(BaseModel):
columns: Optional[List[Column]]
dataframes: Optional[List[Any]] # pandas.Dataframe does not have any validators
+ raw_data: Any # in special cases like json schema, we need to store the raw data
class DatalakeTableSchemaWrapper(BaseModel):
diff --git a/ingestion/src/metadata/utils/credentials.py b/ingestion/src/metadata/utils/credentials.py
index ca5ab392a887..de2767e71d68 100644
--- a/ingestion/src/metadata/utils/credentials.py
+++ b/ingestion/src/metadata/utils/credentials.py
@@ -15,7 +15,7 @@
import json
import os
import tempfile
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Union
from cryptography.hazmat.primitives import serialization
from google import auth
@@ -25,6 +25,9 @@
GCPCredentials,
GcpCredentialsPath,
)
+from metadata.generated.schema.security.credentials.gcpExternalAccount import (
+ GcpExternalAccount,
+)
from metadata.generated.schema.security.credentials.gcpValues import (
GcpCredentialsValues,
)
@@ -85,30 +88,44 @@ def create_credential_tmp_file(credentials: dict) -> str:
return temp_file_path
-def build_google_credentials_dict(gcp_values: GcpCredentialsValues) -> Dict[str, str]:
+def build_google_credentials_dict(
+ gcp_values: Union[GcpCredentialsValues, GcpExternalAccount]
+) -> Dict[str, str]:
"""
Given GcPCredentialsValues, build a dictionary as the JSON file
downloaded from GCP with the service_account
:param gcp_values: GCP credentials
:return: Dictionary with credentials
"""
- private_key_str = gcp_values.privateKey.get_secret_value()
- # adding the replace string here to escape line break if passed from env
- private_key_str = private_key_str.replace("\\n", "\n")
- validate_private_key(private_key_str)
-
- return {
- "type": gcp_values.type,
- "project_id": gcp_values.projectId.__root__,
- "private_key_id": gcp_values.privateKeyId,
- "private_key": private_key_str,
- "client_email": gcp_values.clientEmail,
- "client_id": gcp_values.clientId,
- "auth_uri": str(gcp_values.authUri),
- "token_uri": str(gcp_values.tokenUri),
- "auth_provider_x509_cert_url": str(gcp_values.authProviderX509CertUrl),
- "client_x509_cert_url": str(gcp_values.clientX509CertUrl),
- }
+ if isinstance(gcp_values, GcpCredentialsValues):
+ private_key_str = gcp_values.privateKey.get_secret_value()
+ # adding the replace string here to escape line break if passed from env
+ private_key_str = private_key_str.replace("\\n", "\n")
+ validate_private_key(private_key_str)
+
+ return {
+ "type": gcp_values.type,
+ "project_id": gcp_values.projectId.__root__,
+ "private_key_id": gcp_values.privateKeyId,
+ "private_key": private_key_str,
+ "client_email": gcp_values.clientEmail,
+ "client_id": gcp_values.clientId,
+ "auth_uri": str(gcp_values.authUri),
+ "token_uri": str(gcp_values.tokenUri),
+ "auth_provider_x509_cert_url": str(gcp_values.authProviderX509CertUrl),
+ "client_x509_cert_url": str(gcp_values.clientX509CertUrl),
+ }
+ if isinstance(gcp_values, GcpExternalAccount):
+ return {
+ "type": gcp_values.externalType,
+ "audience": gcp_values.audience,
+ "subject_token_type": gcp_values.subjectTokenType,
+ "token_url": gcp_values.tokenURL,
+ "credential_source": gcp_values.credentialSource,
+ }
+ raise InvalidGcpConfigException(
+ f"Error trying to build GCP credentials dict due to Invalid GCP config {type(gcp_values)}"
+ )
def set_google_credentials(gcp_credentials: GCPCredentials) -> None:
diff --git a/ingestion/src/metadata/utils/datalake/datalake_utils.py b/ingestion/src/metadata/utils/datalake/datalake_utils.py
index e067443090f9..3630723eb617 100644
--- a/ingestion/src/metadata/utils/datalake/datalake_utils.py
+++ b/ingestion/src/metadata/utils/datalake/datalake_utils.py
@@ -17,10 +17,11 @@
import json
import random
import traceback
-from typing import Dict, List, Optional, Union, cast
+from typing import Any, Dict, List, Optional, Union, cast
from metadata.generated.schema.entity.data.table import Column, DataType
from metadata.ingestion.source.database.column_helpers import truncate_column_name
+from metadata.parsers.json_schema_parser import parse_json_schema
from metadata.readers.dataframe.models import (
DatalakeColumnWrapper,
DatalakeTableSchemaWrapper,
@@ -35,6 +36,7 @@ def fetch_dataframe(
config_source,
client,
file_fqn: DatalakeTableSchemaWrapper,
+ fetch_raw_data: bool = False,
**kwargs,
) -> Optional[List["DataFrame"]]:
"""
@@ -60,6 +62,8 @@ def fetch_dataframe(
df_wrapper: DatalakeColumnWrapper = df_reader.read(
key=key, bucket_name=bucket_name, **kwargs
)
+ if fetch_raw_data:
+ return df_wrapper.dataframes, df_wrapper.raw_data
return df_wrapper.dataframes
except Exception as err:
logger.error(
@@ -73,6 +77,8 @@ def fetch_dataframe(
# Here we need to blow things up. Without the dataframe we cannot move forward
raise err
+ if fetch_raw_data:
+ return None, None
return None
@@ -112,6 +118,7 @@ def create(
file_type: Optional[SupportedTypes] = None,
sample: bool = True,
shuffle: bool = False,
+ raw_data: Any = None,
):
"""Instantiate a column parser object with the appropriate parser
@@ -126,8 +133,14 @@ def create(
data_frame = cls._get_data_frame(data_frame, sample, shuffle)
if file_type == SupportedTypes.PARQUET:
parser = ParquetDataFrameColumnParser(data_frame)
- return cls(parser)
- parser = GenericDataFrameColumnParser(data_frame)
+ elif file_type in {
+ SupportedTypes.JSON,
+ SupportedTypes.JSONGZ,
+ SupportedTypes.JSONZIP,
+ }:
+ parser = JsonDataFrameColumnParser(data_frame, raw_data=raw_data)
+ else:
+ parser = GenericDataFrameColumnParser(data_frame)
return cls(parser)
@staticmethod
@@ -172,8 +185,9 @@ class GenericDataFrameColumnParser:
"bytes": DataType.BYTES,
}
- def __init__(self, data_frame: "DataFrame"):
+ def __init__(self, data_frame: "DataFrame", raw_data: Any = None):
self.data_frame = data_frame
+ self.raw_data = raw_data
def get_columns(self):
"""
@@ -472,3 +486,19 @@ def _get_pq_data_type(self, column):
data_type = self._data_formats.get(str(column.type), DataType.UNKNOWN)
return data_type
+
+
+class JsonDataFrameColumnParser(GenericDataFrameColumnParser):
+ """Given a dataframe object generated from a json file, parse the columns and return a list of Column objects."""
+
+ def get_columns(self):
+ """
+ method to process column details for json files
+ """
+ if self.raw_data:
+ try:
+ return parse_json_schema(schema_text=self.raw_data, cls=Column)
+ except Exception as exc:
+ logger.warning(f"Unable to parse the json schema: {exc}")
+ logger.debug(traceback.format_exc())
+ return self._get_columns(self.data_frame)
diff --git a/ingestion/src/metadata/utils/secrets/azure_kv_secrets_manager.py b/ingestion/src/metadata/utils/secrets/azure_kv_secrets_manager.py
index 4682fc23ace4..566c9154850c 100644
--- a/ingestion/src/metadata/utils/secrets/azure_kv_secrets_manager.py
+++ b/ingestion/src/metadata/utils/secrets/azure_kv_secrets_manager.py
@@ -17,9 +17,9 @@
from abc import ABC
from typing import Optional
-from azure.identity import ClientSecretCredential, DefaultAzureCredential
-from azure.keyvault.secrets import KeyVaultSecret, SecretClient
+from azure.keyvault.secrets import KeyVaultSecret
+from metadata.clients.azure_client import AzureClient
from metadata.generated.schema.security.secrets.secretsManagerClientLoader import (
SecretsManagerClientLoader,
)
@@ -105,23 +105,7 @@ def __init__(
):
super().__init__(provider=SecretsManagerProvider.azure_kv, loader=loader)
- if (
- self.credentials.tenantId
- and self.credentials.clientId
- and self.credentials.clientSecret
- ):
- azure_identity = ClientSecretCredential(
- tenant_id=self.credentials.tenantId,
- client_id=self.credentials.clientId,
- client_secret=self.credentials.clientSecret.get_secret_value(),
- )
- else:
- azure_identity = DefaultAzureCredential()
-
- self.client = SecretClient(
- vault_url=f"https://{self.credentials.vaultName}.vault.azure.net/",
- credential=azure_identity,
- )
+ self.client = AzureClient(self.credentials).create_secret_client()
def get_string_value(self, secret_id: str) -> str:
"""
diff --git a/ingestion/src/metadata/utils/source_hash.py b/ingestion/src/metadata/utils/source_hash.py
index 80599ef7d45e..db76ce42083d 100644
--- a/ingestion/src/metadata/utils/source_hash.py
+++ b/ingestion/src/metadata/utils/source_hash.py
@@ -14,9 +14,14 @@
"""
import hashlib
+import traceback
from typing import Dict, Optional
from metadata.ingestion.ometa.ometa_api import C
+from metadata.utils.logger import utils_logger
+
+logger = utils_logger()
+
SOURCE_HASH_EXCLUDE_FIELDS = {
"sourceHash": True,
@@ -25,19 +30,24 @@
def generate_source_hash(
create_request: C, exclude_fields: Optional[Dict] = None
-) -> str:
+) -> Optional[str]:
"""
Given a create_request model convert it to json string and generate a hash value
"""
-
- # We always want to exclude the sourceHash when generating the fingerprint
- exclude_fields = (
- SOURCE_HASH_EXCLUDE_FIELDS.update(exclude_fields)
- if exclude_fields
- else SOURCE_HASH_EXCLUDE_FIELDS
- )
-
- create_request_json = create_request.json(exclude=exclude_fields)
-
- json_bytes = create_request_json.encode("utf-8")
- return hashlib.md5(json_bytes).hexdigest()
+ try:
+ # We always want to exclude the sourceHash when generating the fingerprint
+ exclude_fields = (
+ SOURCE_HASH_EXCLUDE_FIELDS.update(exclude_fields)
+ if exclude_fields
+ else SOURCE_HASH_EXCLUDE_FIELDS
+ )
+
+ create_request_json = create_request.json(exclude=exclude_fields)
+
+ json_bytes = create_request_json.encode("utf-8")
+ return hashlib.md5(json_bytes).hexdigest()
+
+ except Exception as exc:
+ logger.warning(f"Failed to generate source hash due to - {exc}")
+ logger.debug(traceback.format_exc())
+ return None
diff --git a/ingestion/src/metadata/utils/storage_metadata_config.py b/ingestion/src/metadata/utils/storage_metadata_config.py
index 3eb12a1670c8..7cfbdd8324e2 100644
--- a/ingestion/src/metadata/utils/storage_metadata_config.py
+++ b/ingestion/src/metadata/utils/storage_metadata_config.py
@@ -17,6 +17,7 @@
import requests
+from metadata.clients.azure_client import AzureClient
from metadata.generated.schema.entity.services.connections.database.datalake.azureConfig import (
AzureConfig,
)
@@ -153,21 +154,7 @@ def _(config: StorageMetadataAdlsConfig) -> ManifestMetadataConfig:
else STORAGE_METADATA_MANIFEST_FILE_NAME
)
- from azure.identity import ( # pylint: disable=import-outside-toplevel
- ClientSecretCredential,
- )
- from azure.storage.blob import ( # pylint: disable=import-outside-toplevel
- BlobServiceClient,
- )
-
- blob_client = BlobServiceClient(
- account_url=f"https://{config.securityConfig.accountName}.blob.core.windows.net/",
- credential=ClientSecretCredential(
- config.securityConfig.tenantId,
- config.securityConfig.clientId,
- config.securityConfig.clientSecret.get_secret_value(),
- ),
- )
+ blob_client = AzureClient(config.securityConfig).create_blob_client()
reader = get_reader(
config_source=AzureConfig(securityConfig=config.securityConfig),
diff --git a/ingestion/src/metadata/utils/test_utils.py b/ingestion/src/metadata/utils/test_utils.py
deleted file mode 100644
index 999f0f30e487..000000000000
--- a/ingestion/src/metadata/utils/test_utils.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# Copyright 2024 Collate
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Utility functions for testing
-"""
-from contextlib import contextmanager
-
-
-class MultipleException(Exception):
- def __init__(self, exceptions):
- self.exceptions = exceptions
- super().__init__(f"Multiple exceptions occurred: {exceptions}")
-
-
-class ErrorHandler:
- """
- A context manager that accumulates errors and raises them at the end of the block.
- Useful for cleaning up resources and ensuring that all errors are raised at the end of a test.
- Example:
- ```
- from metadata.utils.test_utils import accumulate_errors
- with accumulate_errors() as error_handler:
- error_handler.try_execute(lambda : 1 / 0)
- error_handler.try_execute(print, "Hello, World!")
- ```
-
- ```
- > Hello, World!
- > Traceback (most recent call last):
- > ...
- > ZeroDivisionError: division by zero
- ```
- """
-
- def __init__(self):
- self.errors = []
-
- def try_execute(self, func, *args, **kwargs):
- try:
- func(*args, **kwargs)
- except Exception as e:
- self.errors.append(e)
-
- def raise_if_errors(self):
- if len(self.errors) == 1:
- raise self.errors[0]
- if len(self.errors) > 1:
- raise MultipleException(self.errors)
-
-
-@contextmanager
-def accumulate_errors():
- error_handler = ErrorHandler()
- try:
- yield error_handler
- finally:
- error_handler.raise_if_errors()
diff --git a/ingestion/src/metadata/workflow/base.py b/ingestion/src/metadata/workflow/base.py
index 5641b4e253af..1ad9ba63b79c 100644
--- a/ingestion/src/metadata/workflow/base.py
+++ b/ingestion/src/metadata/workflow/base.py
@@ -108,7 +108,7 @@ def __init__(
@property
def ingestion_pipeline(self):
"""Get or create the Ingestion Pipeline from the configuration"""
- if not self._ingestion_pipeline:
+ if not self._ingestion_pipeline and self.config.ingestionPipelineFQN:
self._ingestion_pipeline = self.get_or_create_ingestion_pipeline()
return self._ingestion_pipeline
diff --git a/ingestion/tests/cli_e2e/dbt/redshift/dbt.yaml b/ingestion/tests/cli_e2e/dbt/redshift/dbt.yaml
index f551ef127463..fcea4c2177fb 100644
--- a/ingestion/tests/cli_e2e/dbt/redshift/dbt.yaml
+++ b/ingestion/tests/cli_e2e/dbt/redshift/dbt.yaml
@@ -5,6 +5,7 @@ source:
config:
type: DBT
dbtConfigSource:
+ dbtConfigType: "http"
dbtCatalogHttpPath: $E2E_REDSHIFT_DBT_CATALOG_HTTP_FILE_PATH
dbtManifestHttpPath: $E2E_REDSHIFT_DBT_MANIFEST_HTTP_FILE_PATH
dbtRunResultsHttpPath: $E2E_REDSHIFT_DBT_RUN_RESULTS_HTTP_FILE_PATH
diff --git a/ingestion/tests/integration/ometa/test_ometa_app_api.py b/ingestion/tests/integration/ometa/test_ometa_app_api.py
new file mode 100644
index 000000000000..2565526d1b78
--- /dev/null
+++ b/ingestion/tests/integration/ometa/test_ometa_app_api.py
@@ -0,0 +1,36 @@
+# Copyright 2021 Collate
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+OpenMetadata high-level API App test
+"""
+from unittest import TestCase
+
+from metadata.generated.schema.entity.applications.app import App
+
+from ..integration_base import int_admin_ometa
+
+
+class OMetaTableTest(TestCase):
+ """
+ Run this integration test with the local API available
+ Install the ingestion package before running the tests
+ """
+
+ service_entity_id = None
+
+ metadata = int_admin_ometa()
+
+ def test_get_app(self):
+ """We can GET an app via the client"""
+ app = self.metadata.get_by_name(entity=App, fqn="SearchIndexingApplication")
+ self.assertIsNotNone(app)
+ self.assertEqual(app.name.__root__, "SearchIndexingApplication")
diff --git a/ingestion/tests/integration/ometa/test_ometa_custom_properties_api.py b/ingestion/tests/integration/ometa/test_ometa_custom_properties_api.py
index afe57999c705..8834acb121de 100644
--- a/ingestion/tests/integration/ometa/test_ometa_custom_properties_api.py
+++ b/ingestion/tests/integration/ometa/test_ometa_custom_properties_api.py
@@ -144,9 +144,12 @@ def create_custom_property(self):
# Create the table size property
ometa_custom_property_request = OMetaCustomProperties(
entity_type=Table,
- custom_property_type=CustomPropertyDataTypes.STRING,
createCustomPropertyRequest=CreateCustomPropertyRequest(
- name="TableSize", description="Size of the Table"
+ name="TableSize",
+ description="Size of the Table",
+ propertyType=self.metadata.get_property_type_ref(
+ CustomPropertyDataTypes.STRING
+ ),
),
)
self.metadata.create_or_update_custom_property(
@@ -156,9 +159,12 @@ def create_custom_property(self):
# Create the DataQuality property for a table
ometa_custom_property_request = OMetaCustomProperties(
entity_type=Table,
- custom_property_type=CustomPropertyDataTypes.MARKDOWN,
createCustomPropertyRequest=CreateCustomPropertyRequest(
- name="DataQuality", description="Quality Details of a Table"
+ name="DataQuality",
+ description="Quality Details of a Table",
+ propertyType=self.metadata.get_property_type_ref(
+ CustomPropertyDataTypes.MARKDOWN
+ ),
),
)
self.metadata.create_or_update_custom_property(
@@ -168,9 +174,12 @@ def create_custom_property(self):
# Create the SchemaCost property for database schema
ometa_custom_property_request = OMetaCustomProperties(
entity_type=DatabaseSchema,
- custom_property_type=CustomPropertyDataTypes.INTEGER,
createCustomPropertyRequest=CreateCustomPropertyRequest(
- name="SchemaAge", description="Age in years of a Schema"
+ name="SchemaAge",
+ description="Age in years of a Schema",
+ propertyType=self.metadata.get_property_type_ref(
+ CustomPropertyDataTypes.INTEGER
+ ),
),
)
self.metadata.create_or_update_custom_property(
diff --git a/ingestion/tests/integration/ometa/test_ometa_patch.py b/ingestion/tests/integration/ometa/test_ometa_patch.py
index a5eb78932c26..409536091f2c 100644
--- a/ingestion/tests/integration/ometa/test_ometa_patch.py
+++ b/ingestion/tests/integration/ometa/test_ometa_patch.py
@@ -17,17 +17,6 @@
from datetime import datetime
from unittest import TestCase
-from ingestion.tests.integration.integration_base import (
- generate_name,
- get_create_entity,
- get_create_service,
- get_create_team_entity,
- get_create_test_case,
- get_create_test_definition,
- get_create_test_suite,
- get_create_user_entity,
- int_admin_ometa,
-)
from metadata.generated.schema.entity.data.database import Database
from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
from metadata.generated.schema.entity.data.table import Column, DataType, Table
@@ -54,6 +43,18 @@
from metadata.ingestion.models.table_metadata import ColumnTag
from metadata.utils.helpers import find_column_in_table
+from ..integration_base import (
+ generate_name,
+ get_create_entity,
+ get_create_service,
+ get_create_team_entity,
+ get_create_test_case,
+ get_create_test_definition,
+ get_create_test_suite,
+ get_create_user_entity,
+ int_admin_ometa,
+)
+
PII_TAG_LABEL = TagLabel(
tagFQN="PII.Sensitive",
labelType=LabelType.Automated,
diff --git a/ingestion/tests/integration/ometa/test_ometa_topology_patch.py b/ingestion/tests/integration/ometa/test_ometa_topology_patch.py
new file mode 100644
index 000000000000..7db3fa52ef0f
--- /dev/null
+++ b/ingestion/tests/integration/ometa/test_ometa_topology_patch.py
@@ -0,0 +1,227 @@
+# Copyright 2021 Collate
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Topology Patch Integration Test
+"""
+from unittest import TestCase
+
+from metadata.generated.schema.api.data.createDatabase import CreateDatabaseRequest
+from metadata.generated.schema.api.data.createDatabaseSchema import (
+ CreateDatabaseSchemaRequest,
+)
+from metadata.generated.schema.api.data.createTable import CreateTableRequest
+from metadata.generated.schema.api.services.createDatabaseService import (
+ CreateDatabaseServiceRequest,
+)
+from metadata.generated.schema.entity.data.table import Column, DataType, Table
+from metadata.generated.schema.entity.services.connections.database.common.basicAuth import (
+ BasicAuth,
+)
+from metadata.generated.schema.entity.services.connections.database.mysqlConnection import (
+ MysqlConnection,
+)
+from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
+ OpenMetadataConnection,
+)
+from metadata.generated.schema.entity.services.databaseService import (
+ DatabaseConnection,
+ DatabaseService,
+ DatabaseServiceType,
+)
+from metadata.generated.schema.security.client.openMetadataJWTClientConfig import (
+ OpenMetadataJWTClientConfig,
+)
+from metadata.ingestion.models.patch_request import (
+ ALLOWED_COMMON_PATCH_FIELDS,
+ ARRAY_ENTITY_FIELDS,
+ RESTRICT_UPDATE_LIST,
+)
+from metadata.ingestion.ometa.ometa_api import OpenMetadata
+
+
+class TopologyPatchTest(TestCase):
+ """
+ Run this integration test with the local API available
+ Install the ingestion package before running the tests
+ """
+
+ server_config = OpenMetadataConnection(
+ hostPort="http://localhost:8585/api",
+ authProvider="openmetadata",
+ securityConfig=OpenMetadataJWTClientConfig(
+ jwtToken="eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
+ ),
+ )
+ metadata = OpenMetadata(server_config)
+
+ assert metadata.health_check()
+
+ service = CreateDatabaseServiceRequest(
+ name="test-service-topology-patch",
+ serviceType=DatabaseServiceType.Mysql,
+ connection=DatabaseConnection(
+ config=MysqlConnection(
+ username="username",
+ authType=BasicAuth(
+ password="password",
+ ),
+ hostPort="http://localhost:1234",
+ )
+ ),
+ )
+ service_type = "databaseService"
+
+ @classmethod
+ def setUpClass(cls) -> None:
+ """
+ Prepare ingredients
+ """
+
+ cls.service_entity = cls.metadata.create_or_update(data=cls.service)
+
+ create_db = CreateDatabaseRequest(
+ name="test-db-topology-patch",
+ service=cls.service_entity.fullyQualifiedName,
+ )
+
+ cls.create_db_entity = cls.metadata.create_or_update(data=create_db)
+
+ create_schema = CreateDatabaseSchemaRequest(
+ name="test-schema-topology-patch",
+ database=cls.create_db_entity.fullyQualifiedName,
+ )
+
+ cls.create_schema_entity = cls.metadata.create_or_update(data=create_schema)
+
+ create = CreateTableRequest(
+ name="test-topology-patch-table-one",
+ databaseSchema=cls.create_schema_entity.fullyQualifiedName,
+ columns=[
+ Column(
+ name="column1", dataType=DataType.BIGINT, description="test column1"
+ ),
+ Column(
+ name="column2", dataType=DataType.BIGINT, description="test column2"
+ ),
+ Column(
+ name="column3", dataType=DataType.BIGINT, description="test column3"
+ ),
+ Column(
+ name="column4", dataType=DataType.BIGINT, description="test column4"
+ ),
+ Column(
+ name="column5", dataType=DataType.BIGINT, description="test column5"
+ ),
+ ],
+ )
+ cls.table_entity_one = cls.metadata.create_or_update(create)
+
+ create = CreateTableRequest(
+ name="test-topology-patch-table-two",
+ databaseSchema=cls.create_schema_entity.fullyQualifiedName,
+ columns=[
+ Column(
+ name="column1", dataType=DataType.BIGINT, description="test column1"
+ ),
+ Column(
+ name="column2", dataType=DataType.BIGINT, description="test column2"
+ ),
+ Column(
+ name="column3", dataType=DataType.BIGINT, description="test column3"
+ ),
+ Column(
+ name="column4", dataType=DataType.BIGINT, description="test column4"
+ ),
+ Column(
+ name="column5", dataType=DataType.BIGINT, description="test column5"
+ ),
+ ],
+ )
+ cls.table_entity_two = cls.metadata.create_or_update(create)
+
+ @classmethod
+ def tearDownClass(cls) -> None:
+ """
+ Clean up
+ """
+
+ service_id = str(
+ cls.metadata.get_by_name(
+ entity=DatabaseService, fqn=cls.service.name.__root__
+ ).id.__root__
+ )
+
+ cls.metadata.delete(
+ entity=DatabaseService,
+ entity_id=service_id,
+ recursive=True,
+ hard_delete=True,
+ )
+
+ def test_topology_patch_table_columns_with_random_order(self):
+ """Check if the table columns are patched"""
+ new_columns_list = [
+ Column(name="column3", dataType=DataType.BIGINT),
+ Column(name="column4", dataType=DataType.BIGINT),
+ Column(name="column5", dataType=DataType.BIGINT),
+ Column(name="column1", dataType=DataType.BIGINT),
+ Column(name="column2", dataType=DataType.BIGINT),
+ ]
+ updated_table = self.table_entity_one.copy(deep=True)
+ updated_table.columns = new_columns_list
+ self.metadata.patch(
+ entity=type(self.table_entity_one),
+ source=self.table_entity_one,
+ destination=updated_table,
+ allowed_fields=ALLOWED_COMMON_PATCH_FIELDS,
+ restrict_update_fields=RESTRICT_UPDATE_LIST,
+ array_entity_fields=ARRAY_ENTITY_FIELDS,
+ )
+ table_entity = self.metadata.get_by_id(
+ entity=Table, entity_id=self.table_entity_one.id.__root__
+ )
+ self.assertEqual(table_entity.columns[0].description.__root__, "test column1")
+ self.assertEqual(table_entity.columns[1].description.__root__, "test column2")
+ self.assertEqual(table_entity.columns[2].description.__root__, "test column3")
+ self.assertEqual(table_entity.columns[3].description.__root__, "test column4")
+ self.assertEqual(table_entity.columns[4].description.__root__, "test column5")
+
+ def test_topology_patch_table_columns_with_add_del(self):
+ """Check if the table columns are patched"""
+ new_columns_list = [
+ Column(
+ name="column7", dataType=DataType.BIGINT, description="test column7"
+ ),
+ Column(name="column3", dataType=DataType.BIGINT),
+ Column(name="column5", dataType=DataType.BIGINT),
+ Column(name="column1", dataType=DataType.BIGINT),
+ Column(
+ name="column6", dataType=DataType.BIGINT, description="test column6"
+ ),
+ ]
+ updated_table = self.table_entity_two.copy(deep=True)
+ updated_table.columns = new_columns_list
+ self.metadata.patch(
+ entity=type(self.table_entity_two),
+ source=self.table_entity_two,
+ destination=updated_table,
+ allowed_fields=ALLOWED_COMMON_PATCH_FIELDS,
+ restrict_update_fields=RESTRICT_UPDATE_LIST,
+ array_entity_fields=ARRAY_ENTITY_FIELDS,
+ )
+ table_entity = self.metadata.get_by_id(
+ entity=Table, entity_id=self.table_entity_two.id.__root__
+ )
+ self.assertEqual(table_entity.columns[0].description.__root__, "test column1")
+ self.assertEqual(table_entity.columns[1].description.__root__, "test column3")
+ self.assertEqual(table_entity.columns[2].description.__root__, "test column5")
+ self.assertEqual(table_entity.columns[3].description.__root__, "test column7")
+ self.assertEqual(table_entity.columns[4].description.__root__, "test column6")
diff --git a/ingestion/tests/integration/profiler/__init__.py b/ingestion/tests/integration/profiler/__init__.py
deleted file mode 100644
index e69de29bb2d1..000000000000
diff --git a/ingestion/tests/integration/profiler/test_nosql_profiler.py b/ingestion/tests/integration/profiler/test_nosql_profiler.py
deleted file mode 100644
index 693ad7ec7c13..000000000000
--- a/ingestion/tests/integration/profiler/test_nosql_profiler.py
+++ /dev/null
@@ -1,333 +0,0 @@
-# Copyright 2024 Collate
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-Test the NoSQL profiler using a MongoDB container
-To run this we need OpenMetadata server up and running.
-No sample data is required beforehand
-
-Test Steps:
-
-1. Start a MongoDB container
-2. Ingest data into OpenMetadata
-3. Run the profiler workflow
-4. Verify the profiler output
-5. Tear down the MongoDB container and delete the service from OpenMetadata
-"""
-
-from copy import deepcopy
-from datetime import datetime, timedelta
-from functools import partial
-from pathlib import Path
-from random import choice, randint
-from unittest import TestCase
-
-from pymongo import MongoClient, database
-from testcontainers.mongodb import MongoDbContainer
-
-from ingestion.tests.integration.integration_base import int_admin_ometa
-from metadata.generated.schema.entity.data.table import ColumnProfile, Table
-from metadata.generated.schema.entity.services.databaseService import DatabaseService
-from metadata.ingestion.ometa.ometa_api import OpenMetadata
-from metadata.profiler.api.models import TableConfig
-from metadata.utils.constants import SAMPLE_DATA_DEFAULT_COUNT
-from metadata.utils.helpers import datetime_to_ts
-from metadata.utils.test_utils import accumulate_errors
-from metadata.utils.time_utils import get_end_of_day_timestamp_mill
-from metadata.workflow.metadata import MetadataWorkflow
-from metadata.workflow.profiler import ProfilerWorkflow
-from metadata.workflow.workflow_output_handler import print_status
-
-SERVICE_NAME = Path(__file__).stem
-
-
-def add_query_config(config, table_config: TableConfig) -> dict:
- config_copy = deepcopy(config)
- config_copy["processor"]["config"].setdefault("tableConfig", [])
- config_copy["processor"]["config"]["tableConfig"].append(table_config)
- return config_copy
-
-
-def get_ingestion_config(mongo_port: str, mongo_user: str, mongo_pass: str):
- return {
- "source": {
- "type": "mongodb",
- "serviceName": SERVICE_NAME,
- "serviceConnection": {
- "config": {
- "type": "MongoDB",
- "hostPort": f"localhost:{mongo_port}",
- "username": mongo_user,
- "password": mongo_pass,
- }
- },
- "sourceConfig": {"config": {"type": "DatabaseMetadata"}},
- },
- "sink": {"type": "metadata-rest", "config": {}},
- "workflowConfig": {
- "loggerLevel": "DEBUG",
- "openMetadataServerConfig": {
- "hostPort": "http://localhost:8585/api",
- "authProvider": "openmetadata",
- "securityConfig": {
- "jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
- },
- },
- },
- }
-
-
-TEST_DATABASE = "test-database"
-EMPTY_COLLECTION = "empty-collection"
-TEST_COLLECTION = "test-collection"
-NUM_ROWS = 200
-
-
-def random_row():
- return {
- "name": choice(["John", "Jane", "Alice", "Bob"]),
- "age": randint(20, 60),
- "city": choice(["New York", "Chicago", "San Francisco"]),
- "nested": {"key": "value" + str(randint(1, 10))},
- }
-
-
-TEST_DATA = [random_row() for _ in range(NUM_ROWS)] + [
- {
- "name": "John",
- "age": 60,
- "city": "New York",
- },
- {
- "name": "Jane",
- "age": 20,
- "city": "New York",
- },
-]
-
-
-class NoSQLProfiler(TestCase):
- """datalake profiler E2E test"""
-
- mongo_container: MongoDbContainer
- client: MongoClient
- db: database.Database
- collection: database.Collection
- ingestion_config: dict
- metadata: OpenMetadata
-
- @classmethod
- def setUpClass(cls) -> None:
- cls.metadata = int_admin_ometa()
- cls.mongo_container = MongoDbContainer("mongo:7.0.5-jammy")
- cls.mongo_container.start()
- cls.client = MongoClient(cls.mongo_container.get_connection_url())
- cls.db = cls.client[TEST_DATABASE]
- cls.collection = cls.db[TEST_COLLECTION]
- cls.collection.insert_many(TEST_DATA)
- cls.db.create_collection(EMPTY_COLLECTION)
- cls.ingestion_config = get_ingestion_config(
- cls.mongo_container.get_exposed_port("27017"), "test", "test"
- )
- # cls.client["admin"].command("grantRolesToUser", "test", roles=["userAdminAnyDatabase"])
- ingestion_workflow = MetadataWorkflow.create(
- cls.ingestion_config,
- )
- ingestion_workflow.execute()
- ingestion_workflow.raise_from_status()
- print_status(ingestion_workflow)
- ingestion_workflow.stop()
-
- @classmethod
- def tearDownClass(cls):
- with accumulate_errors() as error_handler:
- error_handler.try_execute(partial(cls.mongo_container.stop, force=True))
- error_handler.try_execute(cls.delete_service)
-
- @classmethod
- def delete_service(cls):
- service_id = str(
- cls.metadata.get_by_name(
- entity=DatabaseService, fqn=SERVICE_NAME
- ).id.__root__
- )
- cls.metadata.delete(
- entity=DatabaseService,
- entity_id=service_id,
- recursive=True,
- hard_delete=True,
- )
-
- def test_setup_teardown(self):
- """
- does nothing. useful to check if the setup and teardown methods are working
- """
- pass
-
- def run_profiler_workflow(self, config):
- profiler_workflow = ProfilerWorkflow.create(config)
- profiler_workflow.execute()
- status = profiler_workflow.result_status()
- profiler_workflow.stop()
- assert status == 0
-
- def test_simple(self):
- workflow_config = deepcopy(self.ingestion_config)
- workflow_config["source"]["sourceConfig"]["config"].update(
- {
- "type": "Profiler",
- }
- )
- workflow_config["processor"] = {
- "type": "orm-profiler",
- "config": {},
- }
- self.run_profiler_workflow(workflow_config)
-
- cases = [
- {
- "collection": EMPTY_COLLECTION,
- "expected": {
- "rowCount": 0,
- "columns": [],
- },
- },
- {
- "collection": TEST_COLLECTION,
- "expected": {
- "rowCount": len(TEST_DATA),
- "columns": [
- ColumnProfile(
- name="age",
- timestamp=datetime.now().timestamp(),
- max=60,
- min=20,
- ),
- ],
- },
- },
- ]
-
- for tc in cases:
- collection = tc["collection"]
- expected = tc["expected"]
- collection_profile = self.metadata.get_profile_data(
- f"{SERVICE_NAME}.default.{TEST_DATABASE}.{collection}",
- datetime_to_ts(datetime.now() - timedelta(seconds=10)),
- get_end_of_day_timestamp_mill(),
- )
- assert collection_profile.entities
- assert collection_profile.entities[-1].rowCount == expected["rowCount"]
- column_profile = self.metadata.get_profile_data(
- f"{SERVICE_NAME}.default.{TEST_DATABASE}.{collection}.age",
- datetime_to_ts(datetime.now() - timedelta(seconds=10)),
- get_end_of_day_timestamp_mill(),
- profile_type=ColumnProfile,
- )
- assert (len(column_profile.entities) > 0) == (
- len(tc["expected"]["columns"]) > 0
- )
- if len(expected["columns"]) > 0:
- for c1, c2 in zip(column_profile.entities, expected["columns"]):
- assert c1.name == c2.name
- assert c1.max == c2.max
- assert c1.min == c2.min
-
- table = self.metadata.get_by_name(
- Table, f"{SERVICE_NAME}.default.{TEST_DATABASE}.{TEST_COLLECTION}"
- )
- sample_data = self.metadata.get_sample_data(table)
- assert [c.__root__ for c in sample_data.sampleData.columns] == [
- "_id",
- "name",
- "age",
- "city",
- "nested",
- ]
- assert len(sample_data.sampleData.rows) == SAMPLE_DATA_DEFAULT_COUNT
-
- def test_custom_query(self):
- workflow_config = deepcopy(self.ingestion_config)
- workflow_config["source"]["sourceConfig"]["config"].update(
- {
- "type": "Profiler",
- }
- )
- query_age = TEST_DATA[0]["age"]
- workflow_config["processor"] = {
- "type": "orm-profiler",
- "config": {
- "tableConfig": [
- {
- "fullyQualifiedName": f"{SERVICE_NAME}.default.{TEST_DATABASE}.{TEST_COLLECTION}",
- "profileQuery": '{"age": %s}' % query_age,
- }
- ],
- },
- }
- self.run_profiler_workflow(workflow_config)
-
- cases = [
- {
- "collection": EMPTY_COLLECTION,
- "expected": {
- "rowCount": 0,
- "columns": [],
- },
- },
- {
- "collection": TEST_COLLECTION,
- "expected": {
- "rowCount": len(TEST_DATA),
- "columns": [
- ColumnProfile(
- name="age",
- timestamp=datetime.now().timestamp(),
- max=query_age,
- min=query_age,
- ),
- ],
- },
- },
- ]
-
- for tc in cases:
- collection = tc["collection"]
- expected_row_count = tc["expected"]["rowCount"]
-
- collection_profile = self.metadata.get_profile_data(
- f"{SERVICE_NAME}.default.{TEST_DATABASE}.{collection}",
- datetime_to_ts(datetime.now() - timedelta(seconds=10)),
- get_end_of_day_timestamp_mill(),
- )
- assert collection_profile.entities, collection
- assert (
- collection_profile.entities[-1].rowCount == expected_row_count
- ), collection
- column_profile = self.metadata.get_profile_data(
- f"{SERVICE_NAME}.default.{TEST_DATABASE}.{collection}.age",
- datetime_to_ts(datetime.now() - timedelta(seconds=10)),
- get_end_of_day_timestamp_mill(),
- profile_type=ColumnProfile,
- )
- assert (len(column_profile.entities) > 0) == (
- len(tc["expected"]["columns"]) > 0
- )
- table = self.metadata.get_by_name(
- Table, f"{SERVICE_NAME}.default.{TEST_DATABASE}.{TEST_COLLECTION}"
- )
- sample_data = self.metadata.get_sample_data(table)
- age_column_index = [
- col.__root__ for col in sample_data.sampleData.columns
- ].index("age")
- assert all(
- [r[age_column_index] == query_age for r in sample_data.sampleData.rows]
- )
diff --git a/ingestion/tests/unit/metadata/cli/resources/profiler_workflow.py b/ingestion/tests/unit/metadata/cli/resources/profiler_workflow.py
index 5a9573d50381..b9bfe8e7ed15 100644
--- a/ingestion/tests/unit/metadata/cli/resources/profiler_workflow.py
+++ b/ingestion/tests/unit/metadata/cli/resources/profiler_workflow.py
@@ -1,7 +1,7 @@
"""
This file has been generated from dag_runner.j2
"""
-from openmetadata.workflows import workflow_factory
+from openmetadata_managed_apis.workflows import workflow_factory
workflow = workflow_factory.WorkflowFactory.create(
"/airflow/dag_generated_configs/local_redshift_profiler_e9AziRXs.json"
diff --git a/ingestion/tests/unit/metadata/cli/resources/profiler_workflow.txt b/ingestion/tests/unit/metadata/cli/resources/profiler_workflow.txt
index b3945ba7e255..bdb70bb1fd91 100644
--- a/ingestion/tests/unit/metadata/cli/resources/profiler_workflow.txt
+++ b/ingestion/tests/unit/metadata/cli/resources/profiler_workflow.txt
@@ -2,7 +2,7 @@
This file has been generated from dag_runner.j2
"""
from airflow import DAG
-from openmetadata.workflows import workflow_factory
+from openmetadata_managed_apis.workflows import workflow_factory
workflow = workflow_factory.WorkflowFactory.create("/airflow/dag_generated_configs/local_redshift_profiler_e9AziRXs.json")
workflow.generate_dag(globals())
\ No newline at end of file
diff --git a/ingestion/tests/unit/test_azure_credentials.py b/ingestion/tests/unit/test_azure_credentials.py
new file mode 100644
index 000000000000..bb1f03f96c51
--- /dev/null
+++ b/ingestion/tests/unit/test_azure_credentials.py
@@ -0,0 +1,63 @@
+import unittest
+from unittest.mock import patch
+
+from metadata.clients.azure_client import AzureClient
+from metadata.generated.schema.security.credentials.azureCredentials import (
+ AzureCredentials,
+)
+
+
+class TestAzureClient(unittest.TestCase):
+ @patch("azure.identity.ClientSecretCredential")
+ @patch("azure.identity.DefaultAzureCredential")
+ def test_create_client(
+ self,
+ mock_default_credential,
+ mock_client_secret_credential,
+ ):
+ # Test with ClientSecretCredential
+ credentials = AzureCredentials(
+ clientId="clientId", clientSecret="clientSecret", tenantId="tenantId"
+ )
+ instance = AzureClient(credentials)
+ instance.create_client()
+
+ mock_client_secret_credential.assert_called_once()
+ mock_client_secret_credential.reset_mock()
+
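+        # With only a clientId (no clientSecret/tenantId), AzureClient is expected to fall back to DefaultAzureCredential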
+ credentials = AzureCredentials(
+ clientId="clientId",
+ )
+ instance = AzureClient(credentials)
+
+ instance.create_client()
+
+ mock_default_credential.assert_called_once()
+
+ @patch("azure.storage.blob.BlobServiceClient")
+ def test_create_blob_client(self, mock_blob_service_client):
+ credentials = AzureCredentials(
+ clientId="clientId", clientSecret="clientSecret", tenantId="tenantId"
+ )
+ with self.assertRaises(ValueError):
+ AzureClient(credentials=credentials).create_blob_client()
+
+ credentials.accountName = "accountName"
+ AzureClient(credentials=credentials).create_blob_client()
+ mock_blob_service_client.assert_called_once()
+
+ @patch("azure.keyvault.secrets.SecretClient")
+ def test_create_secret_client(self, mock_secret_client):
+ credentials = AzureCredentials(
+ clientId="clientId", clientSecret="clientSecret", tenantId="tenantId"
+ )
+ with self.assertRaises(ValueError):
+ AzureClient(credentials=credentials).create_secret_client()
+
+ credentials.vaultName = "vaultName"
+ AzureClient(credentials=credentials).create_secret_client()
+ mock_secret_client.assert_called_once()
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/ingestion/tests/unit/test_build_connection_url.py b/ingestion/tests/unit/test_build_connection_url.py
new file mode 100644
index 000000000000..8cf60dae6774
--- /dev/null
+++ b/ingestion/tests/unit/test_build_connection_url.py
@@ -0,0 +1,138 @@
+import unittest
+from unittest.mock import patch
+
+from azure.core.credentials import AccessToken
+from azure.identity import ClientSecretCredential
+
+from metadata.generated.schema.entity.services.connections.database.azureSQLConnection import (
+ Authentication,
+ AuthenticationMode,
+ AzureSQLConnection,
+)
+from metadata.generated.schema.entity.services.connections.database.common.azureConfig import (
+ AzureConfigurationSource,
+)
+from metadata.generated.schema.entity.services.connections.database.common.basicAuth import (
+ BasicAuth,
+)
+from metadata.generated.schema.entity.services.connections.database.mysqlConnection import (
+ MysqlConnection,
+)
+from metadata.generated.schema.entity.services.connections.database.postgresConnection import (
+ PostgresConnection,
+)
+from metadata.generated.schema.security.credentials.azureCredentials import (
+ AzureCredentials,
+)
+from metadata.ingestion.source.database.azuresql.connection import get_connection_url
+from metadata.ingestion.source.database.mysql.connection import (
+ get_connection as mysql_get_connection,
+)
+from metadata.ingestion.source.database.postgres.connection import (
+ get_connection as postgres_get_connection,
+)
+
+
+class TestGetConnectionURL(unittest.TestCase):
+ def test_get_connection_url_wo_active_directory_password(self):
+ connection = AzureSQLConnection(
+ driver="SQL Server",
+ hostPort="myserver.database.windows.net",
+ database="mydb",
+ username="myuser",
+ password="mypassword",
+ authenticationMode=AuthenticationMode(
+ authentication=Authentication.ActiveDirectoryPassword,
+ encrypt=True,
+ trustServerCertificate=False,
+ connectionTimeout=45,
+ ),
+ )
+ expected_url = "mssql+pyodbc://?odbc_connect=Driver%3DSQL+Server%3BServer%3Dmyserver.database.windows.net%3BDatabase%3Dmydb%3BUid%3Dmyuser%3BPwd%3Dmypassword%3BEncrypt%3Dyes%3BTrustServerCertificate%3Dno%3BConnection+Timeout%3D45%3BAuthentication%3DActiveDirectoryPassword%3B"
+ self.assertEqual(str(get_connection_url(connection)), expected_url)
+
+ connection = AzureSQLConnection(
+ driver="SQL Server",
+ hostPort="myserver.database.windows.net",
+ database="mydb",
+ username="myuser",
+ password="mypassword",
+ authenticationMode=AuthenticationMode(
+ authentication=Authentication.ActiveDirectoryPassword,
+ ),
+ )
+
+ expected_url = "mssql+pyodbc://?odbc_connect=Driver%3DSQL+Server%3BServer%3Dmyserver.database.windows.net%3BDatabase%3Dmydb%3BUid%3Dmyuser%3BPwd%3Dmypassword%3BEncrypt%3Dno%3BTrustServerCertificate%3Dno%3BConnection+Timeout%3D30%3BAuthentication%3DActiveDirectoryPassword%3B"
+ self.assertEqual(str(get_connection_url(connection)), expected_url)
+
+ def test_get_connection_url_mysql(self):
+ connection = MysqlConnection(
+ username="openmetadata_user",
+ authType=BasicAuth(password="openmetadata_password"),
+ hostPort="localhost:3306",
+ databaseSchema="openmetadata_db",
+ )
+ engine_connection = mysql_get_connection(connection)
+ self.assertEqual(
+ str(engine_connection.url),
+ "mysql+pymysql://openmetadata_user:openmetadata_password@localhost:3306/openmetadata_db",
+ )
+ connection = MysqlConnection(
+ username="openmetadata_user",
+ authType=AzureConfigurationSource(
+ azureConfig=AzureCredentials(
+ clientId="clientid",
+ tenantId="tenantid",
+ clientSecret="clientsecret",
+ scopes="scope1,scope2",
+ )
+ ),
+ hostPort="localhost:3306",
+ databaseSchema="openmetadata_db",
+ )
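+        # With Azure auth configured, the (mocked) AAD access token is used as the database password in the URL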
+ with patch.object(
+ ClientSecretCredential,
+ "get_token",
+ return_value=AccessToken(token="mocked_token", expires_on=100),
+ ):
+ engine_connection = mysql_get_connection(connection)
+ self.assertEqual(
+ str(engine_connection.url),
+ "mysql+pymysql://openmetadata_user:mocked_token@localhost:3306/openmetadata_db",
+ )
+
+ def test_get_connection_url_postgres(self):
+ connection = PostgresConnection(
+ username="openmetadata_user",
+ authType=BasicAuth(password="openmetadata_password"),
+ hostPort="localhost:3306",
+ database="openmetadata_db",
+ )
+ engine_connection = postgres_get_connection(connection)
+ self.assertEqual(
+ str(engine_connection.url),
+ "postgresql+psycopg2://openmetadata_user:openmetadata_password@localhost:3306/openmetadata_db",
+ )
+ connection = PostgresConnection(
+ username="openmetadata_user",
+ authType=AzureConfigurationSource(
+ azureConfig=AzureCredentials(
+ clientId="clientid",
+ tenantId="tenantid",
+ clientSecret="clientsecret",
+ scopes="scope1,scope2",
+ )
+ ),
+ hostPort="localhost:3306",
+ database="openmetadata_db",
+ )
+ with patch.object(
+ ClientSecretCredential,
+ "get_token",
+ return_value=AccessToken(token="mocked_token", expires_on=100),
+ ):
+ engine_connection = postgres_get_connection(connection)
+ self.assertEqual(
+ str(engine_connection.url),
+ "postgresql+psycopg2://openmetadata_user:mocked_token@localhost:3306/openmetadata_db",
+ )
diff --git a/ingestion/tests/unit/test_credentials.py b/ingestion/tests/unit/test_credentials.py
index 6721da1aa999..ff385d814f5a 100644
--- a/ingestion/tests/unit/test_credentials.py
+++ b/ingestion/tests/unit/test_credentials.py
@@ -15,6 +15,9 @@
from pydantic import SecretStr
+from metadata.generated.schema.security.credentials.gcpExternalAccount import (
+ GcpExternalAccount,
+)
from metadata.generated.schema.security.credentials.gcpValues import (
GcpCredentialsValues,
)
@@ -29,7 +32,7 @@ class TestCredentials(TestCase):
Validate credentials handling
"""
- def test_build_google_credentials_dict(self):
+ def test_build_service_account_google_credentials_dict(self):
"""
Check how we can validate GCS values
"""
@@ -52,7 +55,7 @@ def test_build_google_credentials_dict(self):
-----END RSA PRIVATE KEY-----"""
gcp_values = GcpCredentialsValues(
- type="my_type",
+ type="service_account",
projectId=["project_id"],
privateKeyId="private_key_id",
privateKey=private_key,
@@ -62,7 +65,7 @@ def test_build_google_credentials_dict(self):
)
expected_dict = {
- "type": "my_type",
+ "type": "service_account",
"project_id": ["project_id"],
"private_key_id": "private_key_id",
"private_key": private_key,
@@ -82,3 +85,25 @@ def test_build_google_credentials_dict(self):
with self.assertRaises(InvalidPrivateKeyException):
build_google_credentials_dict(gcp_values)
+
+ def test_build_external_account_google_credentials_dict(self):
+ """
+        Check how we can validate GCP external account values
+ """
+ gcp_values = GcpExternalAccount(
+ externalType="external_account",
+ audience="audience",
+ subjectTokenType="subject_token_type",
+ tokenURL="token_url",
+ credentialSource={"environmentId": "environment_id"},
+ )
+
+ expected_dict = {
+ "type": "external_account",
+ "audience": "audience",
+ "subject_token_type": "subject_token_type",
+ "token_url": "token_url",
+ "credential_source": {"environmentId": "environment_id"},
+ }
+
+ self.assertEqual(expected_dict, build_google_credentials_dict(gcp_values))
diff --git a/ingestion/tests/unit/test_dbt.py b/ingestion/tests/unit/test_dbt.py
index 072abc5b6615..962332d53a61 100644
--- a/ingestion/tests/unit/test_dbt.py
+++ b/ingestion/tests/unit/test_dbt.py
@@ -46,6 +46,7 @@
"config": {
"type": "DBT",
"dbtConfigSource": {
+ "dbtConfigType": "local",
"dbtCatalogFilePath": "sample/dbt_files/catalog.json",
"dbtManifestFilePath": "sample/dbt_files/manifest.json",
"dbtRunResultsFilePath": "sample/dbt_files/run_results.json",
diff --git a/ingestion/tests/unit/test_json_schema_parser.py b/ingestion/tests/unit/test_json_schema_parser.py
index 09f5f91d299a..4fd2c9b58636 100644
--- a/ingestion/tests/unit/test_json_schema_parser.py
+++ b/ingestion/tests/unit/test_json_schema_parser.py
@@ -30,15 +30,18 @@ class JsonSchemaParserTests(TestCase):
"properties": {
"firstName": {
"type": "string",
+ "title": "First Name",
"description": "The person's first name."
},
"lastName": {
"type": "string",
+ "title": "Last Name",
"description": "The person's last name."
},
"age": {
"description": "Age in years which must be equal to or greater than zero.",
"type": "integer",
+ "title": "Person Age",
"minimum": 0
}
}
@@ -58,6 +61,12 @@ def test_field_names(self):
}
self.assertEqual(field_names, {"firstName", "lastName", "age"})
+ # validate display names
+ field_display_names = {
+ str(field.displayName) for field in self.parsed_schema[0].children
+ }
+ self.assertEqual(field_display_names, {"First Name", "Last Name", "Person Age"})
+
def test_field_types(self):
field_types = {
str(field.dataType.name) for field in self.parsed_schema[0].children
diff --git a/ingestion/tests/unit/test_workflow_parse.py b/ingestion/tests/unit/test_workflow_parse.py
index 106097ecc127..549c7d5a4a1a 100644
--- a/ingestion/tests/unit/test_workflow_parse.py
+++ b/ingestion/tests/unit/test_workflow_parse.py
@@ -700,3 +700,150 @@ def test_parsing_automation_workflow_athena(self):
"1 validation error for AthenaConnection\ns3StagingDir\n invalid or missing URL scheme (type=value_error.url.scheme)",
str(err.exception),
)
+
+ def test_parsing_dbt_workflow_ok(self):
+ """
+ Test dbt workflow Config parsing OK
+ """
+
+ config_dict = {
+ "source": {
+ "type": "dbt",
+ "serviceName": "dbt_prod",
+ "sourceConfig": {
+ "config": {
+ "type": "DBT",
+ "dbtConfigSource": {
+ "dbtConfigType": "local",
+ "dbtCatalogFilePath": "/path/to/catalog.json",
+ "dbtManifestFilePath": "/path/to/manifest.json",
+ "dbtRunResultsFilePath": "/path/to/run_results.json",
+ },
+ "dbtUpdateDescriptions": True,
+ "includeTags": True,
+ "dbtClassificationName": "dbtTags",
+ "databaseFilterPattern": {"includes": ["test"]},
+ "schemaFilterPattern": {
+ "includes": ["test1"],
+ "excludes": [".*schema.*"],
+ },
+ "tableFilterPattern": {
+ "includes": ["test3"],
+ "excludes": [".*table_name.*"],
+ },
+ }
+ },
+ },
+ "sink": {"type": "metadata-rest", "config": {}},
+ "workflowConfig": {
+ "loggerLevel": "DEBUG",
+ "openMetadataServerConfig": {
+ "hostPort": "http://localhost:8585/api",
+ "authProvider": "openmetadata",
+ "securityConfig": {"jwtToken": "jwt_token"},
+ },
+ },
+ }
+
+ self.assertIsNotNone(parse_workflow_config_gracefully(config_dict))
+
+ def test_parsing_dbt_workflow_ko(self):
+ """
+        Test dbt workflow config parsing failure (KO): missing required parameter
+ """
+
+ config_dict_type_error_ko = {
+ "source": {
+ "type": "dbt",
+ "serviceName": "dbt_prod",
+ "sourceConfig": {
+ "config": {
+ "type": "DBT",
+ "dbtConfigSource": {
+ "dbtConfigType": "cloud",
+ "dbtCloudAuthToken": "token",
+ "dbtCloudAccountId": "ID",
+ "dbtCloudJobId": "JOB ID",
+ },
+ "dbtUpdateDescriptions": True,
+ "includeTags": True,
+ "dbtClassificationName": "dbtTags",
+ "databaseFilterPattern": {"includes": ["test"]},
+ "schemaFilterPattern": {
+ "includes": ["test1"],
+ "excludes": [".*schema.*"],
+ },
+ "tableFilterPattern": {
+ "includes": ["test3"],
+ "excludes": [".*table_name.*"],
+ },
+ }
+ },
+ },
+ "sink": {"type": "metadata-rest", "config": {}},
+ "workflowConfig": {
+ "loggerLevel": "DEBUG",
+ "openMetadataServerConfig": {
+ "hostPort": "http://localhost:8585/api",
+ "authProvider": "openmetadata",
+ "securityConfig": {"jwtToken": "jwt_token"},
+ },
+ },
+ }
+ with self.assertRaises(ParsingConfigurationError) as err:
+ parse_workflow_config_gracefully(config_dict_type_error_ko)
+ self.assertIn(
+ "We encountered an error parsing the configuration of your DbtCloudConfig.\nYou might need to review your config based on the original cause of this failure:\n\t - Missing parameter 'dbtCloudUrl'",
+ str(err.exception),
+ )
+
+ def test_parsing_dbt_pipeline_ko(self):
+ """
+        Test dbt workflow config parsing failure (KO): unexpected extra parameter
+ """
+
+ config_dict_dbt_pipeline_ko = {
+ "source": {
+ "type": "dbt",
+ "serviceName": "dbt_prod",
+ "sourceConfig": {
+ "config": {
+ "type": "DBT",
+ "dbtConfigSource": {
+ "dbtConfigType": "cloud",
+ "dbtCloudAuthToken": "token",
+ "dbtCloudAccountId": "ID",
+ "dbtCloudJobId": "JOB ID",
+ "dbtCloudUrl": "https://clouddbt.com",
+ },
+ "dbtUpdateDescription": True,
+ "includeTags": True,
+ "dbtClassificationName": "dbtTags",
+ "databaseFilterPattern": {"includes": ["test"]},
+ "schemaFilterPattern": {
+ "includes": ["test1"],
+ "excludes": [".*schema.*"],
+ },
+ "tableFilterPattern": {
+ "includes": ["test3"],
+ "excludes": [".*table_name.*"],
+ },
+ }
+ },
+ },
+ "sink": {"type": "metadata-rest", "config": {}},
+ "workflowConfig": {
+ "loggerLevel": "DEBUG",
+ "openMetadataServerConfig": {
+ "hostPort": "http://localhost:8585/api",
+ "authProvider": "openmetadata",
+ "securityConfig": {"jwtToken": "jwt_token"},
+ },
+ },
+ }
+ with self.assertRaises(ParsingConfigurationError) as err:
+ parse_workflow_config_gracefully(config_dict_dbt_pipeline_ko)
+ self.assertIn(
+ "We encountered an error parsing the configuration of your DbtPipeline.\nYou might need to review your config based on the original cause of this failure:\n\t - Extra parameter 'dbtUpdateDescription'",
+ str(err.exception),
+ )
diff --git a/ingestion/tests/unit/topology/dashboard/test_metabase.py b/ingestion/tests/unit/topology/dashboard/test_metabase.py
index 5e5160d5536c..fb5f3783e643 100644
--- a/ingestion/tests/unit/topology/dashboard/test_metabase.py
+++ b/ingestion/tests/unit/topology/dashboard/test_metabase.py
@@ -47,12 +47,12 @@
from metadata.ingestion.source.dashboard.metabase import metadata as MetabaseMetadata
from metadata.ingestion.source.dashboard.metabase.metadata import MetabaseSource
from metadata.ingestion.source.dashboard.metabase.models import (
+ DashCard,
DatasetQuery,
MetabaseChart,
MetabaseDashboardDetails,
MetabaseTable,
Native,
- OrderedCard,
)
from metadata.utils import fqn
@@ -127,7 +127,7 @@
MOCK_CHARTS = [
- OrderedCard(
+ DashCard(
card=MetabaseChart(
description="Test Chart",
table_id=1,
@@ -138,7 +138,7 @@
display="chart1",
)
),
- OrderedCard(
+ DashCard(
card=MetabaseChart(
description="Test Chart",
table_id=1,
@@ -151,7 +151,7 @@
display="chart2",
)
),
- OrderedCard(card=MetabaseChart(name="chart3", id="3")),
+ DashCard(card=MetabaseChart(name="chart3", id="3")),
]
@@ -170,7 +170,7 @@
)
MOCK_DASHBOARD_DETAILS = MetabaseDashboardDetails(
- description="SAMPLE DESCRIPTION", name="test_db", id="1", ordered_cards=MOCK_CHARTS
+ description="SAMPLE DESCRIPTION", name="test_db", id="1", dashcards=MOCK_CHARTS
)
@@ -302,21 +302,21 @@ def test_yield_lineage(self, *_):
# test out _yield_lineage_from_api
mock_dashboard = deepcopy(MOCK_DASHBOARD_DETAILS)
- mock_dashboard.ordered_cards = [MOCK_DASHBOARD_DETAILS.ordered_cards[0]]
+ mock_dashboard.dashcards = [MOCK_DASHBOARD_DETAILS.dashcards[0]]
result = self.metabase.yield_dashboard_lineage_details(
dashboard_details=mock_dashboard, db_service_name="db.service.name"
)
self.assertEqual(next(result).right, EXPECTED_LINEAGE)
# test out _yield_lineage_from_query
- mock_dashboard.ordered_cards = [MOCK_DASHBOARD_DETAILS.ordered_cards[1]]
+ mock_dashboard.dashcards = [MOCK_DASHBOARD_DETAILS.dashcards[1]]
result = self.metabase.yield_dashboard_lineage_details(
dashboard_details=mock_dashboard, db_service_name="db.service.name"
)
self.assertEqual(next(result).right, EXPECTED_LINEAGE)
# test out if no query type
- mock_dashboard.ordered_cards = [MOCK_DASHBOARD_DETAILS.ordered_cards[2]]
+ mock_dashboard.dashcards = [MOCK_DASHBOARD_DETAILS.dashcards[2]]
result = self.metabase.yield_dashboard_lineage_details(
dashboard_details=mock_dashboard, db_service_name="db.service.name"
)
diff --git a/ingestion/tests/unit/topology/database/test_datalake.py b/ingestion/tests/unit/topology/database/test_datalake.py
index 3819f8864557..8579f71cf60e 100644
--- a/ingestion/tests/unit/topology/database/test_datalake.py
+++ b/ingestion/tests/unit/topology/database/test_datalake.py
@@ -33,7 +33,10 @@
from metadata.ingestion.source.database.datalake.metadata import DatalakeSource
from metadata.readers.dataframe.avro import AvroDataFrameReader
from metadata.readers.dataframe.json import JSONDataFrameReader
-from metadata.utils.datalake.datalake_utils import GenericDataFrameColumnParser
+from metadata.utils.datalake.datalake_utils import (
+ GenericDataFrameColumnParser,
+ JsonDataFrameColumnParser,
+)
mock_datalake_config = {
"source": {
@@ -231,6 +234,60 @@
EXAMPLE_JSON_COL_4 = deepcopy(EXAMPLE_JSON_COL_3)
+
+EXAMPLE_JSON_TEST_5 = """
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "title": "Person",
+ "type": "object",
+ "properties": {
+ "firstName": {
+ "type": "string",
+ "title": "First Name",
+ "description": "The person's first name."
+ },
+ "lastName": {
+ "title": "Last Name",
+ "type": "string",
+ "description": "The person's last name."
+ },
+ "age": {
+ "type": "integer",
+ "description": "Age in years.",
+ "minimum": 0
+ }
+ },
+ "required": ["firstName", "lastName"]
+}
+"""
+
+EXAMPLE_JSON_COL_5 = [
+ Column(
+ name="Person",
+ dataType="RECORD",
+ children=[
+ Column(
+ name="firstName",
+ dataType="STRING",
+ description="The person's first name.",
+ displayName="First Name",
+ ),
+ Column(
+ name="lastName",
+ dataType="STRING",
+ description="The person's last name.",
+ displayName="Last Name",
+ ),
+ Column(
+ name="age",
+ dataType="INT",
+ description="Age in years.",
+ ),
+ ],
+ )
+]
+
+
EXAMPLE_JSON_COL_4[3].children[3].children = [
Column(
name="lat",
@@ -446,10 +503,10 @@ def test_json_file_parse(self):
actual_df_1 = JSONDataFrameReader.read_from_json(
key="file.json", json_text=EXAMPLE_JSON_TEST_1, decode=True
- )[0]
+ )[0][0]
actual_df_2 = JSONDataFrameReader.read_from_json(
key="file.json", json_text=EXAMPLE_JSON_TEST_2, decode=True
- )[0]
+ )[0][0]
assert actual_df_1.compare(exp_df_list).empty
assert actual_df_2.compare(exp_df_obj).empty
@@ -458,7 +515,7 @@ def test_json_file_parse(self):
actual_df_3 = JSONDataFrameReader.read_from_json(
key="file.json", json_text=EXAMPLE_JSON_TEST_3, decode=True
- )[0]
+ )[0][0]
actual_cols_3 = GenericDataFrameColumnParser._get_columns(
actual_df_3
) # pylint: disable=protected-access
@@ -466,12 +523,19 @@ def test_json_file_parse(self):
actual_df_4 = JSONDataFrameReader.read_from_json(
key="file.json", json_text=EXAMPLE_JSON_TEST_4, decode=True
- )[0]
+ )[0][0]
actual_cols_4 = GenericDataFrameColumnParser._get_columns(
actual_df_4
) # pylint: disable=protected-access
assert actual_cols_4 == EXAMPLE_JSON_COL_4
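+        # for JSON schema files, read_from_json also returns the raw schema text; the JSON column parser uses it to pick up titles/descriptions (see EXAMPLE_JSON_COL_5)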
+ actual_df_5, raw_data = JSONDataFrameReader.read_from_json(
+ key="file.json", json_text=EXAMPLE_JSON_TEST_5, decode=True
+ )
+ json_parser = JsonDataFrameColumnParser(actual_df_5[0], raw_data=raw_data)
+ actual_cols_5 = json_parser.get_columns()
+ assert actual_cols_5 == EXAMPLE_JSON_COL_5
+
def test_avro_file_parse(self):
columns = AvroDataFrameReader.read_from_avro(AVRO_SCHEMA_FILE)
Column.__eq__ = custom_column_compare
diff --git a/ingestion/tests/unit/topology/storage/test_storage.py b/ingestion/tests/unit/topology/storage/test_storage.py
index 3920821dd32b..0cb7a311b6bf 100644
--- a/ingestion/tests/unit/topology/storage/test_storage.py
+++ b/ingestion/tests/unit/topology/storage/test_storage.py
@@ -298,15 +298,18 @@ def test_generate_structured_container(self):
def test_extract_column_definitions(self):
with patch(
"metadata.ingestion.source.storage.storage_service.fetch_dataframe",
- return_value=[
- pd.DataFrame.from_dict(
- [
- {"transaction_id": 1, "transaction_value": 100},
- {"transaction_id": 2, "transaction_value": 200},
- {"transaction_id": 3, "transaction_value": 300},
- ]
- )
- ],
+ return_value=(
+ [
+ pd.DataFrame.from_dict(
+ [
+ {"transaction_id": 1, "transaction_value": 100},
+ {"transaction_id": 2, "transaction_value": 200},
+ {"transaction_id": 3, "transaction_value": 300},
+ ]
+ )
+ ],
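+                # the second element of the mocked tuple (raw file data) is not needed for this column test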
+ None,
+ ),
):
Column.__eq__ = custom_column_compare
self.assertListEqual(
diff --git a/openmetadata-airflow-apis/pyproject.toml b/openmetadata-airflow-apis/pyproject.toml
index 38871258438a..8118fceb2e5e 100644
--- a/openmetadata-airflow-apis/pyproject.toml
+++ b/openmetadata-airflow-apis/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
# since it helps us organize and isolate version management
[project]
name = "openmetadata_managed_apis"
-version = "1.4.0.0.dev0"
+version = "1.3.4.0"
readme = "README.md"
authors = [
{name = "OpenMetadata Committers"}
diff --git a/openmetadata-clients/openmetadata-java-client/pom.xml b/openmetadata-clients/openmetadata-java-client/pom.xml
index c8f2739eeaab..a37e6a2732b2 100644
--- a/openmetadata-clients/openmetadata-java-client/pom.xml
+++ b/openmetadata-clients/openmetadata-java-client/pom.xml
@@ -5,7 +5,7 @@
openmetadata-clients
org.open-metadata
- 1.4.0-SNAPSHOT
+ 1.3.4
4.0.0
diff --git a/openmetadata-clients/pom.xml b/openmetadata-clients/pom.xml
index a00e9e611994..22b3245db209 100644
--- a/openmetadata-clients/pom.xml
+++ b/openmetadata-clients/pom.xml
@@ -5,7 +5,7 @@
platform
org.open-metadata
- 1.4.0-SNAPSHOT
+ 1.3.4
4.0.0
diff --git a/openmetadata-dist/pom.xml b/openmetadata-dist/pom.xml
index e027cbe69863..a760b5a19f07 100644
--- a/openmetadata-dist/pom.xml
+++ b/openmetadata-dist/pom.xml
@@ -20,7 +20,7 @@
platform
org.open-metadata
- 1.4.0-SNAPSHOT
+ 1.3.4
openmetadata-dist
diff --git a/openmetadata-docs/content/partials/v1.3/connectors/metadata/connectors-list.md b/openmetadata-docs/content/partials/v1.3/connectors/metadata/connectors-list.md
new file mode 100644
index 000000000000..c70d6f179fd2
--- /dev/null
+++ b/openmetadata-docs/content/partials/v1.3/connectors/metadata/connectors-list.md
@@ -0,0 +1,7 @@
+{% connectorsListContainer %}
+
+{% connectorInfoCard name="Amundsen" stage="PROD" href="/connectors/metadata/amundsen" platform="OpenMetadata" / %}
+{% connectorInfoCard name="Atlas" stage="PROD" href="/connectors/metadata/atlas" platform="OpenMetadata" / %}
+{% connectorInfoCard name="Alation" stage="PROD" href="/connectors/metadata/alation" platform="Collate" / %}
+
+{% /connectorsListContainer %}
\ No newline at end of file
diff --git a/openmetadata-docs/content/partials/v1.4/connectors/metadata/connectors-list.md b/openmetadata-docs/content/partials/v1.4/connectors/metadata/connectors-list.md
new file mode 100644
index 000000000000..c70d6f179fd2
--- /dev/null
+++ b/openmetadata-docs/content/partials/v1.4/connectors/metadata/connectors-list.md
@@ -0,0 +1,7 @@
+{% connectorsListContainer %}
+
+{% connectorInfoCard name="Amundsen" stage="PROD" href="/connectors/metadata/amundsen" platform="OpenMetadata" / %}
+{% connectorInfoCard name="Atlas" stage="PROD" href="/connectors/metadata/atlas" platform="OpenMetadata" / %}
+{% connectorInfoCard name="Alation" stage="PROD" href="/connectors/metadata/alation" platform="Collate" / %}
+
+{% /connectorsListContainer %}
\ No newline at end of file
diff --git a/openmetadata-docs/content/v1.1.x/connectors/database/oracle/index.md b/openmetadata-docs/content/v1.1.x/connectors/database/oracle/index.md
index 2b9b62d00a22..98d72291d852 100644
--- a/openmetadata-docs/content/v1.1.x/connectors/database/oracle/index.md
+++ b/openmetadata-docs/content/v1.1.x/connectors/database/oracle/index.md
@@ -57,6 +57,9 @@ GRANT new_role TO user_name;
-- GRANT CREATE SESSION PRIVILEGE TO USER
GRANT CREATE SESSION TO new_role;
+
+-- GRANT SELECT CATALOG ROLE PRIVILEGE TO FETCH METADATA TO ROLE / USER
+GRANT SELECT_CATALOG_ROLE TO new_role;
```
With just these permissions, your user should be able to ingest the schemas, but not the tables inside them. To get
diff --git a/openmetadata-docs/content/v1.1.x/connectors/database/oracle/yaml.md b/openmetadata-docs/content/v1.1.x/connectors/database/oracle/yaml.md
index 1be404f0d675..59390c13835b 100644
--- a/openmetadata-docs/content/v1.1.x/connectors/database/oracle/yaml.md
+++ b/openmetadata-docs/content/v1.1.x/connectors/database/oracle/yaml.md
@@ -61,8 +61,11 @@ CREATE ROLE new_role;
-- GRANT ROLE TO USER
GRANT new_role TO user_name;
--- GRANT CREATE SESSION PRIVILEGE TO USER
+-- GRANT CREATE SESSION PRIVILEGE TO ROLE / USER
GRANT CREATE SESSION TO new_role;
+
+-- GRANT SELECT CATALOG ROLE PRIVILEGE TO FETCH METADATA TO ROLE / USER
+GRANT SELECT_CATALOG_ROLE TO new_role;
```
With just these permissions, your user should be able to ingest the schemas, but not the tables inside them. To get
diff --git a/openmetadata-docs/content/v1.2.x/connectors/database/bigquery/yaml.md b/openmetadata-docs/content/v1.2.x/connectors/database/bigquery/yaml.md
index 308e25001d25..c8cc103b652a 100644
--- a/openmetadata-docs/content/v1.2.x/connectors/database/bigquery/yaml.md
+++ b/openmetadata-docs/content/v1.2.x/connectors/database/bigquery/yaml.md
@@ -127,7 +127,7 @@ You can checkout [this](https://cloud.google.com/iam/docs/keys-create-delete#iam
**1.** Passing the raw credential values provided by BigQuery. This requires us to provide the following information, all provided by BigQuery:
- - **type**: Credentials Type is the type of the account, for a service account the value of this field is `service_account`. To fetch this key, look for the value associated with the `type` key in the service account key file.
+ - **type**: Supported values are `service_account` and `external_account`. For service accounts, the value of this field is `service_account`. For external identities, the value of this field is `external_account`.
- **projectId**: A project ID is a unique string used to differentiate your project from all others in Google Cloud. To fetch this key, look for the value associated with the `project_id` key in the service account key file. You can also pass multiple project id to ingest metadata from different BigQuery projects into one service.
- **privateKeyId**: This is a unique identifier for the private key associated with the service account. To fetch this key, look for the value associated with the `private_key_id` key in the service account file.
- **privateKey**: This is the private key associated with the service account that is used to authenticate and authorize access to BigQuery. To fetch this key, look for the value associated with the `private_key` key in the service account file.
@@ -136,7 +136,11 @@ You can checkout [this](https://cloud.google.com/iam/docs/keys-create-delete#iam
- **authUri**: This is the URI for the authorization server. To fetch this key, look for the value associated with the `auth_uri` key in the service account key file. The default value to Auth URI is https://accounts.google.com/o/oauth2/auth.
- **tokenUri**: The Google Cloud Token URI is a specific endpoint used to obtain an OAuth 2.0 access token from the Google Cloud IAM service. This token allows you to authenticate and access various Google Cloud resources and APIs that require authorization. To fetch this key, look for the value associated with the `token_uri` key in the service account credentials file. Default Value to Token URI is https://oauth2.googleapis.com/token.
- **authProviderX509CertUrl**: This is the URL of the certificate that verifies the authenticity of the authorization server. To fetch this key, look for the value associated with the `auth_provider_x509_cert_url` key in the service account key file. The Default value for Auth Provider X509Cert URL is https://www.googleapis.com/oauth2/v1/certs
- - **clientX509CertUrl**: This is the URL of the certificate that verifies the authenticity of the service account. To fetch this key, look for the value associated with the `client_x509_cert_url` key in the service account key file.
+ - **clientX509CertUrl**: This is the URL of the certificate that verifies the authenticity of the service account. To fetch this key, look for the value associated with the `client_x509_cert_url` key in the service account key file.
+ - **audience**: This is the Google Security Token Service audience which contains the resource name for the workload identity pool and the provider identifier in that pool.
+ - **subjectTokenType**: This is the Google Security Token Service subject token type, based on the OAuth 2.0 token exchange spec. Required when using type `external_account`.
+ - **tokenURL**: This is the Google Security Token Service token exchange endpoint. Required when using type `external_account`.
+ - **credentialSource**: This object defines the mechanism used to retrieve the external credential from the local environment so that it can be exchanged for a GCP access token via the STS endpoint (see the sketch below).
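+
+For illustration, a hypothetical `gcpConfig` block for an external identity might look like the following sketch. The field names mirror the list above; the nesting under `credentials` and all values shown (audience, token type, token URL, credential source) are placeholders that you must replace with the ones generated for your workload identity pool:
+
+```yaml
+credentials:
+  gcpConfig:
+    type: external_account
+    audience: "//iam.googleapis.com/projects/000000000000/locations/global/workloadIdentityPools/my-pool/providers/my-provider"
+    subjectTokenType: "urn:ietf:params:oauth:token-type:jwt"
+    tokenURL: "https://sts.googleapis.com/v1/token"
+    credentialSource:
+      environmentId: "aws1"
+```
+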
**2.** Passing a local file path that contains the credentials:
- **gcpCredentialsPath**
diff --git a/openmetadata-docs/content/v1.2.x/connectors/database/oracle/index.md b/openmetadata-docs/content/v1.2.x/connectors/database/oracle/index.md
index 3b91358c84ca..e62a037bdaf4 100644
--- a/openmetadata-docs/content/v1.2.x/connectors/database/oracle/index.md
+++ b/openmetadata-docs/content/v1.2.x/connectors/database/oracle/index.md
@@ -59,6 +59,9 @@ GRANT new_role TO user_name;
-- GRANT CREATE SESSION PRIVILEGE TO USER
GRANT CREATE SESSION TO new_role;
+
+-- GRANT SELECT CATALOG ROLE PRIVILEGE TO FETCH METADATA TO ROLE / USER
+GRANT SELECT_CATALOG_ROLE TO new_role;
```
With just these permissions, your user should be able to ingest the schemas, but not the tables inside them. To get
diff --git a/openmetadata-docs/content/v1.2.x/connectors/database/oracle/yaml.md b/openmetadata-docs/content/v1.2.x/connectors/database/oracle/yaml.md
index 87eef17bca5b..2ff448db4202 100644
--- a/openmetadata-docs/content/v1.2.x/connectors/database/oracle/yaml.md
+++ b/openmetadata-docs/content/v1.2.x/connectors/database/oracle/yaml.md
@@ -65,6 +65,9 @@ GRANT new_role TO user_name;
-- GRANT CREATE SESSION PRIVILEGE TO USER
GRANT CREATE SESSION TO new_role;
+
+-- GRANT SELECT CATALOG ROLE PRIVILEGE TO FETCH METADATA TO ROLE / USER
+GRANT SELECT_CATALOG_ROLE TO new_role;
```
With just these permissions, your user should be able to ingest the schemas, but not the tables inside them. To get
diff --git a/openmetadata-docs/content/v1.2.x/connectors/ingestion/workflows/profiler/external_workflow.md b/openmetadata-docs/content/v1.2.x/connectors/ingestion/workflows/profiler/external_workflow.md
index f994343f820a..2892be3b76e7 100644
--- a/openmetadata-docs/content/v1.2.x/connectors/ingestion/workflows/profiler/external_workflow.md
+++ b/openmetadata-docs/content/v1.2.x/connectors/ingestion/workflows/profiler/external_workflow.md
@@ -105,6 +105,7 @@ processor:
# bucketName: awsdatalake-testing
# prefix: data/sales/demo1
# overwriteData: false
+ # filePathPattern: "{service_name}/{database_name}_{database_schema_name}_{table_name}.parquet"
# storageConfig:
# awsRegion: us-east-2
# awsAccessKeyId:
@@ -123,6 +124,7 @@ processor:
# bucketName: awsdatalake-testing
# prefix: data/sales/demo1
# overwriteData: false
+ # filePathPattern: "{service_name}/{database_name}_{database_schema_name}_{table_name}.parquet"
# storageConfig:
# awsRegion: us-east-2
# awsAccessKeyId:
diff --git a/openmetadata-docs/content/v1.2.x/connectors/ingestion/workflows/profiler/sample_data.md b/openmetadata-docs/content/v1.2.x/connectors/ingestion/workflows/profiler/sample_data.md
index b16dadc31a78..87c45d220e01 100644
--- a/openmetadata-docs/content/v1.2.x/connectors/ingestion/workflows/profiler/sample_data.md
+++ b/openmetadata-docs/content/v1.2.x/connectors/ingestion/workflows/profiler/sample_data.md
@@ -93,6 +93,7 @@ The OpenMetadata UI will always show 50 or fewer rows of sample data. *Sample Da
- **Bucket Name**: A bucket name is a unique identifier used to organize and store data objects. It's similar to a folder name, but it's used for object storage rather than file storage.
- **Prefix**: The prefix of a data source refers to the first part of the data path that identifies the source or origin of the data. The generated sample data parquet file will be uploaded to this prefix path in your bucket.
- **Overwrite Sample Data**: If this flag is enabled, only one parquet file will be generated per table to store the sample data. Otherwise, a parquet file will be generated for each day when the profiler workflow runs.
+- **File Path Pattern**: You can customize how the file is stored in your storage bucket. By default, the file is stored at `{service_name}/{database_name}/{database_schema_name}/{table_name}/sample_data.parquet`. If, for instance, you want all files to be generated in a single folder, you can provide a pattern like `{service_name}_{database_name}_{database_schema_name}_{table_name}.parquet`. Note that the pattern must contain the elements `{service_name}`, `{database_name}`, `{database_schema_name}` and `{table_name}`, and must end with the `.parquet` extension; using these elements you can build your own custom pattern, as illustrated below.
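+
+For illustration, here is the default pattern next to a hypothetical flat-folder pattern; only the pattern value changes, and the surrounding profiler processor configuration is omitted:
+
+```yaml
+# Default layout: one folder per table
+filePathPattern: "{service_name}/{database_name}/{database_schema_name}/{table_name}/sample_data.parquet"
+
+# Hypothetical flat layout: every file for the service lands in a single folder
+filePathPattern: "{service_name}_{database_name}_{database_schema_name}_{table_name}.parquet"
+```
+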
#### Connection Details for AWS S3
diff --git a/openmetadata-docs/content/v1.2.x/deployment/security/enable-password-masking.md b/openmetadata-docs/content/v1.2.x/deployment/security/enable-password-masking.md
deleted file mode 100644
index 541ed54094c5..000000000000
--- a/openmetadata-docs/content/v1.2.x/deployment/security/enable-password-masking.md
+++ /dev/null
@@ -1,67 +0,0 @@
----
-title: Enable password masking
-slug: /deployment/security/enable-password-masking
----
-
-# Enable password masking
-
-The **1.0.0** version of OpenMetadata now includes a new feature that allows users to activate password masking.
-This feature was added in response to feedback from our community of users who expressed concerns about the security of
-their passwords when using our application.
-
-With the password masking feature enabled, all API calls made by your application will replace the password fields with
-asterisks (*) before sending the request. This will prevent the password from being sent in plain text. Even though
-passwords are replaced by asterisks, it will not affect when editing a connection, saving will update the passwords only
-if they are changed.
-
-{% image
-caption="Editing a service connection with masked password."
-src="/images/v1.2/deployment/mask-password/edit-connection.png"
-alt="mask-password" /%}
-
-However, note that the `ingestion-bot` user will still send the password in plain text as it needs to access the API
-without any obstructions. This is because the `ingestion-bot` user requires full access to the API, and any masking
-would hinder its ability to perform its tasks.
-
-{% note %}
-
-In future releases, the password masking feature will be activated by default.
-
-The feature will be automatically enabled to provide an added layer of security for all API calls made.
-
-{% /note %}
-
-## How to enable the feature
-
-To activate the password masking feature in your application, follow the steps below:
-
-### Docker
-
-Add the following environment variable to the list:
-
-```yaml
-# openmetadata.prod.env
-MASK_PASSWORDS_API=true
-```
-
-### Bare Metal
-
-Edit the `openmetadata.yaml` file as it is shown below:
-
-```yaml
-security:
- maskPasswordsAPI: true
-```
-
-### Kubernetes
-
-Update your helm `maskPasswordsApi` value:
-
-```yaml
-# openmetadata.prod.values.yml
-openmetadata:
- config:
- ...
- maskPasswordsApi: true
- ...
-```
\ No newline at end of file
diff --git a/openmetadata-docs/content/v1.2.x/menu.md b/openmetadata-docs/content/v1.2.x/menu.md
index 5ae3974d82e7..2bf6eb93b172 100644
--- a/openmetadata-docs/content/v1.2.x/menu.md
+++ b/openmetadata-docs/content/v1.2.x/menu.md
@@ -154,8 +154,6 @@ site_menu:
url: /deployment/security/enable-jwt-tokens
- category: Deployment / Enable Security / JWT Troubleshooting
url: /deployment/security/jwt-troubleshooting
- - category: Deployment / Enable Security / Enable Password Masking
- url: /deployment/security/enable-password-masking
- category: Deployment / Enable Secrets Manager
url: /deployment/secrets-manager
diff --git a/openmetadata-docs/content/v1.3.x/connectors/database/oracle/index.md b/openmetadata-docs/content/v1.3.x/connectors/database/oracle/index.md
index ad43e7cefeab..73e04c031e88 100644
--- a/openmetadata-docs/content/v1.3.x/connectors/database/oracle/index.md
+++ b/openmetadata-docs/content/v1.3.x/connectors/database/oracle/index.md
@@ -42,6 +42,9 @@ GRANT new_role TO user_name;
-- GRANT CREATE SESSION PRIVILEGE TO USER
GRANT CREATE SESSION TO new_role;
+
+-- GRANT SELECT CATALOG ROLE PRIVILEGE TO FETCH METADATA TO ROLE / USER
+GRANT SELECT_CATALOG_ROLE TO new_role;
```
With just these permissions, your user should be able to ingest the schemas, but not the tables inside them. To get
diff --git a/openmetadata-docs/content/v1.3.x/connectors/database/oracle/yaml.md b/openmetadata-docs/content/v1.3.x/connectors/database/oracle/yaml.md
index 31f909a143a1..86799f4ba57a 100644
--- a/openmetadata-docs/content/v1.3.x/connectors/database/oracle/yaml.md
+++ b/openmetadata-docs/content/v1.3.x/connectors/database/oracle/yaml.md
@@ -42,6 +42,9 @@ GRANT new_role TO user_name;
-- GRANT CREATE SESSION PRIVILEGE TO USER
GRANT CREATE SESSION TO new_role;
+
+-- GRANT SELECT CATALOG ROLE PRIVILEGE TO FETCH METADATA TO ROLE / USER
+GRANT SELECT_CATALOG_ROLE TO new_role;
```
With just these permissions, your user should be able to ingest the schemas, but not the tables inside them. To get
diff --git a/openmetadata-docs/content/v1.3.x/deployment/ingestion/external/gcs-composer.md b/openmetadata-docs/content/v1.3.x/deployment/ingestion/external/gcs-composer.md
index 3bead257c0a8..02e028ba279e 100644
--- a/openmetadata-docs/content/v1.3.x/deployment/ingestion/external/gcs-composer.md
+++ b/openmetadata-docs/content/v1.3.x/deployment/ingestion/external/gcs-composer.md
@@ -13,7 +13,7 @@ This approach has been last tested against:
- Composer version 2.5.4
- Airflow version 2.6.3
-It also requires the ingestion package to be at least `openmetadata-ingestion==1.3.0.0`.
+It also requires the ingestion package to be at least `openmetadata-ingestion==1.3.0.1`.
## Using the Python Operator
diff --git a/openmetadata-docs/content/v1.3.x/deployment/security/enable-password-masking.md b/openmetadata-docs/content/v1.3.x/deployment/security/enable-password-masking.md
deleted file mode 100644
index 43c97bc8e194..000000000000
--- a/openmetadata-docs/content/v1.3.x/deployment/security/enable-password-masking.md
+++ /dev/null
@@ -1,67 +0,0 @@
----
-title: Enable password masking
-slug: /deployment/security/enable-password-masking
----
-
-# Enable password masking
-
-The **1.0.0** version of OpenMetadata now includes a new feature that allows users to activate password masking.
-This feature was added in response to feedback from our community of users who expressed concerns about the security of
-their passwords when using our application.
-
-With the password masking feature enabled, all API calls made by your application will replace the password fields with
-asterisks (*) before sending the request. This will prevent the password from being sent in plain text. Even though
-passwords are replaced by asterisks, it will not affect when editing a connection, saving will update the passwords only
-if they are changed.
-
-{% image
-caption="Editing a service connection with masked password."
-src="/images/v1.3/deployment/mask-password/edit-connection.png"
-alt="mask-password" /%}
-
-However, note that the `ingestion-bot` user will still send the password in plain text as it needs to access the API
-without any obstructions. This is because the `ingestion-bot` user requires full access to the API, and any masking
-would hinder its ability to perform its tasks.
-
-{% note %}
-
-In future releases, the password masking feature will be activated by default.
-
-The feature will be automatically enabled to provide an added layer of security for all API calls made.
-
-{% /note %}
-
-## How to enable the feature
-
-To activate the password masking feature in your application, follow the steps below:
-
-### Docker
-
-Add the following environment variable to the list:
-
-```yaml
-# openmetadata.prod.env
-MASK_PASSWORDS_API=true
-```
-
-### Bare Metal
-
-Edit the `openmetadata.yaml` file as it is shown below:
-
-```yaml
-security:
- maskPasswordsAPI: true
-```
-
-### Kubernetes
-
-Update your helm `maskPasswordsApi` value:
-
-```yaml
-# openmetadata.prod.values.yml
-openmetadata:
- config:
- ...
- maskPasswordsApi: true
- ...
-```
\ No newline at end of file
diff --git a/openmetadata-docs/content/v1.3.x/menu.md b/openmetadata-docs/content/v1.3.x/menu.md
index 0166257d5fa4..0275a0e14505 100644
--- a/openmetadata-docs/content/v1.3.x/menu.md
+++ b/openmetadata-docs/content/v1.3.x/menu.md
@@ -156,8 +156,6 @@ site_menu:
url: /deployment/security/enable-jwt-tokens
- category: Deployment / Enable Security / JWT Troubleshooting
url: /deployment/security/jwt-troubleshooting
- - category: Deployment / Enable Security / Enable Password Masking
- url: /deployment/security/enable-password-masking
- category: Deployment / Enable Secrets Manager
url: /deployment/secrets-manager
diff --git a/openmetadata-docs/content/v1.4.x-SNAPSHOT/connectors/database/oracle/index.md b/openmetadata-docs/content/v1.4.x-SNAPSHOT/connectors/database/oracle/index.md
new file mode 100644
index 000000000000..1241234b75e7
--- /dev/null
+++ b/openmetadata-docs/content/v1.4.x-SNAPSHOT/connectors/database/oracle/index.md
@@ -0,0 +1,103 @@
+---
+title: Oracle
+slug: /connectors/database/oracle
+---
+
+{% connectorDetailsHeader
+name="Oracle"
+stage="PROD"
+platform="OpenMetadata"
+availableFeatures=["Metadata", "Query Usage", "Data Profiler", "Data Quality", "dbt", "Lineage", "Column-level Lineage", "Stored Procedures"]
+unavailableFeatures=["Owners", "Tags"]
+/ %}
+
+In this section, we provide guides and references to use the Oracle connector.
+
+Configure and schedule Oracle metadata and profiler workflows from the OpenMetadata UI:
+
+- [Requirements](#requirements)
+- [Metadata Ingestion](#metadata-ingestion)
+- [Data Profiler](/connectors/ingestion/workflows/profiler)
+- [Data Quality](/connectors/ingestion/workflows/data-quality)
+- [Lineage](/connectors/ingestion/lineage)
+- [dbt Integration](/connectors/ingestion/workflows/dbt)
+
+{% partial file="/v1.4/connectors/ingestion-modes-tiles.md" variables={yamlPath: "/connectors/database/oracle/yaml"} /%}
+
+## Requirements
+
+**Note**: To retrieve metadata from an Oracle database, we use the `python-oracledb` library, which provides support for versions 12c, 18c, 19c, and 21c.
+
+To ingest metadata from Oracle, the user must have the `CREATE SESSION` privilege.
+
+```sql
+-- CREATE USER
+CREATE USER user_name IDENTIFIED BY admin_password;
+
+-- CREATE ROLE
+CREATE ROLE new_role;
+
+-- GRANT ROLE TO USER
+GRANT new_role TO user_name;
+
+-- GRANT CREATE SESSION PRIVILEGE TO USER
+GRANT CREATE SESSION TO new_role;
+
+-- GRANT SELECT CATALOG ROLE PRIVILEGE TO FETCH METADATA TO ROLE / USER
+GRANT SELECT_CATALOG_ROLE TO new_role;
+```
+
+With just these permissions, your user should be able to ingest the schemas, but not the tables inside them. To get
+the tables, you should grant `SELECT` permissions to the tables you are interested in. E.g.,
+
+```sql
+GRANT SELECT ON ADMIN.EXAMPLE_TABLE TO new_role;
+```
+
+You can find further information [here](https://docs.oracle.com/javadb/10.8.3.0/ref/rrefsqljgrant.html). Note that
+there is no routine out of the box in Oracle to grant SELECT to a full schema.
+
+## Metadata Ingestion
+
+{% partial
+ file="/v1.4/connectors/metadata-ingestion-ui.md"
+ variables={
+ connector: "Oracle",
+ selectServicePath: "/images/v1.4/connectors/oracle/select-service.png",
+ addNewServicePath: "/images/v1.4/connectors/oracle/add-new-service.png",
+ serviceConnectionPath: "/images/v1.4/connectors/oracle/service-connection.png",
+}
+/%}
+
+{% stepsContainer %}
+{% extraContent parentTagName="stepsContainer" %}
+
+#### Connection Details
+
+- **Username**: Specify the User to connect to Oracle. It should have enough privileges to read all the metadata.
+- **Password**: Password to connect to Oracle.
+- **Host and Port**: Enter the fully qualified hostname and port number for your Oracle deployment in the Host and Port field.
+- **Database Name**: Optional name to give to the database in OpenMetadata. If left blank, we will use `default` as the database name. It is recommended to use the same database name as the SID; this ensures accurate results and proper identification of tables during profiling, data quality checks and dbt workflows.
+- **Oracle Connection Type**: Select the Oracle connection type. The type can be either `Oracle Service Name` or `Database Schema`:
+ - **Oracle Service Name**: The Oracle Service Name is the TNS alias that you give when you remotely connect to your database; this service name is recorded in `tnsnames.ora`.
+ - **Database Schema**: The name of the database schema available in Oracle that you want to connect with.
+- **Oracle instant client directory**: The directory where the `instantclient` binaries for Oracle are located. In the ingestion Docker image we
+  provide them by default at `/instantclient`. If this parameter is set (it is by default), we will run the [thick Oracle client](https://python-oracledb.readthedocs.io/en/latest/user_guide/initialization.html#initializing-python-oracledb).
+  We ship the binaries for AMD and ARM architectures, for instant client version 19, from [here](https://www.oracle.com/database/technologies/instant-client/linux-x86-64-downloads.html)
+  and [here](https://www.oracle.com/database/technologies/instant-client/linux-arm-aarch64-downloads.html).
+
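+If you are unsure which value to use, it can help to check how you already connect to the database. The snippet below is only an illustrative sketch: the host and port are placeholders, `ORCLPDB1` stands in for your service name, and `user_name`/`admin_password` refer to the example user created above.
+
+```bash
+# Hypothetical connection check with SQL*Plus using an Easy Connect string;
+# the part after the final slash (ORCLPDB1) is the Oracle Service Name.
+sqlplus user_name/admin_password@//oracle.example.com:1521/ORCLPDB1
+```
+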
+{% partial file="/v1.4/connectors/database/advanced-configuration.md" /%}
+
+{% /extraContent %}
+
+{% partial file="/v1.4/connectors/test-connection.md" /%}
+
+{% partial file="/v1.4/connectors/database/configure-ingestion.md" /%}
+
+{% partial file="/v1.4/connectors/ingestion-schedule-and-deploy.md" /%}
+
+{% /stepsContainer %}
+
+{% partial file="/v1.4/connectors/troubleshooting.md" /%}
+
+{% partial file="/v1.4/connectors/database/related.md" /%}
diff --git a/openmetadata-docs/content/v1.4.x-SNAPSHOT/connectors/database/oracle/yaml.md b/openmetadata-docs/content/v1.4.x-SNAPSHOT/connectors/database/oracle/yaml.md
new file mode 100644
index 000000000000..907347b2880f
--- /dev/null
+++ b/openmetadata-docs/content/v1.4.x-SNAPSHOT/connectors/database/oracle/yaml.md
@@ -0,0 +1,219 @@
+---
+title: Run the Oracle Connector Externally
+slug: /connectors/database/oracle/yaml
+---
+
+{% connectorDetailsHeader
+name="Oracle"
+stage="PROD"
+platform="OpenMetadata"
+availableFeatures=["Metadata", "Query Usage", "Data Profiler", "Data Quality", "dbt", "Lineage", "Column-level Lineage", "Stored Procedures"]
+unavailableFeatures=["Owners", "Tags"]
+/ %}
+
+In this section, we provide guides and references to use the Oracle connector.
+
+Configure and schedule Oracle metadata and profiler workflows externally:
+
+- [Requirements](#requirements)
+- [Metadata Ingestion](#metadata-ingestion)
+- [Data Profiler](#data-profiler)
+- [Data Quality](#data-quality)
+- [Lineage](#lineage)
+- [dbt Integration](#dbt-integration)
+
+{% partial file="/v1.4/connectors/external-ingestion-deployment.md" /%}
+
+## Requirements
+
+**Note**: To retrieve metadata from an Oracle database, we use the `python-oracledb` library, which provides support for versions 12c, 18c, 19c, and 21c.
+
+To ingest metadata from Oracle, the user must have the `CREATE SESSION` privilege.
+
+```sql
+-- CREATE USER
+CREATE USER user_name IDENTIFIED BY admin_password;
+
+-- CREATE ROLE
+CREATE ROLE new_role;
+
+-- GRANT ROLE TO USER
+GRANT new_role TO user_name;
+
+-- GRANT CREATE SESSION PRIVILEGE TO USER
+GRANT CREATE SESSION TO new_role;
+
+-- GRANT SELECT CATALOG ROLE PRIVILEGE TO FETCH METADATA TO ROLE / USER
+GRANT SELECT_CATALOG_ROLE TO new_role;
+```
+
+With just these permissions, your user should be able to ingest the schemas, but not the tables inside them. To get
+the tables, you should grant `SELECT` permissions to the tables you are interested in. E.g.,
+
+```sql
+GRANT SELECT ON ADMIN.EXAMPLE_TABLE TO new_role;
+```
+
+You can find further information [here](https://docs.oracle.com/javadb/10.8.3.0/ref/rrefsqljgrant.html). Note that
+Oracle provides no out-of-the-box routine to grant `SELECT` on an entire schema.
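+
+Since there is no single statement for this, one option is a small PL/SQL loop. The block below is only an illustrative sketch: it assumes the schema owner is `ADMIN`, reuses the `new_role` role created above, and must be run by a user allowed to grant on those tables.
+
+```sql
+-- Illustrative sketch: grant SELECT on every table owned by ADMIN to new_role.
+BEGIN
+  FOR t IN (SELECT table_name FROM all_tables WHERE owner = 'ADMIN') LOOP
+    EXECUTE IMMEDIATE 'GRANT SELECT ON ADMIN."' || t.table_name || '" TO new_role';
+  END LOOP;
+END;
+/
+```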
+
+### Python Requirements
+
+To run the Oracle ingestion, you will need to install:
+
+```bash
+pip3 install "openmetadata-ingestion[oracle]"
+```
+
+## Metadata Ingestion
+
+All connectors are defined as JSON Schemas.
+[Here](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/oracleConnection.json)
+you can find the structure to create a connection to Oracle.
+
+In order to create and run a Metadata Ingestion workflow, we will follow
+the steps to create a YAML configuration able to connect to the source,
+process the Entities if needed, and reach the OpenMetadata server.
+
+The workflow is modeled around the following
+[JSON Schema](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/workflow.json)
+
+### 1. Define the YAML Config
+
+This is a sample config for Oracle:
+
+{% codePreview %}
+
+{% codeInfoContainer %}
+
+#### Source Configuration - Service Connection
+
+{% codeInfo srNumber=1 %}
+
+**username**: Specify the User to connect to Oracle. It should have enough privileges to read all the metadata.
+
+{% /codeInfo %}
+
+{% codeInfo srNumber=2 %}
+
+**password**: Password to connect to Oracle.
+
+{% /codeInfo %}
+
+{% codeInfo srNumber=3 %}
+
+**hostPort**: Enter the fully qualified hostname and port number for your Oracle deployment in the Host and Port field.
+
+{% /codeInfo %}
+
+{% codeInfo srNumber=4 %}
+
+**oracleConnectionType**: The type can be either `oracleServiceName` or `databaseSchema`:
+- **oracleServiceName**: The TNS alias that you use when connecting remotely to your database; this service name is recorded in `tnsnames.ora`.
+- **databaseSchema**: The name of the database schema available in Oracle that you want to connect with.
+
+**Oracle instant client directory**: The directory where the `instantclient` binaries for Oracle are located. In the ingestion Docker image we
+provide them by default at `/instantclient`. If this parameter is set (it is by default), we will run the [thick Oracle client](https://python-oracledb.readthedocs.io/en/latest/user_guide/initialization.html#initializing-python-oracledb).
+We ship the binaries for x86-64 (AMD64) and ARM architectures from [here](https://www.oracle.com/database/technologies/instant-client/linux-x86-64-downloads.html)
+and [here](https://www.oracle.com/database/technologies/instant-client/linux-arm-aarch64-downloads.html) for instant client version 19.
+
+{% /codeInfo %}
+
+{% codeInfo srNumber=23 %}
+
+**databaseName**: Optional name to give to the database in OpenMetadata. If left blank, we will use `default` as the database name. It is recommended to use the same name as the SID; this ensures accurate results and proper identification of tables during profiling, data quality checks, and dbt workflows.
+
+{% /codeInfo %}
+
+{% partial file="/v1.4/connectors/yaml/database/source-config-def.md" /%}
+
+{% partial file="/v1.4/connectors/yaml/ingestion-sink-def.md" /%}
+
+{% partial file="/v1.4/connectors/yaml/workflow-config-def.md" /%}
+
+#### Advanced Configuration
+
+{% codeInfo srNumber=5 %}
+
+**Connection Options (Optional)**: Enter the details for any additional connection options that can be sent to Oracle during the connection. These details must be added as Key-Value pairs.
+
+{% /codeInfo %}
+
+{% codeInfo srNumber=6 %}
+
+**Connection Arguments (Optional)**: Enter the details for any additional connection arguments such as security or protocol configs that can be sent to Oracle during the connection. These details must be added as Key-Value pairs.
+
+- In case you are using Single-Sign-On (SSO) for authentication, add the `authenticator` details in the Connection Arguments as a Key-Value pair as follows: `"authenticator" : "sso_login_url"`
+
+{% /codeInfo %}
+
+{% /codeInfoContainer %}
+
+{% codeBlock fileName="filename.yaml" %}
+
+```yaml
+source:
+ type: oracle
+ serviceName: local_oracle
+ serviceConnection:
+ config:
+ type: Oracle
+```
+```yaml {% srNumber=3 %}
+ hostPort: hostPort
+```
+```yaml {% srNumber=1 %}
+ username: username
+```
+```yaml {% srNumber=2 %}
+ password: password
+```
+```yaml {% srNumber=4 %}
+ # The type can either be oracleServiceName or databaseSchema
+ oracleConnectionType:
+ oracleServiceName: serviceName
+ # databaseSchema: schema
+```
+```yaml {% srNumber=23 %}
+ databaseName: custom_db_display_name
+```
+```yaml {% srNumber=5 %}
+ # connectionOptions:
+ # key: value
+```
+```yaml {% srNumber=6 %}
+ # connectionArguments:
+ # key: value
+```
+
+{% partial file="/v1.4/connectors/yaml/database/source-config.md" /%}
+
+{% partial file="/v1.4/connectors/yaml/ingestion-sink.md" /%}
+
+{% partial file="/v1.4/connectors/yaml/workflow-config.md" /%}
+
+{% /codeBlock %}
+
+{% /codePreview %}
+
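+For reference, the annotated fragments above come together into a single workflow file. The following is only an illustrative sketch with placeholder values; the exact `sourceConfig`, `sink`, and `workflowConfig` options are documented in the partials referenced above and in the linked JSON Schemas.
+
+```yaml
+source:
+  type: oracle
+  serviceName: local_oracle
+  serviceConnection:
+    config:
+      type: Oracle
+      username: user_name
+      password: admin_password
+      hostPort: oracle.example.com:1521
+      oracleConnectionType:
+        oracleServiceName: ORCLPDB1
+      databaseName: custom_db_display_name
+  sourceConfig:
+    config:
+      type: DatabaseMetadata
+sink:
+  type: metadata-rest
+  config: {}
+workflowConfig:
+  openMetadataServerConfig:
+    hostPort: http://localhost:8585/api
+    authProvider: openmetadata
+    securityConfig:
+      jwtToken: "<bot-jwt-token>"
+```
+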
+{% partial file="/v1.4/connectors/yaml/ingestion-cli.md" /%}
+
+{% partial file="/v1.4/connectors/yaml/data-profiler.md" variables={connector: "oracle"} /%}
+
+{% partial file="/v1.4/connectors/yaml/data-quality.md" /%}
+
+## Lineage
+
+You can learn more about how to ingest lineage [here](/connectors/ingestion/workflows/lineage).
+
+## dbt Integration
+
+{% tilesContainer %}
+
+{% tile
+ icon="mediation"
+ title="dbt Integration"
+ description="Learn more about how to ingest dbt models' definitions and their lineage."
+ link="/connectors/ingestion/workflows/dbt" /%}
+
+{% /tilesContainer %}
+
diff --git a/openmetadata-service/pom.xml b/openmetadata-service/pom.xml
index 45fc311e15cd..0b18742e502f 100644
--- a/openmetadata-service/pom.xml
+++ b/openmetadata-service/pom.xml
@@ -5,7 +5,7 @@
platform
org.open-metadata
- 1.4.0-SNAPSHOT
+ 1.3.4
4.0.0
openmetadata-service
@@ -16,13 +16,14 @@
${project.basedir}/target/site/jacoco-aggregate/jacoco.xml
${project.basedir}/src/test/java
1.19.4
- 2.23.3
+ 2.25.21
1.11.2
4.7.3
0.5.11
2.9.0
2.3.4
- 2.3.2
+ 2.5.0-rc1
+ 5.7.0
@@ -37,6 +38,33 @@
+    <dependency>
+      <groupId>org.pac4j</groupId>
+      <artifactId>pac4j-core</artifactId>
+      <version>${pac4j.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.slf4j</groupId>
+          <artifactId>slf4j-api</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.pac4j</groupId>
+      <artifactId>pac4j-oidc</artifactId>
+      <version>${pac4j.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>net.minidev</groupId>
+          <artifactId>json-smart</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>net.minidev</groupId>
+      <artifactId>json-smart</artifactId>
+      <version>2.5.0</version>
+    </dependency>
org.open-metadata
common
@@ -298,7 +326,21 @@
testcontainers
${org.testcontainers.version}
test
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.commons</groupId>
+          <artifactId>commons-compress</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-compress</artifactId>
+      <version>1.26.1</version>
+      <scope>test</scope>
+    </dependency>
org.testcontainers
junit-jupiter
@@ -335,6 +377,12 @@
2.40
test
+    <dependency>
+      <groupId>org.assertj</groupId>
+      <artifactId>assertj-core</artifactId>
+      <version>3.25.3</version>
+      <scope>test</scope>
+    </dependency>
javax.json
@@ -498,6 +546,12 @@
quartz
${quartz.version}
+
+    <dependency>
+      <groupId>com.mchange</groupId>
+      <artifactId>c3p0</artifactId>
+      <version>0.10.0</version>
+    </dependency>
com.fasterxml.woodstox
woodstox-core
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/Entity.java b/openmetadata-service/src/main/java/org/openmetadata/service/Entity.java
index 67a75d67d974..ed0a81c9973b 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/Entity.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/Entity.java
@@ -579,4 +579,8 @@ public static SearchIndex buildSearchIndex(String entityType, Object entity) {
}
throw new BadRequestException("searchrepository not initialized");
}
+
+ public static T getDao() {
+ return (T) collectionDAO;
+ }
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplication.java b/openmetadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplication.java
index 418e67086a90..6ffc08bde0d5 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplication.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/OpenMetadataApplication.java
@@ -13,7 +13,7 @@
package org.openmetadata.service;
-import static org.openmetadata.service.util.MicrometerBundleSingleton.setWebAnalyticsEvents;
+import static org.openmetadata.service.security.SecurityUtil.tryCreateOidcClient;
import io.dropwizard.Application;
import io.dropwizard.configuration.EnvironmentVariableSubstitutor;
@@ -25,6 +25,7 @@
import io.dropwizard.jersey.errors.EarlyEofExceptionMapper;
import io.dropwizard.jersey.errors.LoggingExceptionMapper;
import io.dropwizard.jersey.jackson.JsonProcessingExceptionMapper;
+import io.dropwizard.jetty.MutableServletContextHandler;
import io.dropwizard.lifecycle.Managed;
import io.dropwizard.server.DefaultServerFactory;
import io.dropwizard.setup.Bootstrap;
@@ -38,7 +39,6 @@
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateException;
-import java.time.temporal.ChronoUnit;
import java.util.EnumSet;
import java.util.Optional;
import javax.naming.ConfigurationException;
@@ -53,6 +53,7 @@
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.eclipse.jetty.http.pathmap.ServletPathSpec;
+import org.eclipse.jetty.server.session.SessionHandler;
import org.eclipse.jetty.servlet.FilterHolder;
import org.eclipse.jetty.servlet.ServletHolder;
import org.eclipse.jetty.websocket.server.NativeWebSocketServletContainerInitializer;
@@ -60,12 +61,12 @@
import org.glassfish.jersey.media.multipart.MultiPartFeature;
import org.glassfish.jersey.server.ServerProperties;
import org.jdbi.v3.core.Jdbi;
-import org.jdbi.v3.core.statement.SqlLogger;
-import org.jdbi.v3.core.statement.StatementContext;
import org.jdbi.v3.sqlobject.SqlObjects;
import org.openmetadata.schema.api.security.AuthenticationConfiguration;
import org.openmetadata.schema.api.security.AuthorizerConfiguration;
+import org.openmetadata.schema.api.security.ClientType;
import org.openmetadata.schema.services.connections.metadata.AuthProvider;
+import org.openmetadata.service.apps.ApplicationHandler;
import org.openmetadata.service.apps.scheduler.AppScheduler;
import org.openmetadata.service.config.OMWebBundle;
import org.openmetadata.service.config.OMWebConfiguration;
@@ -80,21 +81,26 @@
import org.openmetadata.service.fernet.Fernet;
import org.openmetadata.service.jdbi3.CollectionDAO;
import org.openmetadata.service.jdbi3.EntityRepository;
+import org.openmetadata.service.jdbi3.MigrationDAO;
import org.openmetadata.service.jdbi3.locator.ConnectionAwareAnnotationSqlLocator;
import org.openmetadata.service.jdbi3.locator.ConnectionType;
import org.openmetadata.service.migration.Migration;
+import org.openmetadata.service.migration.MigrationValidationClient;
import org.openmetadata.service.migration.api.MigrationWorkflow;
import org.openmetadata.service.monitoring.EventMonitor;
+import org.openmetadata.service.monitoring.EventMonitorConfiguration;
import org.openmetadata.service.monitoring.EventMonitorFactory;
import org.openmetadata.service.monitoring.EventMonitorPublisher;
import org.openmetadata.service.resources.CollectionRegistry;
import org.openmetadata.service.resources.databases.DatasourceConfig;
import org.openmetadata.service.resources.settings.SettingsCache;
import org.openmetadata.service.search.SearchRepository;
-import org.openmetadata.service.secrets.SecretsManager;
import org.openmetadata.service.secrets.SecretsManagerFactory;
-import org.openmetadata.service.secrets.SecretsManagerUpdateService;
import org.openmetadata.service.secrets.masker.EntityMaskerFactory;
+import org.openmetadata.service.security.AuthCallbackServlet;
+import org.openmetadata.service.security.AuthLoginServlet;
+import org.openmetadata.service.security.AuthLogoutServlet;
+import org.openmetadata.service.security.AuthRefreshServlet;
import org.openmetadata.service.security.Authorizer;
import org.openmetadata.service.security.NoopAuthorizer;
import org.openmetadata.service.security.NoopFilter;
@@ -115,6 +121,9 @@
import org.openmetadata.service.util.MicrometerBundleSingleton;
import org.openmetadata.service.util.incidentSeverityClassifier.IncidentSeverityClassifierInterface;
import org.openmetadata.service.util.jdbi.DatabaseAuthenticationProviderFactory;
+import org.openmetadata.service.util.jdbi.OMSqlLogger;
+import org.pac4j.core.util.CommonHelper;
+import org.pac4j.oidc.client.OidcClient;
import org.quartz.SchedulerException;
/** Main catalog application */
@@ -123,6 +132,8 @@ public class OpenMetadataApplication extends Application() {});
- environment.jersey().register(new JsonProcessingExceptionMapper(true));
- environment.jersey().register(new EarlyEofExceptionMapper());
- environment.jersey().register(JsonMappingExceptionMapper.class);
- environment
- .healthChecks()
- .register("OpenMetadataServerHealthCheck", new OpenMetadataServerHealthCheck());
// start event hub before registering publishers
EventPubSub.start();
+ ApplicationHandler.initialize(catalogConfig);
registerResources(catalogConfig, environment, jdbi);
// Register Event Handler
@@ -214,10 +223,6 @@ public void run(OpenMetadataApplicationConfig catalogConfig, Environment environ
// Register Event publishers
registerEventPublisher(catalogConfig);
- // update entities secrets if required
- new SecretsManagerUpdateService(secretsManager, catalogConfig.getClusterName())
- .updateEntities();
-
// start authorizer after event publishers
// authorizer creates admin/bot users, ES publisher should start before to index users created
// by authorizer
@@ -226,31 +231,130 @@ public void run(OpenMetadataApplicationConfig catalogConfig, Environment environ
// authenticationHandler Handles auth related activities
authenticatorHandler.init(catalogConfig);
- setWebAnalyticsEvents(catalogConfig);
+ registerMicrometerFilter(environment, catalogConfig.getEventMonitorConfiguration());
+
+ initializeWebsockets(catalogConfig, environment);
+ registerSamlServlets(catalogConfig, environment);
+
+ // Asset Servlet Registration
+ registerAssetServlet(catalogConfig.getWebConfiguration(), environment);
+
+ // Handle Pipeline Service Client Status job
+ PipelineServiceStatusJobHandler pipelineServiceStatusJobHandler =
+ PipelineServiceStatusJobHandler.create(
+ catalogConfig.getPipelineServiceClientConfiguration(), catalogConfig.getClusterName());
+ pipelineServiceStatusJobHandler.addPipelineServiceStatusJob();
+
+ // Register Auth Handlers
+ registerAuthServlets(catalogConfig, environment);
+ }
+
+ private void registerAuthServlets(OpenMetadataApplicationConfig config, Environment environment) {
+ if (config.getAuthenticationConfiguration() != null
+ && config
+ .getAuthenticationConfiguration()
+ .getClientType()
+ .equals(ClientType.CONFIDENTIAL)) {
+ CommonHelper.assertNotNull(
+ "OidcConfiguration", config.getAuthenticationConfiguration().getOidcConfiguration());
+
+ // Set up a Session Manager
+ MutableServletContextHandler contextHandler = environment.getApplicationContext();
+ if (contextHandler.getSessionHandler() == null) {
+ contextHandler.setSessionHandler(new SessionHandler());
+ }
+
+ // Register Servlets
+ OidcClient oidcClient =
+ tryCreateOidcClient(config.getAuthenticationConfiguration().getOidcConfiguration());
+ oidcClient.setCallbackUrl(
+ config.getAuthenticationConfiguration().getOidcConfiguration().getCallbackUrl());
+ ServletRegistration.Dynamic authLogin =
+ environment
+ .servlets()
+ .addServlet(
+ "oauth_login",
+ new AuthLoginServlet(
+ oidcClient,
+ config.getAuthenticationConfiguration().getOidcConfiguration().getServerUrl(),
+ config.getAuthenticationConfiguration().getJwtPrincipalClaims()));
+ authLogin.addMapping("/api/v1/auth/login");
+ ServletRegistration.Dynamic authCallback =
+ environment
+ .servlets()
+ .addServlet(
+ "auth_callback",
+ new AuthCallbackServlet(
+ oidcClient,
+ config.getAuthenticationConfiguration().getOidcConfiguration().getServerUrl(),
+ config.getAuthenticationConfiguration().getJwtPrincipalClaims()));
+ authCallback.addMapping("/callback");
+
+ ServletRegistration.Dynamic authLogout =
+ environment
+ .servlets()
+ .addServlet(
+ "auth_logout",
+ new AuthLogoutServlet(
+ config
+ .getAuthenticationConfiguration()
+ .getOidcConfiguration()
+ .getServerUrl()));
+ authLogout.addMapping("/api/v1/auth/logout");
+
+ ServletRegistration.Dynamic refreshServlet =
+ environment
+ .servlets()
+ .addServlet(
+ "auth_refresh",
+ new AuthRefreshServlet(
+ oidcClient,
+ config
+ .getAuthenticationConfiguration()
+ .getOidcConfiguration()
+ .getServerUrl()));
+ refreshServlet.addMapping("/api/v1/auth/refresh");
+ }
+ }
+
+ private void registerHealthCheck(Environment environment) {
+ environment
+ .healthChecks()
+ .register("OpenMetadataServerHealthCheck", new OpenMetadataServerHealthCheck());
+ }
+
+ private void registerExceptionMappers(Environment environment) {
+ environment.jersey().register(CatalogGenericExceptionMapper.class);
+ // Override constraint violation mapper to catch Json validation errors
+ environment.jersey().register(new ConstraintViolationExceptionMapper());
+ // Restore dropwizard default exception mappers
+ environment.jersey().register(new LoggingExceptionMapper<>() {});
+ environment.jersey().register(new JsonProcessingExceptionMapper(true));
+ environment.jersey().register(new EarlyEofExceptionMapper());
+ environment.jersey().register(JsonMappingExceptionMapper.class);
+ }
+
+ private void registerMicrometerFilter(
+ Environment environment, EventMonitorConfiguration eventMonitorConfiguration) {
FilterRegistration.Dynamic micrometerFilter =
environment.servlets().addFilter("OMMicrometerHttpFilter", new OMMicrometerHttpFilter());
micrometerFilter.addMappingForUrlPatterns(
- EnumSet.allOf(DispatcherType.class),
- true,
- catalogConfig.getEventMonitorConfiguration().getPathPattern());
- initializeWebsockets(catalogConfig, environment);
- registerSamlHandlers(catalogConfig, environment);
+ EnumSet.allOf(DispatcherType.class), true, eventMonitorConfiguration.getPathPattern());
+ }
+ private void registerAssetServlet(OMWebConfiguration webConfiguration, Environment environment) {
// Handle Asset Using Servlet
OpenMetadataAssetServlet assetServlet =
- new OpenMetadataAssetServlet(
- "/assets", "/", "index.html", catalogConfig.getWebConfiguration());
+ new OpenMetadataAssetServlet("/assets", "/", "index.html", webConfiguration);
String pathPattern = "/" + '*';
environment.servlets().addServlet("static", assetServlet).addMapping(pathPattern);
+ }
- // Handle Pipeline Service Client Status job
- PipelineServiceStatusJobHandler pipelineServiceStatusJobHandler =
- PipelineServiceStatusJobHandler.create(
- catalogConfig.getPipelineServiceClientConfiguration(), catalogConfig.getClusterName());
- pipelineServiceStatusJobHandler.addPipelineServiceStatusJob();
+ protected CollectionDAO getDao(Jdbi jdbi) {
+ return jdbi.onDemand(CollectionDAO.class);
}
- private void registerSamlHandlers(
+ private void registerSamlServlets(
OpenMetadataApplicationConfig catalogConfig, Environment environment)
throws IOException, CertificateException, KeyStoreException, NoSuchAlgorithmException {
if (catalogConfig.getAuthenticationConfiguration() != null
@@ -280,25 +384,7 @@ private Jdbi createAndSetupJDBI(Environment environment, DataSourceFactory dbFac
});
Jdbi jdbi = new JdbiFactory().build(environment, dbFactory, "database");
- SqlLogger sqlLogger =
- new SqlLogger() {
- @Override
- public void logBeforeExecution(StatementContext context) {
- LOG.debug("sql {}, parameters {}", context.getRenderedSql(), context.getBinding());
- }
-
- @Override
- public void logAfterExecution(StatementContext context) {
- LOG.debug(
- "sql {}, parameters {}, timeTaken {} ms",
- context.getRenderedSql(),
- context.getBinding(),
- context.getElapsedTime(ChronoUnit.MILLIS));
- }
- };
- if (LOG.isDebugEnabled()) {
- jdbi.setSqlLogger(sqlLogger);
- }
+ jdbi.setSqlLogger(new OMSqlLogger());
// Set the Database type for choosing correct queries from annotations
jdbi.getConfig(SqlObjects.class)
.setSqlLocator(new ConnectionAwareAnnotationSqlLocator(dbFactory.getDriverClass()));
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/AbstractNativeApplication.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/AbstractNativeApplication.java
index 02edefe7a56c..f92d546dbde2 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/AbstractNativeApplication.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/AbstractNativeApplication.java
@@ -1,7 +1,7 @@
package org.openmetadata.service.apps;
import static org.openmetadata.service.apps.scheduler.AbstractOmAppJobListener.JOB_LISTENER_NAME;
-import static org.openmetadata.service.apps.scheduler.AppScheduler.APP_INFO_KEY;
+import static org.openmetadata.service.apps.scheduler.AppScheduler.APP_NAME;
import static org.openmetadata.service.exception.CatalogExceptionMessage.LIVE_APP_SCHEDULE_ERR;
import java.util.List;
@@ -13,6 +13,7 @@
import org.openmetadata.schema.entity.app.App;
import org.openmetadata.schema.entity.app.AppRunRecord;
import org.openmetadata.schema.entity.app.AppType;
+import org.openmetadata.schema.entity.app.ScheduleTimeline;
import org.openmetadata.schema.entity.app.ScheduleType;
import org.openmetadata.schema.entity.app.ScheduledExecutionContext;
import org.openmetadata.schema.entity.applications.configuration.ApplicationConfig;
@@ -61,8 +62,23 @@ public void init(App app) {
@Override
public void install() {
+ // If the app does not have any Schedule Return without scheduling
+ if (app.getAppSchedule() != null
+ && app.getAppSchedule().getScheduleTimeline().equals(ScheduleTimeline.NONE)) {
+ return;
+ }
if (app.getAppType() == AppType.Internal
&& app.getScheduleType().equals(ScheduleType.Scheduled)) {
+ try {
+ ApplicationHandler.getInstance().removeOldJobs(app);
+ ApplicationHandler.getInstance().migrateQuartzConfig(app);
+ ApplicationHandler.getInstance().fixCorruptedInstallation(app);
+ } catch (SchedulerException e) {
+ throw AppException.byMessage(
+ "ApplicationHandler",
+ "SchedulerError",
+ "Error while migrating application configuration: " + app.getName());
+ }
scheduleInternal();
} else if (app.getAppType() == AppType.External
&& app.getScheduleType().equals(ScheduleType.Scheduled)) {
@@ -197,9 +213,9 @@ protected void validateServerExecutableApp(AppRuntime context) {
@Override
public void execute(JobExecutionContext jobExecutionContext) {
// This is the part of the code that is executed by the scheduler
- App jobApp =
- JsonUtils.readOrConvertValue(
- jobExecutionContext.getJobDetail().getJobDataMap().get(APP_INFO_KEY), App.class);
+ String appName = (String) jobExecutionContext.getJobDetail().getJobDataMap().get(APP_NAME);
+ App jobApp = collectionDAO.applicationDAO().findEntityByName(appName);
+ ApplicationHandler.getInstance().setAppRuntimeProperties(jobApp);
// Initialise the Application
this.init(jobApp);
@@ -212,6 +228,14 @@ public void configure() {
/* Not needed by default */
}
+ @Override
+ public void raisePreviewMessage(App app) {
+ throw AppException.byMessage(
+ app.getName(),
+ "Preview",
+ "App is in Preview Mode. Enable it from the server configuration.");
+ }
+
public static AppRuntime getAppRuntime(App app) {
return JsonUtils.convertValue(app.getRuntime(), ScheduledExecutionContext.class);
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/AppException.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/AppException.java
index dc8dcba82760..7387e59ea867 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/AppException.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/AppException.java
@@ -6,7 +6,7 @@
public class AppException extends WebServiceException {
private static final String BY_NAME_MESSAGE = "Application [%s] Exception [%s] due to [%s].";
- private static final String ERROR_TYPE = "PIPELINE_SERVICE_ERROR";
+ private static final String ERROR_TYPE = "APPLICATION_ERROR";
public AppException(String message) {
super(Response.Status.BAD_REQUEST, ERROR_TYPE, message);
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/ApplicationHandler.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/ApplicationHandler.java
index c70edba97d81..a9ab04ea0343 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/ApplicationHandler.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/ApplicationHandler.java
@@ -1,85 +1,219 @@
package org.openmetadata.service.apps;
+import static org.openmetadata.common.utils.CommonUtil.nullOrEmpty;
+import static org.openmetadata.service.apps.scheduler.AppScheduler.APPS_JOB_GROUP;
+import static org.openmetadata.service.apps.scheduler.AppScheduler.APP_INFO_KEY;
+import static org.openmetadata.service.apps.scheduler.AppScheduler.APP_NAME;
+
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
-import java.util.HashMap;
+import java.util.Collection;
+import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
+import org.openmetadata.schema.api.configuration.apps.AppPrivateConfig;
+import org.openmetadata.schema.api.configuration.apps.AppsPrivateConfiguration;
import org.openmetadata.schema.entity.app.App;
+import org.openmetadata.service.OpenMetadataApplicationConfig;
+import org.openmetadata.service.apps.scheduler.AppScheduler;
import org.openmetadata.service.exception.UnhandledServerException;
+import org.openmetadata.service.jdbi3.AppRepository;
import org.openmetadata.service.jdbi3.CollectionDAO;
+import org.openmetadata.service.jdbi3.EntityRepository;
import org.openmetadata.service.search.SearchRepository;
+import org.openmetadata.service.util.JsonUtils;
+import org.openmetadata.service.util.OpenMetadataConnectionBuilder;
+import org.quartz.JobDataMap;
+import org.quartz.JobDetail;
+import org.quartz.JobKey;
+import org.quartz.SchedulerException;
+import org.quartz.impl.matchers.GroupMatcher;
@Slf4j
public class ApplicationHandler {
- private static HashMap instances = new HashMap<>();
+ @Getter private static ApplicationHandler instance;
+ private final OpenMetadataApplicationConfig config;
+ private final AppsPrivateConfiguration privateConfiguration;
+ private final AppRepository appRepository;
+
+ private ApplicationHandler(OpenMetadataApplicationConfig config) {
+ this.config = config;
+ this.privateConfiguration = config.getAppsPrivateConfiguration();
+ this.appRepository = new AppRepository();
+ }
+
+ public static void initialize(OpenMetadataApplicationConfig config) {
+ if (instance != null) {
+ return;
+ }
+ instance = new ApplicationHandler(config);
+ }
+
+ /**
+ * Load the apps' OM configuration and private parameters
+ */
+ public void setAppRuntimeProperties(App app) {
+ app.setOpenMetadataServerConnection(
+ new OpenMetadataConnectionBuilder(config, app.getBot().getName()).build());
- public static Object getAppInstance(String className) {
- return instances.get(className);
+ if (privateConfiguration != null
+ && !nullOrEmpty(privateConfiguration.getAppsPrivateConfiguration())) {
+ for (AppPrivateConfig appPrivateConfig : privateConfiguration.getAppsPrivateConfiguration()) {
+ if (app.getName().equals(appPrivateConfig.getName())) {
+ app.setPreview(appPrivateConfig.getPreview());
+ app.setPrivateConfiguration(appPrivateConfig.getParameters());
+ }
+ }
+ }
}
- private ApplicationHandler() {
- /*Helper*/
+ public Boolean isPreview(String appName) {
+ if (privateConfiguration != null
+ && !nullOrEmpty(privateConfiguration.getAppsPrivateConfiguration())) {
+ for (AppPrivateConfig appPrivateConfig : privateConfiguration.getAppsPrivateConfiguration()) {
+ if (appName.equals(appPrivateConfig.getName())) {
+ return appPrivateConfig.getPreview();
+ }
+ }
+ }
+ return false;
}
- public static void triggerApplicationOnDemand(
+ public void triggerApplicationOnDemand(
App app, CollectionDAO daoCollection, SearchRepository searchRepository) {
runMethodFromApplication(app, daoCollection, searchRepository, "triggerOnDemand");
}
- public static void installApplication(
+ public void installApplication(
App app, CollectionDAO daoCollection, SearchRepository searchRepository) {
runMethodFromApplication(app, daoCollection, searchRepository, "install");
}
- public static void configureApplication(
+ public void configureApplication(
App app, CollectionDAO daoCollection, SearchRepository searchRepository) {
runMethodFromApplication(app, daoCollection, searchRepository, "configure");
}
- public static Object runAppInit(
- App app, CollectionDAO daoCollection, SearchRepository searchRepository)
+ public Object runAppInit(App app, CollectionDAO daoCollection, SearchRepository searchRepository)
throws ClassNotFoundException,
NoSuchMethodException,
InvocationTargetException,
InstantiationException,
IllegalAccessException {
+ // add private runtime properties
+ setAppRuntimeProperties(app);
Class> clz = Class.forName(app.getClassName());
Object resource =
clz.getDeclaredConstructor(CollectionDAO.class, SearchRepository.class)
.newInstance(daoCollection, searchRepository);
+ // Raise preview message if the app is in Preview mode
+ if (Boolean.TRUE.equals(app.getPreview())) {
+ Method preview = resource.getClass().getMethod("raisePreviewMessage", App.class);
+ preview.invoke(resource, app);
+ }
+
// Call init Method
Method initMethod = resource.getClass().getMethod("init", App.class);
initMethod.invoke(resource, app);
- instances.put(app.getClassName(), resource);
-
return resource;
}
- /** Load an App from its className and call its methods dynamically */
- public static void runMethodFromApplication(
+ /**
+ * Load an App from its className and call its methods dynamically
+ */
+ public void runMethodFromApplication(
App app, CollectionDAO daoCollection, SearchRepository searchRepository, String methodName) {
// Native Application
try {
- Object resource = getAppInstance(app.getClassName());
- if (resource == null) {
- resource = runAppInit(app, daoCollection, searchRepository);
- }
-
+ Object resource = runAppInit(app, daoCollection, searchRepository);
// Call method on demand
Method scheduleMethod = resource.getClass().getMethod(methodName);
scheduleMethod.invoke(resource);
- } catch (NoSuchMethodException
- | InstantiationException
- | IllegalAccessException
- | InvocationTargetException e) {
+ } catch (NoSuchMethodException | InstantiationException | IllegalAccessException e) {
LOG.error("Exception encountered", e);
- throw new UnhandledServerException("Exception encountered", e);
+ throw new UnhandledServerException(e.getMessage());
} catch (ClassNotFoundException e) {
- throw new UnhandledServerException("Exception encountered", e);
+ throw new UnhandledServerException(e.getMessage());
+ } catch (InvocationTargetException e) {
+ throw AppException.byMessage(app.getName(), methodName, e.getTargetException().getMessage());
+ }
+ }
+
+ public void migrateQuartzConfig(App application) throws SchedulerException {
+ JobDetail jobDetails =
+ AppScheduler.getInstance()
+ .getScheduler()
+ .getJobDetail(new JobKey(application.getName(), APPS_JOB_GROUP));
+ if (jobDetails == null) {
+ return;
+ }
+ JobDataMap jobDataMap = jobDetails.getJobDataMap();
+ if (jobDataMap == null) {
+ return;
+ }
+ String appInfo = jobDataMap.getString(APP_INFO_KEY);
+ if (appInfo == null) {
+ return;
}
+ LOG.info("migrating app quartz configuration for {}", application.getName());
+ App updatedApp = JsonUtils.readOrConvertValue(appInfo, App.class);
+ App currentApp = appRepository.getDao().findEntityById(application.getId());
+ updatedApp.setOpenMetadataServerConnection(null);
+ updatedApp.setPrivateConfiguration(null);
+ updatedApp.setScheduleType(currentApp.getScheduleType());
+ updatedApp.setAppSchedule(currentApp.getAppSchedule());
+ updatedApp.setUpdatedBy(currentApp.getUpdatedBy());
+ updatedApp.setFullyQualifiedName(currentApp.getFullyQualifiedName());
+ EntityRepository.EntityUpdater updater =
+ appRepository.getUpdater(currentApp, updatedApp, EntityRepository.Operation.PATCH);
+ updater.update();
+ AppScheduler.getInstance().deleteScheduledApplication(updatedApp);
+ AppScheduler.getInstance().addApplicationSchedule(updatedApp);
+ LOG.info("migrated app configuration for {}", application.getName());
+ }
+
+ public void fixCorruptedInstallation(App application) throws SchedulerException {
+ JobDetail jobDetails =
+ AppScheduler.getInstance()
+ .getScheduler()
+ .getJobDetail(new JobKey(application.getName(), APPS_JOB_GROUP));
+ if (jobDetails == null) {
+ return;
+ }
+ JobDataMap jobDataMap = jobDetails.getJobDataMap();
+ if (jobDataMap == null) {
+ return;
+ }
+ String appName = jobDataMap.getString(APP_NAME);
+ if (appName == null) {
+ LOG.info("corrupt entry for app {}, reinstalling", application.getName());
+ App app = appRepository.getDao().findEntityByName(application.getName());
+ AppScheduler.getInstance().deleteScheduledApplication(app);
+ AppScheduler.getInstance().addApplicationSchedule(app);
+ }
+ }
+
+ public void removeOldJobs(App app) throws SchedulerException {
+    Collection<JobKey> jobKeys =
+ AppScheduler.getInstance()
+ .getScheduler()
+ .getJobKeys(GroupMatcher.groupContains(APPS_JOB_GROUP));
+ jobKeys.forEach(
+ jobKey -> {
+ try {
+            Class<?> clz =
+ AppScheduler.getInstance().getScheduler().getJobDetail(jobKey).getJobClass();
+ if (!jobKey.getName().equals(app.getName())
+ && clz.getName().equals(app.getClassName())) {
+ LOG.info("deleting old job {}", jobKey.getName());
+ AppScheduler.getInstance().getScheduler().deleteJob(jobKey);
+ }
+ } catch (SchedulerException e) {
+ LOG.error("Error deleting job {}", jobKey.getName(), e);
+ }
+ });
}
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/NativeApplication.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/NativeApplication.java
index 58e6edb8d2c4..206a612766d2 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/NativeApplication.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/NativeApplication.java
@@ -13,5 +13,7 @@ public interface NativeApplication extends Job {
void configure();
+ void raisePreviewMessage(App app);
+
default void startApp(JobExecutionContext jobExecutionContext) {}
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/changeEvent/EventAlertProducer.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/changeEvent/EventAlertProducer.java
deleted file mode 100644
index 874473064215..000000000000
--- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/changeEvent/EventAlertProducer.java
+++ /dev/null
@@ -1,3 +0,0 @@
-package org.openmetadata.service.apps.bundles.changeEvent;
-
-public class EventAlertProducer {}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/changeEvent/gchat/GChatPublisher.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/changeEvent/gchat/GChatPublisher.java
index de49397f942b..1aa906896c7d 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/changeEvent/gchat/GChatPublisher.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/changeEvent/gchat/GChatPublisher.java
@@ -54,7 +54,7 @@ public GChatPublisher(SubscriptionDestination subscription) {
client = getClient(subscription.getTimeout(), subscription.getReadTimeout());
// Build Target
- if (webhook.getEndpoint() != null) {
+ if (webhook != null && webhook.getEndpoint() != null) {
String gChatWebhookURL = webhook.getEndpoint().toString();
if (!CommonUtil.nullOrEmpty(gChatWebhookURL)) {
target = client.target(gChatWebhookURL).request();
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/changeEvent/msteams/MSTeamsPublisher.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/changeEvent/msteams/MSTeamsPublisher.java
index ff2d1cf40823..588bd71d081c 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/changeEvent/msteams/MSTeamsPublisher.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/changeEvent/msteams/MSTeamsPublisher.java
@@ -54,7 +54,7 @@ public MSTeamsPublisher(SubscriptionDestination subscription) {
client = getClient(subscription.getTimeout(), subscription.getReadTimeout());
// Build Target
- if (webhook.getEndpoint() != null) {
+ if (webhook != null && webhook.getEndpoint() != null) {
String msTeamsWebhookURL = webhook.getEndpoint().toString();
if (!CommonUtil.nullOrEmpty(msTeamsWebhookURL)) {
target = client.target(msTeamsWebhookURL).request();
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/changeEvent/slack/SlackEventPublisher.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/changeEvent/slack/SlackEventPublisher.java
index 9ad9d7ca27c7..36a42d990ce3 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/changeEvent/slack/SlackEventPublisher.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/changeEvent/slack/SlackEventPublisher.java
@@ -52,7 +52,7 @@ public SlackEventPublisher(SubscriptionDestination subscription) {
client = getClient(subscription.getTimeout(), subscription.getReadTimeout());
// Build Target
- if (webhook.getEndpoint() != null) {
+ if (webhook != null && webhook.getEndpoint() != null) {
String slackWebhookURL = webhook.getEndpoint().toString();
if (!CommonUtil.nullOrEmpty(slackWebhookURL)) {
target = client.target(slackWebhookURL).request();
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/insights/DataInsightsReportApp.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/insights/DataInsightsReportApp.java
index adedbf76f772..56f9ab118533 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/insights/DataInsightsReportApp.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/insights/DataInsightsReportApp.java
@@ -8,7 +8,7 @@
import static org.openmetadata.schema.type.DataReportIndex.ENTITY_REPORT_DATA_INDEX;
import static org.openmetadata.service.Entity.KPI;
import static org.openmetadata.service.Entity.TEAM;
-import static org.openmetadata.service.apps.scheduler.AppScheduler.APP_INFO_KEY;
+import static org.openmetadata.service.apps.scheduler.AppScheduler.APP_NAME;
import static org.openmetadata.service.util.SubscriptionUtil.getAdminsData;
import static org.openmetadata.service.util.Utilities.getMonthAndDateFromEpoch;
@@ -70,9 +70,8 @@ public DataInsightsReportApp(CollectionDAO collectionDAO, SearchRepository searc
@Override
public void execute(JobExecutionContext jobExecutionContext) {
- App app =
- JsonUtils.readOrConvertValue(
- jobExecutionContext.getJobDetail().getJobDataMap().get(APP_INFO_KEY), App.class);
+ String appName = (String) jobExecutionContext.getJobDetail().getJobDataMap().get(APP_NAME);
+ App app = collectionDAO.applicationDAO().findEntityByName(appName);
// Calculate time diff
long currentTime = Instant.now().toEpochMilli();
long scheduleTime = currentTime - 604800000L;
@@ -295,6 +294,7 @@ private DataInsightDescriptionAndOwnerTemplate createDescriptionTemplate(
PERCENTAGE_OF_ENTITIES_WITH_DESCRIPTION_BY_TYPE,
currentPercentCompleted,
currentPercentCompleted - previousPercentCompleted,
+ (int) currentCompletedDescription,
numberOfDaysChange,
dateMap);
}
@@ -304,6 +304,7 @@ private DataInsightDescriptionAndOwnerTemplate createDescriptionTemplate(
PERCENTAGE_OF_ENTITIES_WITH_DESCRIPTION_BY_TYPE,
0D,
0D,
+ 0,
numberOfDaysChange,
dateMap);
}
@@ -363,6 +364,7 @@ private DataInsightDescriptionAndOwnerTemplate createOwnershipTemplate(
PERCENTAGE_OF_ENTITIES_WITH_OWNER_BY_TYPE,
currentPercentCompleted,
currentPercentCompleted - previousPercentCompleted,
+ (int) currentHasOwner,
numberOfDaysChange,
dateMap);
}
@@ -371,6 +373,7 @@ private DataInsightDescriptionAndOwnerTemplate createOwnershipTemplate(
PERCENTAGE_OF_ENTITIES_WITH_OWNER_BY_TYPE,
0D,
0D,
+ 0,
numberOfDaysChange,
dateMap);
}
@@ -409,6 +412,7 @@ private DataInsightDescriptionAndOwnerTemplate createTierTemplate(
return new DataInsightDescriptionAndOwnerTemplate(
DataInsightDescriptionAndOwnerTemplate.MetricType.TIER,
null,
+ "0",
0D,
KPI_NOT_SET,
0D,
@@ -422,6 +426,7 @@ private DataInsightDescriptionAndOwnerTemplate createTierTemplate(
return new DataInsightDescriptionAndOwnerTemplate(
DataInsightDescriptionAndOwnerTemplate.MetricType.TIER,
null,
+ "0",
0D,
KPI_NOT_SET,
0D,
@@ -504,6 +509,7 @@ private DataInsightDescriptionAndOwnerTemplate getTemplate(
DataInsightChartResult.DataInsightChartType chartType,
Double percentCompleted,
Double percentChange,
+ int totalAssets,
int numberOfDaysChange,
Map dateMap) {
@@ -525,8 +531,8 @@ private DataInsightDescriptionAndOwnerTemplate getTemplate(
if (isKpiAvailable) {
targetKpi =
- String.valueOf(
- Double.parseDouble(validKpi.getTargetDefinition().get(0).getValue()) * 100);
+ String.format(
+ "%.2f", Double.parseDouble(validKpi.getTargetDefinition().get(0).getValue()) * 100);
KpiResult result = getKpiResult(validKpi.getName());
if (result != null) {
isTargetMet = result.getTargetResult().get(0).getTargetMet();
@@ -547,6 +553,7 @@ private DataInsightDescriptionAndOwnerTemplate getTemplate(
return new DataInsightDescriptionAndOwnerTemplate(
metricType,
criteria,
+ String.valueOf(totalAssets),
percentCompleted,
targetKpi,
percentChange,
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexApp.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexApp.java
index 9127443e6dbe..1bbe997598f0 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexApp.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/SearchIndexApp.java
@@ -1,9 +1,10 @@
package org.openmetadata.service.apps.bundles.searchIndex;
+import static org.openmetadata.schema.system.IndexingError.ErrorSource.READER;
import static org.openmetadata.service.apps.scheduler.AbstractOmAppJobListener.APP_RUN_STATS;
+import static org.openmetadata.service.apps.scheduler.AppScheduler.ON_DEMAND_JOB;
import static org.openmetadata.service.workflows.searchIndex.ReindexingUtil.ENTITY_TYPE_KEY;
import static org.openmetadata.service.workflows.searchIndex.ReindexingUtil.getTotalRequestToProcess;
-import static org.openmetadata.service.workflows.searchIndex.ReindexingUtil.getUpdatedStats;
import static org.openmetadata.service.workflows.searchIndex.ReindexingUtil.isDataInsightIndex;
import java.util.ArrayList;
@@ -19,7 +20,6 @@
import org.openmetadata.schema.analytics.ReportData;
import org.openmetadata.schema.entity.app.App;
import org.openmetadata.schema.entity.app.AppRunRecord;
-import org.openmetadata.schema.entity.app.AppRunType;
import org.openmetadata.schema.entity.app.FailureContext;
import org.openmetadata.schema.entity.app.SuccessContext;
import org.openmetadata.schema.service.configuration.elasticsearch.ElasticSearchConfiguration;
@@ -110,64 +110,29 @@ public void init(App app) {
if (request.getEntities().contains(ALL)) {
request.setEntities(ALL_ENTITIES);
}
- int totalRecords = getTotalRequestToProcess(request.getEntities(), collectionDAO);
- this.jobData = request;
- this.jobData.setStats(
- new Stats()
- .withJobStats(
- new StepStats()
- .withTotalRecords(totalRecords)
- .withFailedRecords(0)
- .withSuccessRecords(0)));
- request
- .getEntities()
- .forEach(
- entityType -> {
- if (!isDataInsightIndex(entityType)) {
- List fields = List.of("*");
- PaginatedEntitiesSource source =
- new PaginatedEntitiesSource(entityType, jobData.getBatchSize(), fields);
- if (!CommonUtil.nullOrEmpty(request.getAfterCursor())) {
- source.setCursor(request.getAfterCursor());
- }
- paginatedEntitiesSources.add(source);
- } else {
- paginatedDataInsightSources.add(
- new PaginatedDataInsightSource(
- collectionDAO, entityType, jobData.getBatchSize()));
- }
- });
- if (searchRepository.getSearchType().equals(ElasticSearchConfiguration.SearchType.OPENSEARCH)) {
- this.entityProcessor = new OpenSearchEntitiesProcessor(totalRecords);
- this.dataInsightProcessor = new OpenSearchDataInsightProcessor(totalRecords);
- this.searchIndexSink = new OpenSearchIndexSink(searchRepository, totalRecords);
- } else {
- this.entityProcessor = new ElasticSearchEntitiesProcessor(totalRecords);
- this.dataInsightProcessor = new ElasticSearchDataInsightProcessor(totalRecords);
- this.searchIndexSink = new ElasticSearchIndexSink(searchRepository, totalRecords);
- }
+ jobData = request;
}
@Override
public void startApp(JobExecutionContext jobExecutionContext) {
try {
+ initializeJob();
LOG.info("Executing Reindexing Job with JobData : {}", jobData);
// Update Job Status
jobData.setStatus(EventPublisherJob.Status.RUNNING);
// Make recreate as false for onDemand
- AppRunType runType =
- AppRunType.fromValue(
- (String) jobExecutionContext.getJobDetail().getJobDataMap().get("triggerType"));
+ String runType =
+ (String) jobExecutionContext.getJobDetail().getJobDataMap().get("triggerType");
- // Schedule Run has recreate as false always
- if (runType.equals(AppRunType.Scheduled)) {
+ // Schedule Run has re-create set to false
+ if (!runType.equals(ON_DEMAND_JOB)) {
jobData.setRecreateIndex(false);
}
// Run ReIndexing
- entitiesReIndex();
- dataInsightReindex();
+ entitiesReIndex(jobExecutionContext);
+ dataInsightReindex(jobExecutionContext);
// Mark Job as Completed
updateJobStatus();
} catch (Exception ex) {
@@ -182,12 +147,46 @@ public void startApp(JobExecutionContext jobExecutionContext) {
jobData.setStatus(EventPublisherJob.Status.FAILED);
jobData.setFailure(indexingError);
} finally {
- // store job details in Database
- jobExecutionContext.getJobDetail().getJobDataMap().put(APP_RUN_STATS, jobData.getStats());
- // Update Record to db
- updateRecordToDb(jobExecutionContext);
// Send update
- sendUpdates();
+ sendUpdates(jobExecutionContext);
+ }
+ }
+
+ private void initializeJob() {
+ int totalRecords = getTotalRequestToProcess(jobData.getEntities(), collectionDAO);
+ this.jobData.setStats(
+ new Stats()
+ .withJobStats(
+ new StepStats()
+ .withTotalRecords(totalRecords)
+ .withFailedRecords(0)
+ .withSuccessRecords(0)));
+ jobData
+ .getEntities()
+ .forEach(
+ entityType -> {
+ if (!isDataInsightIndex(entityType)) {
+                List<String> fields = List.of("*");
+ PaginatedEntitiesSource source =
+ new PaginatedEntitiesSource(entityType, jobData.getBatchSize(), fields);
+ if (!CommonUtil.nullOrEmpty(jobData.getAfterCursor())) {
+ source.setCursor(jobData.getAfterCursor());
+ }
+ paginatedEntitiesSources.add(source);
+ } else {
+ paginatedDataInsightSources.add(
+ new PaginatedDataInsightSource(
+ collectionDAO, entityType, jobData.getBatchSize()));
+ }
+ });
+ if (searchRepository.getSearchType().equals(ElasticSearchConfiguration.SearchType.OPENSEARCH)) {
+ this.entityProcessor = new OpenSearchEntitiesProcessor(totalRecords);
+ this.dataInsightProcessor = new OpenSearchDataInsightProcessor(totalRecords);
+ this.searchIndexSink = new OpenSearchIndexSink(searchRepository, totalRecords);
+ } else {
+ this.entityProcessor = new ElasticSearchEntitiesProcessor(totalRecords);
+ this.dataInsightProcessor = new ElasticSearchDataInsightProcessor(totalRecords);
+ this.searchIndexSink = new ElasticSearchIndexSink(searchRepository, totalRecords);
}
}
@@ -212,7 +211,7 @@ public void updateRecordToDb(JobExecutionContext jobExecutionContext) {
pushAppStatusUpdates(jobExecutionContext, appRecord, true);
}
- private void entitiesReIndex() {
+ private void entitiesReIndex(JobExecutionContext jobExecutionContext) {
Map contextData = new HashMap<>();
for (PaginatedEntitiesSource paginatedEntitiesSource : paginatedEntitiesSources) {
reCreateIndexes(paginatedEntitiesSource.getEntityType());
@@ -223,17 +222,31 @@ private void entitiesReIndex() {
resultList = paginatedEntitiesSource.readNext(null);
if (!resultList.getData().isEmpty()) {
searchIndexSink.write(entityProcessor.process(resultList, contextData), contextData);
+ if (!resultList.getErrors().isEmpty()) {
+ throw new SearchIndexException(
+ new IndexingError()
+ .withErrorSource(READER)
+ .withLastFailedCursor(paginatedEntitiesSource.getLastFailedCursor())
+ .withSubmittedCount(paginatedEntitiesSource.getBatchSize())
+ .withSuccessCount(resultList.getData().size())
+ .withFailedCount(resultList.getErrors().size())
+ .withMessage(
+ "Issues in Reading A Batch For Entities. Check Errors Corresponding to Entities.")
+ .withFailedEntities(resultList.getErrors()));
+ }
}
} catch (SearchIndexException rx) {
+ jobData.setStatus(EventPublisherJob.Status.FAILED);
jobData.setFailure(rx.getIndexingError());
+ } finally {
+ updateStats(paginatedEntitiesSource.getEntityType(), paginatedEntitiesSource.getStats());
+ sendUpdates(jobExecutionContext);
}
}
- updateStats(paginatedEntitiesSource.getEntityType(), paginatedEntitiesSource.getStats());
- sendUpdates();
}
}
- private void dataInsightReindex() {
+ private void dataInsightReindex(JobExecutionContext jobExecutionContext) {
Map contextData = new HashMap<>();
for (PaginatedDataInsightSource paginatedDataInsightSource : paginatedDataInsightSources) {
reCreateIndexes(paginatedDataInsightSource.getEntityType());
@@ -247,17 +260,23 @@ private void dataInsightReindex() {
dataInsightProcessor.process(resultList, contextData), contextData);
}
} catch (SearchIndexException ex) {
+ jobData.setStatus(EventPublisherJob.Status.FAILED);
jobData.setFailure(ex.getIndexingError());
+ } finally {
+ updateStats(
+ paginatedDataInsightSource.getEntityType(), paginatedDataInsightSource.getStats());
+ sendUpdates(jobExecutionContext);
}
}
- updateStats(
- paginatedDataInsightSource.getEntityType(), paginatedDataInsightSource.getStats());
- sendUpdates();
}
}
- private void sendUpdates() {
+ private void sendUpdates(JobExecutionContext jobExecutionContext) {
try {
+ // store job details in Database
+ jobExecutionContext.getJobDetail().getJobDataMap().put(APP_RUN_STATS, jobData.getStats());
+ // Update Record to db
+ updateRecordToDb(jobExecutionContext);
if (WebSocketManager.getInstance() != null) {
WebSocketManager.getInstance()
.broadCastMessageToAll(
@@ -275,7 +294,8 @@ public void updateStats(String entityType, StepStats currentEntityStats) {
// Update Entity Level Stats
StepStats entityLevelStats = jobDataStats.getEntityStats();
if (entityLevelStats == null) {
- entityLevelStats = new StepStats();
+ entityLevelStats =
+ new StepStats().withTotalRecords(null).withFailedRecords(null).withSuccessRecords(null);
}
entityLevelStats.withAdditionalProperty(entityType, currentEntityStats);
@@ -286,8 +306,17 @@ public void updateStats(String entityType, StepStats currentEntityStats) {
new StepStats()
.withTotalRecords(getTotalRequestToProcess(jobData.getEntities(), collectionDAO));
}
- getUpdatedStats(
- stats, currentEntityStats.getSuccessRecords(), currentEntityStats.getFailedRecords());
+
+ stats.setSuccessRecords(
+ entityLevelStats.getAdditionalProperties().values().stream()
+ .map(s -> (StepStats) s)
+ .mapToInt(StepStats::getSuccessRecords)
+ .sum());
+ stats.setFailedRecords(
+ entityLevelStats.getAdditionalProperties().values().stream()
+ .map(s -> (StepStats) s)
+ .mapToInt(StepStats::getFailedRecords)
+ .sum());
// Update for the Job
jobDataStats.setJobStats(stats);
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/scheduler/AbstractOmAppJobListener.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/scheduler/AbstractOmAppJobListener.java
index 2d4afc2733b4..cadf140af8f2 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/scheduler/AbstractOmAppJobListener.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/scheduler/AbstractOmAppJobListener.java
@@ -1,6 +1,6 @@
package org.openmetadata.service.apps.scheduler;
-import static org.openmetadata.service.apps.scheduler.AppScheduler.APP_INFO_KEY;
+import static org.openmetadata.service.apps.scheduler.AppScheduler.APP_NAME;
import java.util.HashMap;
import java.util.Map;
@@ -8,9 +8,9 @@
import org.apache.commons.lang.exception.ExceptionUtils;
import org.openmetadata.schema.entity.app.App;
import org.openmetadata.schema.entity.app.AppRunRecord;
-import org.openmetadata.schema.entity.app.AppRunType;
import org.openmetadata.schema.entity.app.FailureContext;
import org.openmetadata.schema.entity.app.SuccessContext;
+import org.openmetadata.service.apps.ApplicationHandler;
import org.openmetadata.service.jdbi3.CollectionDAO;
import org.openmetadata.service.util.JsonUtils;
import org.quartz.JobDataMap;
@@ -35,38 +35,37 @@ public String getName() {
@Override
public void jobToBeExecuted(JobExecutionContext jobExecutionContext) {
- AppRunType runType =
- AppRunType.fromValue(
- (String) jobExecutionContext.getJobDetail().getJobDataMap().get("triggerType"));
- App jobApp =
- JsonUtils.readOrConvertValue(
- jobExecutionContext.getJobDetail().getJobDataMap().get(APP_INFO_KEY), App.class);
+ String runType = (String) jobExecutionContext.getJobDetail().getJobDataMap().get("triggerType");
+ String appName = (String) jobExecutionContext.getJobDetail().getJobDataMap().get(APP_NAME);
+ App jobApp = collectionDAO.applicationDAO().findEntityByName(appName);
+ ApplicationHandler.getInstance().setAppRuntimeProperties(jobApp);
JobDataMap dataMap = jobExecutionContext.getJobDetail().getJobDataMap();
long jobStartTime = System.currentTimeMillis();
- AppRunRecord runRecord;
+ AppRunRecord runRecord =
+ new AppRunRecord()
+ .withAppId(jobApp.getId())
+ .withStartTime(jobStartTime)
+ .withTimestamp(jobStartTime)
+ .withRunType(runType)
+ .withStatus(AppRunRecord.Status.RUNNING)
+ .withScheduleInfo(jobApp.getAppSchedule());
+ ;
boolean update = false;
if (jobExecutionContext.isRecovering()) {
- runRecord =
+ AppRunRecord latestRunRecord =
JsonUtils.readValue(
collectionDAO.appExtensionTimeSeriesDao().getLatestAppRun(jobApp.getId()),
AppRunRecord.class);
+ if (latestRunRecord != null) {
+ runRecord = latestRunRecord;
+ }
update = true;
- } else {
- runRecord =
- new AppRunRecord()
- .withAppId(jobApp.getId())
- .withStartTime(jobStartTime)
- .withTimestamp(jobStartTime)
- .withRunType(runType)
- .withStatus(AppRunRecord.Status.RUNNING)
- .withScheduleInfo(jobApp.getAppSchedule());
}
// Put the Context in the Job Data Map
dataMap.put(SCHEDULED_APP_RUN_EXTENSION, JsonUtils.pojoToJson(runRecord));
// Insert new Record Run
pushApplicationStatusUpdates(jobExecutionContext, runRecord, update);
-
this.doJobToBeExecuted(jobExecutionContext);
}
@@ -125,10 +124,13 @@ public void pushApplicationStatusUpdates(
JobExecutionContext context, AppRunRecord runRecord, boolean update) {
JobDataMap dataMap = context.getJobDetail().getJobDataMap();
if (dataMap.containsKey(SCHEDULED_APP_RUN_EXTENSION)) {
- App jobApp =
- JsonUtils.readOrConvertValue(
- context.getJobDetail().getJobDataMap().get(APP_INFO_KEY), App.class);
- updateStatus(jobApp.getId(), runRecord, update);
+ // Update the Run Record in Data Map
+ dataMap.put(SCHEDULED_APP_RUN_EXTENSION, JsonUtils.pojoToJson(runRecord));
+
+ // Push Updates to the Database
+ String appName = (String) context.getJobDetail().getJobDataMap().get(APP_NAME);
+ UUID appId = collectionDAO.applicationDAO().findEntityByName(appName).getId();
+ updateStatus(appId, runRecord, update);
}
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/scheduler/AppScheduler.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/scheduler/AppScheduler.java
index 3aebf1f85e86..98e84ccdf12e 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/scheduler/AppScheduler.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/scheduler/AppScheduler.java
@@ -17,15 +17,14 @@
import org.openmetadata.common.utils.CommonUtil;
import org.openmetadata.schema.AppRuntime;
import org.openmetadata.schema.entity.app.App;
-import org.openmetadata.schema.entity.app.AppRunType;
import org.openmetadata.schema.entity.app.AppSchedule;
+import org.openmetadata.schema.entity.app.ScheduleTimeline;
import org.openmetadata.service.OpenMetadataApplicationConfig;
import org.openmetadata.service.apps.NativeApplication;
import org.openmetadata.service.exception.UnhandledServerException;
import org.openmetadata.service.jdbi3.CollectionDAO;
import org.openmetadata.service.jdbi3.locator.ConnectionType;
import org.openmetadata.service.search.SearchRepository;
-import org.openmetadata.service.util.JsonUtils;
import org.quartz.CronScheduleBuilder;
import org.quartz.JobBuilder;
import org.quartz.JobDataMap;
@@ -43,6 +42,7 @@
@Slf4j
public class AppScheduler {
private static final Map defaultAppScheduleConfig = new HashMap<>();
+ public static final String ON_DEMAND_JOB = "OnDemandJob";
static {
defaultAppScheduleConfig.put("org.quartz.scheduler.instanceName", "AppScheduler");
@@ -66,7 +66,7 @@ public class AppScheduler {
public static final String APPS_JOB_GROUP = "OMAppsJobGroup";
public static final String APPS_TRIGGER_GROUP = "OMAppsJobGroup";
public static final String APP_INFO_KEY = "applicationInfoKey";
- public static final String SEARCH_CLIENT_KEY = "searchClientKey";
+ public static final String APP_NAME = "appName";
private static AppScheduler instance;
private static volatile boolean initialized = false;
@Getter private final Scheduler scheduler;
@@ -135,16 +135,17 @@ public static AppScheduler getInstance() {
public void addApplicationSchedule(App application) {
try {
- if (scheduler.getJobDetail(new JobKey(application.getId().toString(), APPS_JOB_GROUP))
- != null) {
+ if (scheduler.getJobDetail(new JobKey(application.getName(), APPS_JOB_GROUP)) != null) {
LOG.info("Job already exists for the application, skipping the scheduling");
return;
}
AppRuntime context = getAppRuntime(application);
if (Boolean.TRUE.equals(context.getEnabled())) {
- JobDetail jobDetail = jobBuilder(application, application.getId().toString());
- Trigger trigger = trigger(application);
- scheduler.scheduleJob(jobDetail, trigger);
+ JobDetail jobDetail = jobBuilder(application, application.getName());
+ if (!application.getAppSchedule().getScheduleTimeline().equals(ScheduleTimeline.NONE)) {
+ Trigger trigger = trigger(application);
+ scheduler.scheduleJob(jobDetail, trigger);
+ }
} else {
LOG.info("[Applications] App cannot be scheduled since it is disabled");
}
@@ -155,14 +156,21 @@ public void addApplicationSchedule(App application) {
}
public void deleteScheduledApplication(App app) throws SchedulerException {
- scheduler.deleteJob(new JobKey(app.getId().toString(), APPS_JOB_GROUP));
- scheduler.unscheduleJob(new TriggerKey(app.getId().toString(), APPS_TRIGGER_GROUP));
+ // Scheduled Jobs
+ scheduler.deleteJob(new JobKey(app.getName(), APPS_JOB_GROUP));
+ scheduler.unscheduleJob(new TriggerKey(app.getName(), APPS_TRIGGER_GROUP));
+
+ // OnDemand Jobs
+ scheduler.deleteJob(
+ new JobKey(String.format("%s-%s", app.getName(), ON_DEMAND_JOB), APPS_JOB_GROUP));
+ scheduler.unscheduleJob(
+ new TriggerKey(String.format("%s-%s", app.getName(), ON_DEMAND_JOB), APPS_TRIGGER_GROUP));
}
private JobDetail jobBuilder(App app, String jobIdentity) throws ClassNotFoundException {
JobDataMap dataMap = new JobDataMap();
- dataMap.put(APP_INFO_KEY, JsonUtils.pojoToJson(app));
- dataMap.put("triggerType", AppRunType.Scheduled.value());
+ dataMap.put(APP_NAME, app.getName());
+ dataMap.put("triggerType", app.getAppSchedule().getScheduleTimeline().value());
Class<? extends NativeApplication> clz =
(Class<? extends NativeApplication>) Class.forName(app.getClassName());
JobBuilder jobBuilder =
@@ -175,7 +183,7 @@ private JobDetail jobBuilder(App app, String jobIdentity) throws ClassNotFoundEx
private Trigger trigger(App app) {
return TriggerBuilder.newTrigger()
- .withIdentity(app.getId().toString(), APPS_TRIGGER_GROUP)
+ .withIdentity(app.getName(), APPS_TRIGGER_GROUP)
.withSchedule(getCronSchedule(app.getAppSchedule()))
.build();
}
@@ -187,7 +195,7 @@ public static void shutDown() throws SchedulerException {
}
public static CronScheduleBuilder getCronSchedule(AppSchedule scheduleInfo) {
- switch (scheduleInfo.getScheduleType()) {
+ switch (scheduleInfo.getScheduleTimeline()) {
case HOURLY:
return CronScheduleBuilder.cronSchedule("0 0 * ? * *");
case DAILY:
@@ -210,12 +218,11 @@ public static CronScheduleBuilder getCronSchedule(AppSchedule scheduleInfo) {
public void triggerOnDemandApplication(App application) {
try {
JobDetail jobDetailScheduled =
- scheduler.getJobDetail(new JobKey(application.getId().toString(), APPS_JOB_GROUP));
+ scheduler.getJobDetail(new JobKey(application.getName(), APPS_JOB_GROUP));
JobDetail jobDetailOnDemand =
scheduler.getJobDetail(
new JobKey(
- String.format("%s-%s", application.getId(), AppRunType.OnDemand.value()),
- APPS_JOB_GROUP));
+ String.format("%s-%s", application.getName(), ON_DEMAND_JOB), APPS_JOB_GROUP));
// Check if the job is already running
List<JobExecutionContext> currentJobs = scheduler.getCurrentlyExecutingJobs();
for (JobExecutionContext context : currentJobs) {
@@ -231,14 +238,13 @@ public void triggerOnDemandApplication(App application) {
AppRuntime context = getAppRuntime(application);
if (Boolean.TRUE.equals(context.getEnabled())) {
JobDetail newJobDetail =
- jobBuilder(
- application,
- String.format("%s-%s", application.getId(), AppRunType.OnDemand.value()));
- newJobDetail.getJobDataMap().put("triggerType", AppRunType.OnDemand.value());
+ jobBuilder(application, String.format("%s-%s", application.getName(), ON_DEMAND_JOB));
+ newJobDetail.getJobDataMap().put("triggerType", ON_DEMAND_JOB);
+ newJobDetail.getJobDataMap().put(APP_NAME, application.getFullyQualifiedName());
Trigger trigger =
TriggerBuilder.newTrigger()
.withIdentity(
- String.format("%s-%s", application.getId(), AppRunType.OnDemand.value()),
+ String.format("%s-%s", application.getName(), ON_DEMAND_JOB),
APPS_TRIGGER_GROUP)
.startNow()
.build();
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/events/AuditEventHandler.java b/openmetadata-service/src/main/java/org/openmetadata/service/events/AuditEventHandler.java
index 75fe9cc29d06..0622997c0fd2 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/events/AuditEventHandler.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/events/AuditEventHandler.java
@@ -37,13 +37,6 @@ public void init(OpenMetadataApplicationConfig config) {
public Void process(
ContainerRequestContext requestContext, ContainerResponseContext responseContext) {
- if (requestContext
- .getUriInfo()
- .getPath()
- .contains(WebAnalyticEventHandler.WEB_ANALYTIC_ENDPOINT)) {
- // we don't want to send web analytic event to the audit log
- return null;
- }
int responseCode = responseContext.getStatus();
String method = requestContext.getMethod();
if (responseContext.getEntity() != null) {
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/events/ChangeEventHandler.java b/openmetadata-service/src/main/java/org/openmetadata/service/events/ChangeEventHandler.java
index fd9fcb299b59..612c4051f8d3 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/events/ChangeEventHandler.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/events/ChangeEventHandler.java
@@ -39,11 +39,15 @@
import org.openmetadata.service.socket.WebSocketManager;
import org.openmetadata.service.util.FeedUtils;
import org.openmetadata.service.util.JsonUtils;
+import org.openmetadata.service.util.WebsocketNotificationHandler;
@Slf4j
public class ChangeEventHandler implements EventHandler {
private ObjectMapper mapper;
private FeedMessageDecorator feedMessageDecorator = new FeedMessageDecorator();
+ private final FeedRepository feedRepository = new FeedRepository();
+ private final WebsocketNotificationHandler websocketNotificationHandler =
+ new WebsocketNotificationHandler();
public void init(OpenMetadataApplicationConfig config) {
this.mapper = new ObjectMapper();
@@ -52,17 +56,20 @@ public void init(OpenMetadataApplicationConfig config) {
@SneakyThrows
public Void process(
ContainerRequestContext requestContext, ContainerResponseContext responseContext) {
+ websocketNotificationHandler.processNotifications(responseContext);
String method = requestContext.getMethod();
SecurityContext securityContext = requestContext.getSecurityContext();
String loggedInUserName = securityContext.getUserPrincipal().getName();
try {
CollectionDAO collectionDAO = Entity.getCollectionDAO();
CollectionDAO.ChangeEventDAO changeEventDAO = collectionDAO.changeEventDAO();
- FeedRepository feedRepository = new FeedRepository();
Optional<ChangeEvent> optionalChangeEvent =
getChangeEventFromResponseContext(responseContext, loggedInUserName, method);
if (optionalChangeEvent.isPresent()) {
ChangeEvent changeEvent = optionalChangeEvent.get();
+ if (changeEvent.getEntityType().equals(Entity.QUERY)) {
+ return null;
+ }
// Always set the Change Event Username as context Principal, the one creating the CE
changeEvent.setUserName(loggedInUserName);
LOG.info(
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/events/WebAnalyticEventHandler.java b/openmetadata-service/src/main/java/org/openmetadata/service/events/WebAnalyticEventHandler.java
deleted file mode 100644
index bfa933d325ac..000000000000
--- a/openmetadata-service/src/main/java/org/openmetadata/service/events/WebAnalyticEventHandler.java
+++ /dev/null
@@ -1,47 +0,0 @@
-package org.openmetadata.service.events;
-
-import io.micrometer.core.instrument.Counter;
-import io.micrometer.prometheus.PrometheusMeterRegistry;
-import javax.ws.rs.container.ContainerRequestContext;
-import javax.ws.rs.container.ContainerResponseContext;
-import javax.ws.rs.core.UriInfo;
-import lombok.extern.slf4j.Slf4j;
-import org.openmetadata.service.OpenMetadataApplicationConfig;
-import org.openmetadata.service.util.MicrometerBundleSingleton;
-
-@Slf4j
-public class WebAnalyticEventHandler implements EventHandler {
- private PrometheusMeterRegistry prometheusMeterRegistry;
- private String clusterName;
- public static final String WEB_ANALYTIC_ENDPOINT = "v1/analytics/web/events/collect";
- private static final String COUNTER_NAME = "web.analytics.events";
-
- public void init(OpenMetadataApplicationConfig config) {
- this.prometheusMeterRegistry = MicrometerBundleSingleton.prometheusMeterRegistry;
- this.clusterName = config.getClusterName();
- }
-
- public Void process(
- ContainerRequestContext requestContext, ContainerResponseContext responseContext) {
- UriInfo uriInfo = requestContext.getUriInfo();
- if (uriInfo.getPath().contains(WEB_ANALYTIC_ENDPOINT)) {
- String username = "anonymous";
- if (requestContext.getSecurityContext().getUserPrincipal() != null) {
- username = requestContext.getSecurityContext().getUserPrincipal().getName();
- }
- incrementMetric(username);
- }
- return null;
- }
-
- private void incrementMetric(String username) {
- Counter.builder(COUNTER_NAME)
- .tags("clusterName", clusterName, "username", username)
- .register(prometheusMeterRegistry)
- .increment();
- }
-
- public void close() {
- prometheusMeterRegistry.close();
- }
-}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/events/scheduled/EventSubscriptionScheduler.java b/openmetadata-service/src/main/java/org/openmetadata/service/events/scheduled/EventSubscriptionScheduler.java
index d3ccf1d9b24d..78257ddca415 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/events/scheduled/EventSubscriptionScheduler.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/events/scheduled/EventSubscriptionScheduler.java
@@ -132,7 +132,8 @@ private JobDetail jobBuilder(
private Trigger trigger(EventSubscription eventSubscription) {
return TriggerBuilder.newTrigger()
.withIdentity(eventSubscription.getId().toString(), ALERT_TRIGGER_GROUP)
- .withSchedule(SimpleScheduleBuilder.repeatSecondlyForever(3))
+ .withSchedule(
+ SimpleScheduleBuilder.repeatSecondlyForever(eventSubscription.getPollInterval()))
.startNow()
.build();
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/events/scheduled/template/DataInsightDescriptionAndOwnerTemplate.java b/openmetadata-service/src/main/java/org/openmetadata/service/events/scheduled/template/DataInsightDescriptionAndOwnerTemplate.java
index 3925de0895df..fb34e89ac60e 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/events/scheduled/template/DataInsightDescriptionAndOwnerTemplate.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/events/scheduled/template/DataInsightDescriptionAndOwnerTemplate.java
@@ -29,9 +29,11 @@ public enum KpiCriteria {
NOT_MET
}
+ private String totalAssets;
private final String percentCompleted;
private boolean kpiAvailable;
private String percentChange;
+ private String percentChangeMessage;
private String targetKpi;
private String numberOfDaysLeft;
private String completeMessage;
@@ -42,6 +44,7 @@ public enum KpiCriteria {
public DataInsightDescriptionAndOwnerTemplate(
MetricType metricType,
KpiCriteria criteria,
+ String totalAssets,
Double percentCompleted,
String targetKpi,
Double percentChange,
@@ -53,6 +56,8 @@ public DataInsightDescriptionAndOwnerTemplate(
this.percentCompleted = String.format("%.2f", percentCompleted);
this.targetKpi = targetKpi;
this.percentChange = String.format("%.2f", percentChange);
+ this.percentChangeMessage = getFormattedPercentChangeMessage(percentChange);
+ this.totalAssets = totalAssets;
this.kpiAvailable = isKpiAvailable;
this.numberOfDaysLeft = numberOfDaysLeft;
this.tierMap = tierMap;
@@ -131,6 +136,22 @@ public void setNumberOfDaysLeft(String numberOfDaysLeft) {
this.numberOfDaysLeft = numberOfDaysLeft;
}
+ public String getTotalAssets() {
+ return totalAssets;
+ }
+
+ public void setTotalAssets(String totalAssets) {
+ this.totalAssets = totalAssets;
+ }
+
+ public String getPercentChangeMessage() {
+ return percentChangeMessage;
+ }
+
+ public void setPercentChangeMessage(String message) {
+ this.percentChangeMessage = message;
+ }
+
public String getCompleteMessage() {
return completeMessage;
}
@@ -162,4 +183,18 @@ public Map getDateMap() {
public void setDateMap(Map dateMap) {
this.dateMap = dateMap;
}
+
+ public static String getFormattedPercentChangeMessage(Double percent) {
+ String symbol = "";
+ String color = "#BF0000";
+ if (percent > 0) {
+ symbol = "+";
+ color = "#008611";
+ } else if (percent < 0) {
+ symbol = "-";
+ }
+
+ return String.format(
+ "%s%.2f", color, symbol, percent);
+ }
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/events/scheduled/template/DataInsightTotalAssetTemplate.java b/openmetadata-service/src/main/java/org/openmetadata/service/events/scheduled/template/DataInsightTotalAssetTemplate.java
index 6e194fa7ea5d..5f7b85749a4c 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/events/scheduled/template/DataInsightTotalAssetTemplate.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/events/scheduled/template/DataInsightTotalAssetTemplate.java
@@ -13,12 +13,15 @@
package org.openmetadata.service.events.scheduled.template;
+import static org.openmetadata.service.events.scheduled.template.DataInsightDescriptionAndOwnerTemplate.getFormattedPercentChangeMessage;
+
import java.util.Map;
@SuppressWarnings("unused")
public class DataInsightTotalAssetTemplate {
private String totalDataAssets;
private String percentChangeTotalAssets;
+ private String percentChangeMessage;
private String completeMessage;
private int numberOfDaysChange;
private Map dateMap;
@@ -30,6 +33,7 @@ public DataInsightTotalAssetTemplate(
Map dateMap) {
this.totalDataAssets = String.format("%.2f", totalDataAssets);
this.percentChangeTotalAssets = String.format("%.2f", percentChangeTotalAssets);
+ this.percentChangeMessage = getFormattedPercentChangeMessage(percentChangeTotalAssets);
this.numberOfDaysChange = numberOfDaysChange;
this.dateMap = dateMap;
String color = "#BF0000";
@@ -66,6 +70,14 @@ public void setCompleteMessage(String completeMessage) {
this.completeMessage = completeMessage;
}
+ public String getPercentChangeMessage() {
+ return percentChangeMessage;
+ }
+
+ public void setPercentChangeMessage(String message) {
+ this.percentChangeMessage = message;
+ }
+
public int getNumberOfDaysChange() {
return numberOfDaysChange;
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/events/subscription/AlertUtil.java b/openmetadata-service/src/main/java/org/openmetadata/service/events/subscription/AlertUtil.java
index 33f06644c67d..220c05b00743 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/events/subscription/AlertUtil.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/events/subscription/AlertUtil.java
@@ -45,6 +45,7 @@
import org.openmetadata.service.exception.CatalogExceptionMessage;
import org.openmetadata.service.util.JsonUtils;
import org.springframework.expression.Expression;
+import org.springframework.expression.spel.support.SimpleEvaluationContext;
@Slf4j
public final class AlertUtil {
@@ -56,8 +57,13 @@ public static void validateExpression(String condition, Class clz) {
}
Expression expression = parseExpression(condition);
AlertsRuleEvaluator ruleEvaluator = new AlertsRuleEvaluator(null);
+ SimpleEvaluationContext context =
+ SimpleEvaluationContext.forReadOnlyDataBinding()
+ .withInstanceMethods()
+ .withRootObject(ruleEvaluator)
+ .build();
try {
- expression.getValue(ruleEvaluator, clz);
+ expression.getValue(context, clz);
} catch (Exception exception) {
// Remove unnecessary class details in the exception message
String message =
@@ -73,7 +79,12 @@ public static boolean evaluateAlertConditions(
String completeCondition = buildCompleteCondition(alertFilterRules);
AlertsRuleEvaluator ruleEvaluator = new AlertsRuleEvaluator(changeEvent);
Expression expression = parseExpression(completeCondition);
- result = Boolean.TRUE.equals(expression.getValue(ruleEvaluator, Boolean.class));
+ SimpleEvaluationContext context =
+ SimpleEvaluationContext.forReadOnlyDataBinding()
+ .withInstanceMethods()
+ .withRootObject(ruleEvaluator)
+ .build();
+ result = Boolean.TRUE.equals(expression.getValue(context, Boolean.class));
LOG.debug("Alert evaluated as Result : {}", result);
return result;
} else {
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/exception/CatalogExceptionMessage.java b/openmetadata-service/src/main/java/org/openmetadata/service/exception/CatalogExceptionMessage.java
index ea2792f346cc..ed8d27831827 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/exception/CatalogExceptionMessage.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/exception/CatalogExceptionMessage.java
@@ -158,6 +158,10 @@ public static String invalidFieldName(String fieldType, String fieldName) {
return String.format("Invalid %s name %s", fieldType, fieldName);
}
+ public static String invalidFieldFQN(String fqn) {
+ return String.format("Invalid fully qualified field name %s", fqn);
+ }
+
public static String entityVersionNotFound(String entityType, UUID id, Double version) {
return String.format("%s instance for %s and version %s not found", entityType, id, version);
}
@@ -268,6 +272,10 @@ public static String systemEntityRenameNotAllowed(String name, String entityType
return String.format("System entity [%s] of type %s can not be renamed.", name, entityType);
}
+ public static String systemEntityModifyNotAllowed(String name, String entityType) {
+ return String.format("System entity [%s] of type %s can not be modified.", name, entityType);
+ }
+
public static String mutuallyExclusiveLabels(TagLabel tag1, TagLabel tag2) {
return String.format(
"Tag labels %s and %s are mutually exclusive and can't be assigned together",
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/EmailMessageDecorator.java b/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/EmailMessageDecorator.java
index e1933998c7c2..f9c3bb9070cb 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/EmailMessageDecorator.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/EmailMessageDecorator.java
@@ -53,11 +53,11 @@ public String getRemoveMarkerClose() {
}
@Override
- public String getEntityUrl(String entityType, String fqn, String additionalParams) {
+ public String getEntityUrl(String prefix, String fqn, String additionalParams) {
return String.format(
"%s",
getSmtpSettings().getOpenMetadataUrl(),
- entityType,
+ prefix,
fqn.trim(),
nullOrEmpty(additionalParams) ? "" : String.format("/%s", additionalParams),
fqn.trim());
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/FeedMessageDecorator.java b/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/FeedMessageDecorator.java
index 05e580a9e4ba..8b193e295ca6 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/FeedMessageDecorator.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/FeedMessageDecorator.java
@@ -51,11 +51,11 @@ public String getRemoveMarkerClose() {
}
@Override
- public String getEntityUrl(String entityType, String fqn, String additionalParams) {
+ public String getEntityUrl(String prefix, String fqn, String additionalParams) {
return String.format(
"[%s](/%s/%s%s)",
fqn,
- entityType,
+ prefix,
fqn.trim(),
nullOrEmpty(additionalParams) ? "" : String.format("/%s", additionalParams));
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/GChatMessageDecorator.java b/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/GChatMessageDecorator.java
index 93156f94efca..7a2374c6826f 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/GChatMessageDecorator.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/GChatMessageDecorator.java
@@ -55,11 +55,11 @@ public String getRemoveMarkerClose() {
}
@Override
- public String getEntityUrl(String entityType, String fqn, String additionalParams) {
+ public String getEntityUrl(String prefix, String fqn, String additionalParams) {
return String.format(
"<%s/%s/%s%s|%s>",
getSmtpSettings().getOpenMetadataUrl(),
- entityType,
+ prefix,
fqn.trim().replace(" ", "%20"),
nullOrEmpty(additionalParams) ? "" : String.format("/%s", additionalParams),
fqn.trim());
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/MSTeamsMessageDecorator.java b/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/MSTeamsMessageDecorator.java
index 36c83d5e295e..d76fcf8cee37 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/MSTeamsMessageDecorator.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/MSTeamsMessageDecorator.java
@@ -55,12 +55,12 @@ public String getRemoveMarkerClose() {
}
@Override
- public String getEntityUrl(String entityType, String fqn, String additionalParams) {
+ public String getEntityUrl(String prefix, String fqn, String additionalParams) {
return String.format(
"[%s](/%s/%s%s)",
fqn.trim(),
getSmtpSettings().getOpenMetadataUrl(),
- entityType,
+ prefix,
nullOrEmpty(additionalParams) ? "" : String.format("/%s", additionalParams));
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/MessageDecorator.java b/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/MessageDecorator.java
index ad1c6d884737..e08d932d7a12 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/MessageDecorator.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/MessageDecorator.java
@@ -34,6 +34,7 @@
import org.openmetadata.schema.tests.TestCase;
import org.openmetadata.schema.type.ChangeEvent;
import org.openmetadata.schema.type.Include;
+import org.openmetadata.schema.type.ThreadType;
import org.openmetadata.service.Entity;
import org.openmetadata.service.exception.UnhandledServerException;
import org.openmetadata.service.resources.feeds.MessageParser;
@@ -60,7 +61,7 @@ default String httpRemoveMarker() {
String getRemoveMarkerClose();
- String getEntityUrl(String entityType, String fqn, String additionalInput);
+ String getEntityUrl(String prefix, String fqn, String additionalInput);
T buildEntityMessage(ChangeEvent event);
@@ -77,15 +78,57 @@ default String buildEntityUrl(String entityType, EntityInterface entityInterface
// Handle Test Case
if (entityType.equals(Entity.TEST_CASE)) {
TestCase testCase = (TestCase) entityInterface;
- MessageParser.EntityLink link = MessageParser.EntityLink.parse(testCase.getEntityLink());
- // TODO: this needs to be fixed no way to know the UI redirection
return getEntityUrl(
- link.getEntityType(), link.getEntityFQN(), "profiler?activeTab=Data%20Quality");
+ "incident-manager", testCase.getFullyQualifiedName(), "test-case-results");
+ }
+
+ // Glossary Term
+ if (entityType.equals(Entity.GLOSSARY_TERM)) {
+ // Glossary Term is a special case where the URL is different
+ return getEntityUrl(Entity.GLOSSARY, fqn, "");
+ }
+
+ // Tag
+ if (entityType.equals(Entity.TAG)) {
+ // Tags need to be redirected to Classification Page
+ return getEntityUrl("tags", fqn.split("\\.")[0], "");
}
return getEntityUrl(entityType, fqn, "");
}
+ default String buildThreadUrl(
+ ThreadType threadType, String entityType, EntityInterface entityInterface) {
+ String activeTab =
+ threadType.equals(ThreadType.Task) ? "activity_feed/tasks" : "activity_feed/all";
+ String fqn = entityInterface.getFullyQualifiedName();
+ if (CommonUtil.nullOrEmpty(fqn)) {
+ EntityInterface result =
+ Entity.getEntity(entityType, entityInterface.getId(), "id", Include.NON_DELETED);
+ fqn = result.getFullyQualifiedName();
+ }
+
+ // Handle Test Case
+ if (entityType.equals(Entity.TEST_CASE)) {
+ TestCase testCase = (TestCase) entityInterface;
+ return getEntityUrl("incident-manager", testCase.getFullyQualifiedName(), "issues");
+ }
+
+ // Glossary Term
+ if (entityType.equals(Entity.GLOSSARY_TERM)) {
+ // Glossary Term is a special case where the URL is different
+ return getEntityUrl(Entity.GLOSSARY, fqn, activeTab);
+ }
+
+ // Tag
+ if (entityType.equals(Entity.TAG)) {
+ // Tags need to be redirected to Classification Page
+ return getEntityUrl("tags", fqn.split("\\.")[0], "");
+ }
+
+ return getEntityUrl(entityType, fqn, activeTab);
+ }
+
default T buildOutgoingMessage(ChangeEvent event) {
if (event.getEntityType().equals(Entity.THREAD)) {
return buildThreadMessage(event);
@@ -176,20 +219,28 @@ default OutgoingMessage createThreadMessage(ChangeEvent event) {
OutgoingMessage message = new OutgoingMessage();
message.setUserName(event.getUserName());
Thread thread = getThread(event);
+
+ MessageParser.EntityLink entityLink = MessageParser.EntityLink.parse(thread.getAbout());
+ EntityInterface entityInterface = Entity.getEntity(entityLink, "", Include.ALL);
+ String entityUrl = buildEntityUrl(entityLink.getEntityType(), entityInterface);
+
String headerMessage = "";
List<String> attachmentList = new ArrayList<>();
+
+ String assetUrl =
+ getThreadAssetsUrl(thread.getType(), MessageParser.EntityLink.parse(thread.getAbout()));
switch (thread.getType()) {
case Conversation -> {
switch (event.getEventType()) {
case THREAD_CREATED -> {
headerMessage =
String.format(
- "@%s started a conversation for asset %s",
- thread.getCreatedBy(), thread.getAbout());
+ "@%s started a conversation for asset %s", thread.getCreatedBy(), assetUrl);
attachmentList.add(replaceEntityLinks(thread.getMessage()));
}
case POST_CREATED -> {
- headerMessage = String.format("@%s posted a message", thread.getCreatedBy());
+ headerMessage =
+ String.format("@%s posted a message on asset %s", thread.getCreatedBy(), assetUrl);
attachmentList.add(
String.format(
"@%s : %s", thread.getCreatedBy(), replaceEntityLinks(thread.getMessage())));
@@ -204,7 +255,9 @@ default OutgoingMessage createThreadMessage(ChangeEvent event) {
}
case THREAD_UPDATED -> {
headerMessage =
- String.format("@%s posted update on Conversation", thread.getUpdatedBy());
+ String.format(
+ "@%s posted update on Conversation for asset %s",
+ thread.getUpdatedBy(), assetUrl);
attachmentList.add(replaceEntityLinks(thread.getMessage()));
}
}
@@ -214,8 +267,8 @@ default OutgoingMessage createThreadMessage(ChangeEvent event) {
case THREAD_CREATED -> {
headerMessage =
String.format(
- "@%s created a Task with Id : %s",
- thread.getCreatedBy(), thread.getTask().getId());
+ "@%s created a Task for %s %s",
+ thread.getCreatedBy(), entityLink.getEntityType(), assetUrl);
attachmentList.add(String.format("Task Type : %s", thread.getTask().getType().value()));
attachmentList.add(
String.format(
@@ -229,8 +282,8 @@ default OutgoingMessage createThreadMessage(ChangeEvent event) {
case POST_CREATED -> {
headerMessage =
String.format(
- "@%s posted a message on the Task with Id : %s",
- thread.getCreatedBy(), thread.getTask().getId());
+ "@%s posted a message on the Task with Id : %s for Asset %s",
+ thread.getCreatedBy(), thread.getTask().getId(), assetUrl);
thread
.getPosts()
.forEach(
@@ -243,8 +296,8 @@ default OutgoingMessage createThreadMessage(ChangeEvent event) {
case THREAD_UPDATED -> {
headerMessage =
String.format(
- "@%s posted update on the Task with Id : %s",
- thread.getUpdatedBy(), thread.getTask().getId());
+ "@%s posted update on the Task with Id : %s for Asset %s",
+ thread.getUpdatedBy(), thread.getTask().getId(), assetUrl);
attachmentList.add(String.format("Task Type : %s", thread.getTask().getType().value()));
attachmentList.add(
String.format(
@@ -258,15 +311,15 @@ default OutgoingMessage createThreadMessage(ChangeEvent event) {
case TASK_CLOSED -> {
headerMessage =
String.format(
- "@%s closed Task with Id : %s",
- thread.getCreatedBy(), thread.getTask().getId());
+ "@%s closed Task with Id : %s for Asset %s",
+ thread.getCreatedBy(), thread.getTask().getId(), assetUrl);
attachmentList.add(String.format("Current Status : %s", thread.getTask().getStatus()));
}
case TASK_RESOLVED -> {
headerMessage =
String.format(
- "@%s resolved Task with Id : %s",
- thread.getCreatedBy(), thread.getTask().getId());
+ "@%s resolved Task with Id : %s for Asset %s",
+ thread.getCreatedBy(), thread.getTask().getId(), assetUrl);
attachmentList.add(String.format("Current Status : %s", thread.getTask().getStatus()));
}
}
@@ -319,9 +372,23 @@ default OutgoingMessage createThreadMessage(ChangeEvent event) {
}
message.setHeader(headerMessage);
message.setMessages(attachmentList);
+
+ message.setEntityUrl(entityUrl);
return message;
}
+ default String getThreadAssetsUrl(
+ ThreadType threadType, MessageParser.EntityLink aboutEntityLink) {
+ try {
+ return this.buildThreadUrl(
+ threadType,
+ aboutEntityLink.getEntityType(),
+ Entity.getEntity(aboutEntityLink, "id", Include.ALL));
+ } catch (Exception ex) {
+ return "";
+ }
+ }
+
private String getDateString(long epochTimestamp) {
Instant instant = Instant.ofEpochSecond(epochTimestamp);
LocalDateTime localDateTime = LocalDateTime.ofInstant(instant, ZoneId.systemDefault());
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/SlackMessageDecorator.java b/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/SlackMessageDecorator.java
index 2075eb236a69..b18446a08ac9 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/SlackMessageDecorator.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/formatter/decorators/SlackMessageDecorator.java
@@ -55,11 +55,11 @@ public String getRemoveMarkerClose() {
return "~";
}
- public String getEntityUrl(String entityType, String fqn, String additionalParams) {
+ public String getEntityUrl(String prefix, String fqn, String additionalParams) {
return String.format(
"<%s/%s/%s%s|%s>",
getSmtpSettings().getOpenMetadataUrl(),
- entityType,
+ prefix,
fqn.trim().replaceAll(" ", "%20"),
nullOrEmpty(additionalParams) ? "" : String.format("/%s", additionalParams),
fqn.trim());
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/AppRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/AppRepository.java
index 23d5d0ab7666..7789d3b25728 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/AppRepository.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/AppRepository.java
@@ -20,6 +20,7 @@
import org.openmetadata.schema.type.Relationship;
import org.openmetadata.service.Entity;
import org.openmetadata.service.exception.EntityNotFoundException;
+import org.openmetadata.service.exception.UnhandledServerException;
import org.openmetadata.service.resources.apps.AppResource;
import org.openmetadata.service.security.jwt.JWTTokenGenerator;
import org.openmetadata.service.util.EntityUtil;
@@ -75,7 +76,7 @@ public EntityReference createNewAppBot(App application) {
User botUser;
Bot bot;
try {
- botUser = userRepository.findByName(botName, Include.NON_DELETED);
+ botUser = userRepository.getByName(null, botName, userRepository.getFields("id"));
} catch (EntityNotFoundException ex) {
// Get Bot Role
EntityReference roleRef =
@@ -88,6 +89,7 @@ public EntityReference createNewAppBot(App application) {
CreateUser createUser =
new CreateUser()
.withName(botName)
+ .withDisplayName(application.getDisplayName())
.withEmail(String.format("%s@openmetadata.org", botName))
.withIsAdmin(false)
.withIsBot(true)
@@ -135,15 +137,14 @@ public EntityReference createNewAppBot(App application) {
@Override
public void storeEntity(App entity, boolean update) {
- EntityReference botUserRef = entity.getBot();
EntityReference ownerRef = entity.getOwner();
- entity.withBot(null).withOwner(null);
+ entity.withOwner(null);
// Store
store(entity, update);
// Restore entity fields
- entity.withBot(botUserRef).withOwner(ownerRef);
+ entity.withOwner(ownerRef);
}
public EntityReference getBotUser(App application) {
@@ -209,6 +210,9 @@ protected void cleanup(App app) {
public AppRunRecord getLatestAppRuns(UUID appId) {
String json = daoCollection.appExtensionTimeSeriesDao().getLatestAppRun(appId);
+ if (json == null) {
+ throw new UnhandledServerException("No Available Application Run Records.");
+ }
return JsonUtils.readValue(json, AppRunRecord.class);
}
@@ -227,6 +231,7 @@ public void entitySpecificUpdate() {
recordChange(
"appConfiguration", original.getAppConfiguration(), updated.getAppConfiguration());
recordChange("appSchedule", original.getAppSchedule(), updated.getAppSchedule());
+ recordChange("bot", original.getBot(), updated.getBot());
}
}
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java
index bdd5e042120f..9110a0a9694b 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java
@@ -122,7 +122,6 @@
import org.openmetadata.schema.util.ServicesCount;
import org.openmetadata.schema.utils.EntityInterfaceUtil;
import org.openmetadata.service.Entity;
-import org.openmetadata.service.exception.UnhandledServerException;
import org.openmetadata.service.jdbi3.CollectionDAO.TagUsageDAO.TagLabelMapper;
import org.openmetadata.service.jdbi3.CollectionDAO.UsageDAO.UsageDetailsMapper;
import org.openmetadata.service.jdbi3.FeedRepository.FilterType;
@@ -3222,7 +3221,8 @@ List<String> listWithEntityFilter(
List<String> listWithoutEntityFilter(
@Bind("eventType") String eventType, @Bind("timestamp") long timestamp);
- @SqlQuery("SELECT json FROM change_event ORDER BY eventTime ASC LIMIT :limit OFFSET :offset")
+ @SqlQuery(
+ "SELECT json FROM change_event ce where ce.offset > :offset ORDER BY ce.eventTime ASC LIMIT :limit")
List<String> list(@Bind("limit") long limit, @Bind("offset") long offset);
@SqlQuery("SELECT count(*) FROM change_event")
@@ -3698,7 +3698,7 @@ default String getLatestAppRun(UUID appId) {
if (!nullOrEmpty(result)) {
return result.get(0);
}
- throw new UnhandledServerException("No Available Application Run Records.");
+ return null;
}
}
@@ -3943,6 +3943,9 @@ interface SystemDAO {
@SqlUpdate(value = "DELETE from openmetadata_settings WHERE configType = :configType")
void delete(@Bind("configType") String configType);
+
+ @SqlQuery("SELECT 42")
+ Integer testConnection() throws StatementException;
}
class SettingsRowMapper implements RowMapper<Settings> {
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/ColumnUtil.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/ColumnUtil.java
index b1654d6b47f1..e57f30f73bb5 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/ColumnUtil.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/ColumnUtil.java
@@ -1,5 +1,6 @@
package org.openmetadata.service.jdbi3;
+import static org.openmetadata.common.utils.CommonUtil.findChildren;
import static org.openmetadata.common.utils.CommonUtil.listOrEmpty;
import static org.openmetadata.common.utils.CommonUtil.nullOrEmpty;
@@ -9,6 +10,7 @@
import java.util.Set;
import java.util.stream.Collectors;
import org.openmetadata.schema.type.Column;
+import org.openmetadata.schema.type.Field;
import org.openmetadata.schema.type.TagLabel;
import org.openmetadata.service.exception.CatalogExceptionMessage;
import org.openmetadata.service.util.FullyQualifiedName;
@@ -56,18 +58,21 @@ public static void setColumnFQN(String parentFQN, List<Column> columns) {
// Validate if a given column exists in the table
public static void validateColumnFQN(List<Column> columns, String columnFQN) {
- boolean validColumn = false;
- for (Column column : columns) {
- if (column.getFullyQualifiedName().equals(columnFQN)) {
- validColumn = true;
- break;
- }
- }
- if (!validColumn) {
+ boolean exists = findChildren(columns, "getChildren", columnFQN);
+ if (!exists) {
throw new IllegalArgumentException(CatalogExceptionMessage.invalidColumnFQN(columnFQN));
}
}
+ // validate if a given field exists in the topic
+ public static void validateFieldFQN(List<Field> fields, String fieldFQN) {
+ boolean exists = findChildren(fields, "getChildren", fieldFQN);
+ if (!exists) {
+ throw new IllegalArgumentException(
+ CatalogExceptionMessage.invalidFieldName("field", fieldFQN));
+ }
+ }
+
public static Set<TagLabel> getAllTags(Column column) {
Set<TagLabel> tags = new HashSet<>();
if (!listOrEmpty(column.getTags()).isEmpty()) {
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java
index 279824467dd2..21ec38fc5ae9 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java
@@ -124,6 +124,7 @@
import org.openmetadata.schema.type.LifeCycle;
import org.openmetadata.schema.type.ProviderType;
import org.openmetadata.schema.type.Relationship;
+import org.openmetadata.schema.type.SuggestionType;
import org.openmetadata.schema.type.TagLabel;
import org.openmetadata.schema.type.TaskType;
import org.openmetadata.schema.type.ThreadType;
@@ -1907,6 +1908,25 @@ public final EntityReference validateDomain(String domainFqn) {
return Entity.getEntityReferenceByName(Entity.DOMAIN, domainFqn, NON_DELETED);
}
+ public final void validateDomain(EntityReference domain) {
+ if (!supportsDomain) {
+ throw new IllegalArgumentException(CatalogExceptionMessage.invalidField(FIELD_DOMAIN));
+ }
+ Entity.getEntityReferenceById(Entity.DOMAIN, domain.getId(), NON_DELETED);
+ }
+
+ public final void validateDataProducts(List<EntityReference> dataProducts) {
+ if (!supportsDataProducts) {
+ throw new IllegalArgumentException(CatalogExceptionMessage.invalidField(FIELD_DATA_PRODUCTS));
+ }
+
+ if (!nullOrEmpty(dataProducts)) {
+ for (EntityReference dataProduct : dataProducts) {
+ Entity.getEntityReferenceById(Entity.DATA_PRODUCT, dataProduct.getId(), NON_DELETED);
+ }
+ }
+ }
+
/** Override this method to support downloading CSV functionality */
public String exportToCsv(String name, String user) throws IOException {
throw new IllegalArgumentException(csvNotSupported(entityType));
@@ -1934,8 +1954,8 @@ public TaskWorkflow getTaskWorkflow(ThreadContext threadContext) {
}
}
- public SuggestionRepository.SuggestionWorkflow getSuggestionWorkflow(Suggestion suggestion) {
- return new SuggestionRepository.SuggestionWorkflow(suggestion);
+ public SuggestionRepository.SuggestionWorkflow getSuggestionWorkflow(EntityInterface entity) {
+ return new SuggestionRepository.SuggestionWorkflow(entity);
}
public EntityInterface applySuggestion(
@@ -1943,6 +1963,13 @@ public EntityInterface applySuggestion(
return entity;
}
+ /**
+ * Bring in the necessary fields required to have all the information before applying a suggestion
+ */
+ public String getSuggestionFields(Suggestion suggestion) {
+ return suggestion.getType() == SuggestionType.SuggestTagLabel ? "tags" : "";
+ }
+
public final void validateTaskThread(ThreadContext threadContext) {
ThreadType threadType = threadContext.getThread().getType();
if (threadType != ThreadType.Task) {
@@ -2263,6 +2290,7 @@ && recordChange(FIELD_DOMAIN, origDomain, updatedDomain, true, entityReferenceMa
origDomain.getId(), Entity.DOMAIN, original.getId(), entityType, Relationship.HAS);
}
if (updatedDomain != null) {
+ validateDomain(updatedDomain);
// Add relationship owner --- owns ---> ownedEntity
LOG.info(
"Adding domain {} for entity {}",
@@ -2283,6 +2311,7 @@ private void updateDataProducts() {
}
List<EntityReference> origDataProducts = listOrEmpty(original.getDataProducts());
List<EntityReference> updatedDataProducts = listOrEmpty(updated.getDataProducts());
+ validateDataProducts(updatedDataProducts);
updateFromRelationships(
FIELD_DATA_PRODUCTS,
DATA_PRODUCT,
@@ -2299,6 +2328,7 @@ private void updateExperts() {
}
List<EntityReference> origExperts = getEntityReferences(original.getExperts());
List<EntityReference> updatedExperts = getEntityReferences(updated.getExperts());
+ validateUsers(updatedExperts);
updateToRelationships(
FIELD_EXPERTS,
entityType,
@@ -2317,6 +2347,7 @@ private void updateReviewers() {
}
List<EntityReference> origReviewers = getEntityReferences(original.getReviewers());
List<EntityReference> updatedReviewers = getEntityReferences(updated.getReviewers());
+ validateUsers(updatedReviewers);
updateFromRelationships(
"reviewers",
Entity.USER,
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java
index e9fdc6bd1fbb..08a41c610456 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java
@@ -64,6 +64,7 @@
import org.openmetadata.schema.type.EntityReference;
import org.openmetadata.schema.type.EventType;
import org.openmetadata.schema.type.Include;
+import org.openmetadata.schema.type.MetadataOperation;
import org.openmetadata.schema.type.Post;
import org.openmetadata.schema.type.Reaction;
import org.openmetadata.schema.type.Relationship;
@@ -85,6 +86,9 @@
import org.openmetadata.service.resources.feeds.MessageParser;
import org.openmetadata.service.resources.feeds.MessageParser.EntityLink;
import org.openmetadata.service.security.AuthorizationException;
+import org.openmetadata.service.security.Authorizer;
+import org.openmetadata.service.security.policyevaluator.OperationContext;
+import org.openmetadata.service.security.policyevaluator.ResourceContext;
import org.openmetadata.service.util.EntityUtil;
import org.openmetadata.service.util.FullyQualifiedName;
import org.openmetadata.service.util.JsonUtils;
@@ -785,11 +789,12 @@ public final PatchResponse<Thread> patchThread(
}
public void checkPermissionsForResolveTask(
- Thread thread, boolean closeTask, SecurityContext securityContext) {
+ Authorizer authorizer, Thread thread, boolean closeTask, SecurityContext securityContext) {
String userName = securityContext.getUserPrincipal().getName();
User user = Entity.getEntityByName(USER, userName, TEAMS_FIELD, NON_DELETED);
EntityLink about = EntityLink.parse(thread.getAbout());
EntityReference aboutRef = EntityUtil.validateEntityLink(about);
+ ThreadContext threadContext = getThreadContext(thread);
if (Boolean.TRUE.equals(user.getIsAdmin())) {
return; // Allow admin resolve/close task
}
@@ -799,9 +804,25 @@ public void checkPermissionsForResolveTask(
// Allow if user created the task to close task (and not resolve task)
EntityReference owner = Entity.getOwner(aboutRef);
List<EntityReference> assignees = thread.getTask().getAssignees();
- if (assignees.stream().anyMatch(assignee -> assignee.getName().equals(userName))
- || owner.getName().equals(userName)
- || closeTask && thread.getCreatedBy().equals(userName)) {
+ if (owner.getName().equals(userName) || closeTask && thread.getCreatedBy().equals(userName)) {
+ return;
+ }
+
+ // Allow if user is an assignee of the task and if the assignee has permissions to update the
+ // entity
+ if (assignees.stream().anyMatch(assignee -> assignee.getName().equals(userName))) {
+ // If entity does not exist, this is a create operation, else update operation
+ ResourceContext resourceContext =
+ new ResourceContext<>(aboutRef.getType(), aboutRef.getId(), null);
+ if (EntityUtil.isDescriptionTask(threadContext.getTaskWorkflow().getTaskType())) {
+ OperationContext operationContext =
+ new OperationContext(aboutRef.getType(), MetadataOperation.EDIT_DESCRIPTION);
+ authorizer.authorize(securityContext, operationContext, resourceContext);
+ } else if (EntityUtil.isTagTask(threadContext.getTaskWorkflow().getTaskType())) {
+ OperationContext operationContext =
+ new OperationContext(aboutRef.getType(), MetadataOperation.EDIT_TAGS);
+ authorizer.authorize(securityContext, operationContext, resourceContext);
+ }
return;
}
@@ -913,7 +934,7 @@ private boolean fieldsChanged(Post original, Post updated) {
}
private boolean fieldsChanged(Thread original, Thread updated) {
- // Patch supports isResolved, message, task assignees, reactions, and announcements for now
+ // Patch supports isResolved, message, task assignees, reactions, announcements and AI for now
return !original.getResolved().equals(updated.getResolved())
|| !original.getMessage().equals(updated.getMessage())
|| (Collections.isEmpty(original.getReactions())
@@ -935,6 +956,10 @@ private boolean fieldsChanged(Thread original, Thread updated) {
|| !Objects.equals(
original.getAnnouncement().getEndTime(),
updated.getAnnouncement().getEndTime())))
+ || (original.getChatbot() == null && updated.getChatbot() != null)
+ || (original.getChatbot() != null
+ && updated.getChatbot() != null
+ && !original.getChatbot().getQuery().equals(updated.getChatbot().getQuery()))
|| (original.getTask() != null
&& (original.getTask().getAssignees().size() != updated.getTask().getAssignees().size()
|| !original
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/GlossaryTermRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/GlossaryTermRepository.java
index fadb55a78ff2..3846818b9cd5 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/GlossaryTermRepository.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/GlossaryTermRepository.java
@@ -90,7 +90,7 @@
import org.openmetadata.service.util.EntityUtil.Fields;
import org.openmetadata.service.util.FullyQualifiedName;
import org.openmetadata.service.util.JsonUtils;
-import org.openmetadata.service.util.NotificationHandler;
+import org.openmetadata.service.util.WebsocketNotificationHandler;
@Slf4j
public class GlossaryTermRepository extends EntityRepository<GlossaryTerm> {
@@ -655,7 +655,7 @@ private void createApprovalTask(GlossaryTerm entity, List paren
feedRepository.create(thread);
// Send WebSocket Notification
- NotificationHandler.handleTaskNotification(thread);
+ WebsocketNotificationHandler.handleTaskNotification(thread);
}
private void closeApprovalTask(GlossaryTerm entity, String comment) {
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/LineageRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/LineageRepository.java
index dbf68dd939ba..09defe6a2725 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/LineageRepository.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/LineageRepository.java
@@ -13,6 +13,12 @@
package org.openmetadata.service.jdbi3;
+import static org.openmetadata.service.Entity.CONTAINER;
+import static org.openmetadata.service.Entity.DASHBOARD;
+import static org.openmetadata.service.Entity.DASHBOARD_DATA_MODEL;
+import static org.openmetadata.service.Entity.MLMODEL;
+import static org.openmetadata.service.Entity.TABLE;
+import static org.openmetadata.service.Entity.TOPIC;
import static org.openmetadata.service.search.SearchClient.GLOBAL_SEARCH_ALIAS;
import static org.openmetadata.service.search.SearchClient.REMOVE_LINEAGE_SCRIPT;
@@ -21,14 +27,17 @@
import java.util.List;
import java.util.Map;
import java.util.UUID;
-import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.jdbi.v3.sqlobject.transaction.Transaction;
import org.openmetadata.common.utils.CommonUtil;
-import org.openmetadata.schema.ColumnsEntityInterface;
import org.openmetadata.schema.api.lineage.AddLineage;
+import org.openmetadata.schema.entity.data.Container;
+import org.openmetadata.schema.entity.data.Dashboard;
+import org.openmetadata.schema.entity.data.DashboardDataModel;
+import org.openmetadata.schema.entity.data.MlModel;
import org.openmetadata.schema.entity.data.Table;
+import org.openmetadata.schema.entity.data.Topic;
import org.openmetadata.schema.type.ColumnLineage;
import org.openmetadata.schema.type.Edge;
import org.openmetadata.schema.type.EntityLineage;
@@ -37,17 +46,17 @@
import org.openmetadata.schema.type.LineageDetails;
import org.openmetadata.schema.type.Relationship;
import org.openmetadata.service.Entity;
+import org.openmetadata.service.exception.CatalogExceptionMessage;
import org.openmetadata.service.jdbi3.CollectionDAO.EntityRelationshipRecord;
import org.openmetadata.service.search.SearchClient;
import org.openmetadata.service.search.models.IndexMapping;
-import org.openmetadata.service.util.FullyQualifiedName;
import org.openmetadata.service.util.JsonUtils;
@Repository
public class LineageRepository {
private final CollectionDAO dao;
- public SearchClient searchClient = Entity.getSearchRepository().getSearchClient();
+ private static final SearchClient searchClient = Entity.getSearchRepository().getSearchClient();
public LineageRepository() {
this.dao = Entity.getCollectionDAO();
@@ -173,41 +182,86 @@ private String validateLineageDetails(
if (details == null) {
return null;
}
-
List<ColumnLineage> columnsLineage = details.getColumnsLineage();
if (columnsLineage != null && !columnsLineage.isEmpty()) {
- if (areValidEntities(from, to)) {
- throw new IllegalArgumentException(
- "Column level lineage is only allowed between two tables or from table to dashboard.");
- }
- Table fromTable = dao.tableDAO().findEntityById(from.getId());
- ColumnsEntityInterface toTable = getToEntity(to);
for (ColumnLineage columnLineage : columnsLineage) {
for (String fromColumn : columnLineage.getFromColumns()) {
- // From column belongs to the fromNode
- if (fromColumn.startsWith(fromTable.getFullyQualifiedName())) {
- ColumnUtil.validateColumnFQN(fromTable.getColumns(), fromColumn);
- } else {
- Table otherTable =
- dao.tableDAO().findEntityByName(FullyQualifiedName.getTableFQN(fromColumn));
- ColumnUtil.validateColumnFQN(otherTable.getColumns(), fromColumn);
- }
+ validateChildren(fromColumn, from);
}
- ColumnUtil.validateColumnFQN(toTable.getColumns(), columnLineage.getToColumn());
+ validateChildren(columnLineage.getToColumn(), to);
}
}
return JsonUtils.pojoToJson(details);
}
- private ColumnsEntityInterface getToEntity(EntityReference from) {
- return from.getType().equals(Entity.TABLE)
- ? dao.tableDAO().findEntityById(from.getId())
- : dao.dashboardDataModelDAO().findEntityById(from.getId());
+ private void validateChildren(String columnFQN, EntityReference entityReference) {
+ switch (entityReference.getType()) {
+ case TABLE -> {
+ Table table =
+ Entity.getEntity(TABLE, entityReference.getId(), "columns", Include.NON_DELETED);
+ ColumnUtil.validateColumnFQN(table.getColumns(), columnFQN);
+ }
+ case TOPIC -> {
+ Topic topic =
+ Entity.getEntity(TOPIC, entityReference.getId(), "messageSchema", Include.NON_DELETED);
+ ColumnUtil.validateFieldFQN(topic.getMessageSchema().getSchemaFields(), columnFQN);
+ }
+ case CONTAINER -> {
+ Container container =
+ Entity.getEntity(CONTAINER, entityReference.getId(), "dataModel", Include.NON_DELETED);
+ ColumnUtil.validateColumnFQN(container.getDataModel().getColumns(), columnFQN);
+ }
+ case DASHBOARD_DATA_MODEL -> {
+ DashboardDataModel dashboardDataModel =
+ Entity.getEntity(
+ DASHBOARD_DATA_MODEL, entityReference.getId(), "columns", Include.NON_DELETED);
+ ColumnUtil.validateColumnFQN(dashboardDataModel.getColumns(), columnFQN);
+ }
+ case DASHBOARD -> {
+ Dashboard dashboard =
+ Entity.getEntity(DASHBOARD, entityReference.getId(), "charts", Include.NON_DELETED);
+ dashboard.getCharts().stream()
+ .filter(c -> c.getFullyQualifiedName().equals(columnFQN))
+ .findAny()
+ .orElseThrow(
+ () ->
+ new IllegalArgumentException(
+ CatalogExceptionMessage.invalidFieldName("chart", columnFQN)));
+ }
+ case MLMODEL -> {
+ MlModel mlModel =
+ Entity.getEntity(MLMODEL, entityReference.getId(), "", Include.NON_DELETED);
+ mlModel.getMlFeatures().stream()
+ .filter(f -> f.getFullyQualifiedName().equals(columnFQN))
+ .findAny()
+ .orElseThrow(
+ () ->
+ new IllegalArgumentException(
+ CatalogExceptionMessage.invalidFieldName("feature", columnFQN)));
+ }
+ default -> throw new IllegalArgumentException(
+ String.format("Unsupported Entity Type %s for lineage", entityReference.getType()));
+ }
}
- private boolean areValidEntities(EntityReference from, EntityReference to) {
- return !from.getType().equals(Entity.TABLE)
- || !(to.getType().equals(Entity.TABLE) || to.getType().equals(Entity.DASHBOARD_DATA_MODEL));
+ @Transaction
+ public boolean deleteLineageByFQN(
+ String fromEntity, String fromFQN, String toEntity, String toFQN) {
+ EntityReference from =
+ Entity.getEntityReferenceByName(fromEntity, fromFQN, Include.NON_DELETED);
+ EntityReference to = Entity.getEntityReferenceByName(toEntity, toFQN, Include.NON_DELETED);
+ // Finally, delete lineage relationship
+ boolean result =
+ dao.relationshipDAO()
+ .delete(
+ from.getId(),
+ from.getType(),
+ to.getId(),
+ to.getType(),
+ Relationship.UPSTREAM.ordinal())
+ > 0;
+ deleteLineageFromSearch(from, to);
+ return result;
}
@Transaction
@@ -260,7 +314,7 @@ private EntityLineage getLineage(
getDownstreamLineage(primary.getId(), primary.getType(), lineage, downstreamDepth);
// Remove duplicate nodes
- lineage.withNodes(lineage.getNodes().stream().distinct().collect(Collectors.toList()));
+ lineage.withNodes(lineage.getNodes().stream().distinct().toList());
return lineage;
}
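
For context on the LineageRepository change above: validateChildren resolves the referenced entity by type and verifies that every column or field FQN named in the lineage details actually exists on that entity, throwing IllegalArgumentException otherwise. A minimal, self-contained sketch of that dispatch-and-validate idea follows; the class name, the KNOWN_COLUMNS map and validateColumn are illustrative stand-ins, not the repository API.

```java
import java.util.List;
import java.util.Map;

// Minimal sketch of the dispatch-and-validate idea behind validateChildren: resolve the
// columns/fields known for the lineage endpoint's entity type, then reject any FQN that
// is not among them. KNOWN_COLUMNS and validateColumn are illustrative stand-ins.
public class LineageColumnCheckSketch {
  private static final Map<String, List<String>> KNOWN_COLUMNS =
      Map.of(
          "table", List.of("svc.db.schema.orders.id", "svc.db.schema.orders.total"),
          "dashboardDataModel", List.of("svc.model.sales.revenue"));

  static void validateColumn(String entityType, String columnFQN) {
    List<String> columns = KNOWN_COLUMNS.get(entityType);
    if (columns == null) {
      throw new IllegalArgumentException(
          String.format("Unsupported Entity Type %s for lineage", entityType));
    }
    if (!columns.contains(columnFQN)) {
      throw new IllegalArgumentException("Invalid column name " + columnFQN);
    }
  }

  public static void main(String[] args) {
    validateColumn("table", "svc.db.schema.orders.id"); // passes
    validateColumn("table", "svc.db.schema.orders.missing"); // throws IllegalArgumentException
  }
}
```
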
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/ListFilter.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/ListFilter.java
index ab71d19a0565..11c81f97c3bc 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/ListFilter.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/ListFilter.java
@@ -49,24 +49,26 @@ public String getCondition() {
}
public String getCondition(String tableName) {
- String condition = getIncludeCondition(tableName);
- condition = addCondition(condition, getDatabaseCondition(tableName));
- condition = addCondition(condition, getDatabaseSchemaCondition(tableName));
- condition = addCondition(condition, getServiceCondition(tableName));
- condition = addCondition(condition, getPipelineTypeCondition(tableName));
- condition = addCondition(condition, getParentCondition(tableName));
- condition = addCondition(condition, getDisabledCondition());
- condition = addCondition(condition, getCategoryCondition(tableName));
- condition = addCondition(condition, getWebhookCondition(tableName));
- condition = addCondition(condition, getWebhookTypeCondition(tableName));
- condition = addCondition(condition, getTestCaseCondition());
- condition = addCondition(condition, getTestSuiteTypeCondition(tableName));
- condition = addCondition(condition, getTestSuiteFQNCondition());
- condition = addCondition(condition, getDomainCondition());
- condition = addCondition(condition, getEntityFQNHashCondition());
- condition = addCondition(condition, getTestCaseResolutionStatusType());
- condition = addCondition(condition, getAssignee());
- condition = addCondition(condition, getEventSubscriptionAlertType());
+ ArrayList<String> conditions = new ArrayList<>();
+ conditions.add(getIncludeCondition(tableName));
+ conditions.add(getDatabaseCondition(tableName));
+ conditions.add(getDatabaseSchemaCondition(tableName));
+ conditions.add(getServiceCondition(tableName));
+ conditions.add(getPipelineTypeCondition(tableName));
+ conditions.add(getParentCondition(tableName));
+ conditions.add(getDisabledCondition());
+ conditions.add(getCategoryCondition(tableName));
+ conditions.add(getWebhookCondition(tableName));
+ conditions.add(getWebhookTypeCondition(tableName));
+ conditions.add(getTestCaseCondition());
+ conditions.add(getTestSuiteTypeCondition(tableName));
+ conditions.add(getTestSuiteFQNCondition());
+ conditions.add(getDomainCondition());
+ conditions.add(getEntityFQNHashCondition());
+ conditions.add(getTestCaseResolutionStatusType());
+ conditions.add(getAssignee());
+ conditions.add(getEventSubscriptionAlertType());
+ String condition = addCondition(conditions);
return condition.isEmpty() ? "WHERE TRUE" : "WHERE " + condition;
}
@@ -199,27 +201,44 @@ public String getPipelineTypeCondition(String tableName) {
}
private String getTestCaseCondition() {
- String condition1 = "";
+ ArrayList<String> conditions = new ArrayList<>();
+
String entityFQN = getQueryParam("entityFQN");
boolean includeAllTests = Boolean.parseBoolean(getQueryParam("includeAllTests"));
+ String status = getQueryParam("testCaseStatus");
+ String testSuiteId = getQueryParam("testSuiteId");
+ String type = getQueryParam("testCaseType");
+
if (entityFQN != null) {
- condition1 =
+ conditions.add(
includeAllTests
? String.format(
- "entityFQN LIKE '%s%s%%' OR entityFQN = '%s'",
+ "(entityFQN LIKE '%s%s%%' OR entityFQN = '%s')",
escape(entityFQN), Entity.SEPARATOR, escapeApostrophe(entityFQN))
- : String.format("entityFQN = '%s'", escapeApostrophe(entityFQN));
+ : String.format("entityFQN = '%s'", escapeApostrophe(entityFQN)));
}
- String condition2 = "";
- String testSuiteId = getQueryParam("testSuiteId");
if (testSuiteId != null) {
- condition2 =
+ conditions.add(
String.format(
"id IN (SELECT toId FROM entity_relationship WHERE fromId='%s' AND toEntity='%s' AND relation=%d AND fromEntity='%s')",
- testSuiteId, Entity.TEST_CASE, Relationship.CONTAINS.ordinal(), Entity.TEST_SUITE);
+ testSuiteId, Entity.TEST_CASE, Relationship.CONTAINS.ordinal(), Entity.TEST_SUITE));
}
- return addCondition(condition1, condition2);
+
+ if (status != null) {
+ conditions.add(String.format("status = '%s'", status));
+ }
+
+ if (type != null) {
+ conditions.add(
+ switch (type) {
+ case "table" -> "entityLink NOT LIKE '%::columns::%'";
+ case "column" -> "entityLink LIKE '%::columns::%'";
+ default -> "";
+ });
+ }
+
+ return addCondition(conditions);
}
private String getTestSuiteTypeCondition(String tableName) {
@@ -312,14 +331,19 @@ private String getStatusPrefixCondition(String tableName, String statusPrefix) {
: String.format("%s.status LIKE '%s%s%%'", tableName, statusPrefix, "");
}
- protected String addCondition(String condition1, String condition2) {
- if (condition1.isEmpty()) {
- return condition2;
- }
- if (condition2.isEmpty()) {
- return condition1;
+ protected String addCondition(List<String> conditions) {
+ StringBuffer condition = new StringBuffer();
+
+ for (String c : conditions) {
+ if (!c.isEmpty()) {
+ if (!condition.isEmpty()) {
+ // Add `AND` between conditions
+ condition.append(" AND ");
+ }
+ condition.append(c);
+ }
}
- return condition1 + " AND " + condition2;
+ return condition.toString();
}
public static String escapeApostrophe(String name) {
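
The ListFilter refactor above replaces pairwise addCondition(String, String) chaining with a single pass that collects candidate fragments and joins the non-empty ones with AND, keeping the WHERE TRUE fallback in getCondition. A standalone sketch of that joining logic, under the assumption that an empty string means "no condition":

```java
import java.util.List;

// Standalone sketch of the list-based WHERE builder: collect candidate fragments,
// skip the empty ones, and join the rest with AND, falling back to WHERE TRUE.
public class ConditionJoinSketch {
  static String addCondition(List<String> conditions) {
    StringBuilder condition = new StringBuilder();
    for (String c : conditions) {
      if (!c.isEmpty()) {
        if (condition.length() > 0) {
          condition.append(" AND "); // separate consecutive non-empty fragments
        }
        condition.append(c);
      }
    }
    return condition.toString();
  }

  public static void main(String[] args) {
    String condition = addCondition(List.of("deleted = FALSE", "", "serviceType = 'mysql'"));
    System.out.println(condition.isEmpty() ? "WHERE TRUE" : "WHERE " + condition);
    // prints: WHERE deleted = FALSE AND serviceType = 'mysql'
  }
}
```
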
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/MigrationDAO.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/MigrationDAO.java
index 3fdf445973b2..e47b7417ebbd 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/MigrationDAO.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/MigrationDAO.java
@@ -129,6 +129,9 @@ void upsertServerMigrationSQL(
@RegisterRowMapper(FromServerChangeLogMapper.class)
List listMetricsFromDBMigrations();
+ @SqlQuery("SELECT version FROM SERVER_CHANGE_LOG")
+ List<String> getMigrationVersions();
+
@Getter
@Setter
class ServerMigrationSQLTable {
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/PolicyRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/PolicyRepository.java
index bd19c2377085..d7f8ecdac079 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/PolicyRepository.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/PolicyRepository.java
@@ -132,7 +132,9 @@ public void validateRules(Policy policy) {
public static List<String> filterRedundantResources(List<String> resources) {
// If ALL_RESOURCES are in the resource list, remove redundant resources specifically mentioned
boolean containsAllResources = resources.stream().anyMatch(ALL_RESOURCES::equalsIgnoreCase);
- return containsAllResources ? new ArrayList<>(List.of(ALL_RESOURCES)) : resources;
+ return containsAllResources
+ ? new ArrayList<>(List.of(ALL_RESOURCES))
+ : new ArrayList<>(resources);
}
public static List<MetadataOperation> filterRedundantOperations(
@@ -142,9 +144,7 @@ public static List filterRedundantOperations(
boolean containsViewAll = operations.stream().anyMatch(o -> o.equals(VIEW_ALL));
if (containsViewAll) {
operations =
- operations.stream()
- .filter(o -> o.equals(VIEW_ALL) || !isViewOperation(o))
- .collect(Collectors.toList());
+ operations.stream().filter(o -> o.equals(VIEW_ALL) || !isViewOperation(o)).toList();
}
// If EDIT_ALL is in the operation list, remove all the other specific edit operations that are
@@ -152,11 +152,9 @@ public static List filterRedundantOperations(
boolean containsEditAll = operations.stream().anyMatch(o -> o.equals(EDIT_ALL));
if (containsEditAll) {
operations =
- operations.stream()
- .filter(o -> o.equals(EDIT_ALL) || !isEditOperation(o))
- .collect(Collectors.toList());
+ operations.stream().filter(o -> o.equals(EDIT_ALL) || !isEditOperation(o)).toList();
}
- return operations;
+ return new ArrayList<>(operations);
}
/** Handles entity updated from PUT and POST operation. */
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/SuggestionFilter.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/SuggestionFilter.java
index 3bfd327433c2..99ef71072874 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/SuggestionFilter.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/SuggestionFilter.java
@@ -24,7 +24,7 @@ public String getCondition(boolean includePagination) {
StringBuilder condition = new StringBuilder();
condition.append("WHERE TRUE ");
if (suggestionType != null) {
- condition.append(String.format(" AND type = '%s' ", suggestionType.value()));
+ condition.append(String.format(" AND suggestionType = '%s' ", suggestionType.value()));
}
if (suggestionStatus != null) {
condition.append(String.format(" AND status = '%s' ", suggestionStatus.value()));
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/SuggestionRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/SuggestionRepository.java
index 7b442f655416..e9061c04964b 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/SuggestionRepository.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/SuggestionRepository.java
@@ -16,7 +16,6 @@
import java.util.List;
import java.util.UUID;
import javax.json.JsonPatch;
-import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.SecurityContext;
import javax.ws.rs.core.UriInfo;
@@ -28,9 +27,11 @@
import org.openmetadata.schema.entity.teams.Team;
import org.openmetadata.schema.entity.teams.User;
import org.openmetadata.schema.type.EntityReference;
+import org.openmetadata.schema.type.MetadataOperation;
import org.openmetadata.schema.type.SuggestionStatus;
import org.openmetadata.schema.type.SuggestionType;
import org.openmetadata.schema.type.TagLabel;
+import org.openmetadata.sdk.exception.SuggestionException;
import org.openmetadata.service.Entity;
import org.openmetadata.service.ResourceRegistry;
import org.openmetadata.service.exception.CatalogExceptionMessage;
@@ -154,32 +155,31 @@ public void deleteSuggestionInternalForAnEntity(EntityInterface entity) {
@Getter
public static class SuggestionWorkflow {
- protected final Suggestion suggestion;
- protected final MessageParser.EntityLink entityLink;
+ // The workflow is applied to a specific entity at a time
+ protected final EntityInterface entity;
- SuggestionWorkflow(Suggestion suggestion) {
- this.suggestion = suggestion;
- this.entityLink = MessageParser.EntityLink.parse(suggestion.getEntityLink());
+ SuggestionWorkflow(EntityInterface entity) {
+ this.entity = entity;
}
- public EntityInterface acceptSuggestions(
- EntityRepository<?> repository, EntityInterface entityInterface) {
+ public EntityInterface acceptSuggestion(Suggestion suggestion, EntityInterface entity) {
+ MessageParser.EntityLink entityLink =
+ MessageParser.EntityLink.parse(suggestion.getEntityLink());
if (entityLink.getFieldName() != null) {
- entityInterface =
- repository.applySuggestion(
- entityInterface, entityLink.getFullyQualifiedFieldValue(), suggestion);
- return entityInterface;
+ EntityRepository<?> repository = Entity.getEntityRepository(entityLink.getEntityType());
+ return repository.applySuggestion(
+ entity, entityLink.getFullyQualifiedFieldValue(), suggestion);
} else {
if (suggestion.getType().equals(SuggestionType.SuggestTagLabel)) {
- List<TagLabel> tags = new ArrayList<>(entityInterface.getTags());
+ List tags = new ArrayList<>(entity.getTags());
tags.addAll(suggestion.getTagLabels());
- entityInterface.setTags(tags);
- return entityInterface;
+ entity.setTags(tags);
+ return entity;
} else if (suggestion.getType().equals(SuggestionType.SuggestDescription)) {
- entityInterface.setDescription(suggestion.getDescription());
- return entityInterface;
+ entity.setDescription(suggestion.getDescription());
+ return entity;
} else {
- throw new WebApplicationException("Invalid suggestion Type");
+ throw new SuggestionException("Invalid suggestion Type");
}
}
}
@@ -190,26 +190,43 @@ public RestUtil.PutResponse acceptSuggestion(
Suggestion suggestion,
SecurityContext securityContext,
Authorizer authorizer) {
- suggestion.setStatus(SuggestionStatus.Accepted);
acceptSuggestion(suggestion, securityContext, authorizer);
Suggestion updatedHref = SuggestionsResource.addHref(uriInfo, suggestion);
return new RestUtil.PutResponse<>(Response.Status.OK, updatedHref, SUGGESTION_ACCEPTED);
}
+ public RestUtil.PutResponse<List<Suggestion>> acceptSuggestionList(
+ UriInfo uriInfo,
+ List<Suggestion> suggestions,
+ SuggestionType suggestionType,
+ SecurityContext securityContext,
+ Authorizer authorizer) {
+ acceptSuggestionList(suggestions, suggestionType, securityContext, authorizer);
+ List<Suggestion> updatedHref =
+ suggestions.stream()
+ .map(suggestion -> SuggestionsResource.addHref(uriInfo, suggestion))
+ .toList();
+ return new RestUtil.PutResponse<>(Response.Status.OK, updatedHref, SUGGESTION_ACCEPTED);
+ }
+
protected void acceptSuggestion(
Suggestion suggestion, SecurityContext securityContext, Authorizer authorizer) {
String user = securityContext.getUserPrincipal().getName();
MessageParser.EntityLink entityLink =
MessageParser.EntityLink.parse(suggestion.getEntityLink());
+ EntityRepository<?> repository = Entity.getEntityRepository(entityLink.getEntityType());
EntityInterface entity =
- Entity.getEntity(
- entityLink, suggestion.getType() == SuggestionType.SuggestTagLabel ? "tags" : "", ALL);
+ Entity.getEntity(entityLink, repository.getSuggestionFields(suggestion), ALL);
+ // Prepare the original JSON before updating the Entity, otherwise we get an empty patch
String origJson = JsonUtils.pojoToJson(entity);
- SuggestionWorkflow suggestionWorkflow = getSuggestionWorkflow(suggestion);
- EntityRepository<?> repository = Entity.getEntityRepository(entityLink.getEntityType());
- EntityInterface updatedEntity = suggestionWorkflow.acceptSuggestions(repository, entity);
+ SuggestionWorkflow suggestionWorkflow = repository.getSuggestionWorkflow(entity);
+
+ EntityInterface updatedEntity = suggestionWorkflow.acceptSuggestion(suggestion, entity);
String updatedEntityJson = JsonUtils.pojoToJson(updatedEntity);
+
+ // Patch the entity with the updated suggestions
JsonPatch patch = JsonUtils.getJsonPatch(origJson, updatedEntityJson);
+
OperationContext operationContext = new OperationContext(entityLink.getEntityType(), patch);
authorizer.authorize(
securityContext,
@@ -220,6 +237,60 @@ protected void acceptSuggestion(
update(suggestion, user);
}
+ @Transaction
+ protected void acceptSuggestionList(
+ List<Suggestion> suggestions,
+ SuggestionType suggestionType,
+ SecurityContext securityContext,
+ Authorizer authorizer) {
+ String user = securityContext.getUserPrincipal().getName();
+
+ // Entity being updated
+ EntityInterface entity = null;
+ EntityRepository<?> repository = null;
+ String origJson = null;
+ SuggestionWorkflow suggestionWorkflow = null;
+
+ for (Suggestion suggestion : suggestions) {
+ MessageParser.EntityLink entityLink =
+ MessageParser.EntityLink.parse(suggestion.getEntityLink());
+
+ // Validate all suggestions indeed talk about the same entity
+ if (entity == null) {
+ // Initialize the Entity and the Repository
+ entity =
+ Entity.getEntity(
+ entityLink,
+ suggestionType == SuggestionType.SuggestTagLabel ? "tags" : "",
+ NON_DELETED);
+ repository = Entity.getEntityRepository(entityLink.getEntityType());
+ origJson = JsonUtils.pojoToJson(entity);
+ suggestionWorkflow = repository.getSuggestionWorkflow(entity);
+ } else if (!entity.getFullyQualifiedName().equals(entityLink.getEntityFQN())) {
+ throw new SuggestionException("All suggestions must be for the same entity");
+ }
+ // update entity with the suggestion
+ entity = suggestionWorkflow.acceptSuggestion(suggestion, entity);
+ }
+
+ // Patch the entity with the updated suggestions
+ String updatedEntityJson = JsonUtils.pojoToJson(entity);
+ JsonPatch patch = JsonUtils.getJsonPatch(origJson, updatedEntityJson);
+
+ OperationContext operationContext = new OperationContext(repository.getEntityType(), patch);
+ authorizer.authorize(
+ securityContext,
+ operationContext,
+ new ResourceContext<>(repository.getEntityType(), entity.getId(), null));
+ repository.patch(null, entity.getId(), user, patch);
+
+ // Only mark the suggestions as accepted after the entity has been successfully updated
+ for (Suggestion suggestion : suggestions) {
+ suggestion.setStatus(SuggestionStatus.Accepted);
+ update(suggestion, user);
+ }
+ }
+
public RestUtil.PutResponse<Suggestion> rejectSuggestion(
UriInfo uriInfo, Suggestion suggestion, String user) {
suggestion.setStatus(SuggestionStatus.Rejected);
@@ -228,6 +299,17 @@ public RestUtil.PutResponse rejectSuggestion(
return new RestUtil.PutResponse<>(Response.Status.OK, updatedHref, SUGGESTION_REJECTED);
}
+ @Transaction
+ public RestUtil.PutResponse<List<Suggestion>> rejectSuggestionList(
+ UriInfo uriInfo, List<Suggestion> suggestions, String user) {
+ for (Suggestion suggestion : suggestions) {
+ suggestion.setStatus(SuggestionStatus.Rejected);
+ update(suggestion, user);
+ SuggestionsResource.addHref(uriInfo, suggestion);
+ }
+ return new RestUtil.PutResponse<>(Response.Status.OK, suggestions, SUGGESTION_REJECTED);
+ }
+
public void checkPermissionsForUpdateSuggestion(
Suggestion suggestion, SecurityContext securityContext) {
String userName = securityContext.getUserPrincipal().getName();
@@ -272,11 +354,23 @@ public void checkPermissionsForAcceptOrRejectSuggestion(
}
}
- public SuggestionWorkflow getSuggestionWorkflow(Suggestion suggestion) {
+ public void checkPermissionsForEditEntity(
+ Suggestion suggestion,
+ SuggestionType suggestionType,
+ SecurityContext securityContext,
+ Authorizer authorizer) {
MessageParser.EntityLink entityLink =
MessageParser.EntityLink.parse(suggestion.getEntityLink());
- EntityRepository> repository = Entity.getEntityRepository(entityLink.getEntityType());
- return repository.getSuggestionWorkflow(suggestion);
+ EntityInterface entity = Entity.getEntity(entityLink, "", NON_DELETED);
+ // Check that the user has the right permissions to update the entity
+ authorizer.authorize(
+ securityContext,
+ new OperationContext(
+ entityLink.getEntityType(),
+ suggestionType == SuggestionType.SuggestTagLabel
+ ? MetadataOperation.EDIT_TAGS
+ : MetadataOperation.EDIT_DESCRIPTION),
+ new ResourceContext<>(entityLink.getEntityType(), entity.getId(), null));
}
public int listCount(SuggestionFilter filter) {
@@ -333,4 +427,9 @@ private List getSuggestionList(List jsons) {
}
return suggestions;
}
+
+ public final List<Suggestion> listAll(SuggestionFilter filter) {
+ ResultList<Suggestion> suggestionList = listAfter(filter, Integer.MAX_VALUE - 1, "");
+ return suggestionList.getData();
+ }
}
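
The batch path added in acceptSuggestionList loads the target entity once, checks that every suggestion refers to that same entity, folds all suggestions into one in-memory copy, and only then applies a single JSON patch and marks the suggestions accepted. A simplified sketch of that shape, using stand-in types (a Suggestion record and EntityState) instead of the generated schema classes:

```java
import java.util.ArrayList;
import java.util.List;

// Simplified sketch of the batch-accept shape in acceptSuggestionList: load the target
// entity once, require that every suggestion points at the same entity, fold all
// suggestions into one in-memory copy, and only then patch the stored entity.
// Suggestion and EntityState are stand-ins for the generated schema classes.
public class BatchAcceptSketch {
  record Suggestion(String entityFQN, String description) {}

  static class EntityState {
    final String fqn;
    final List<String> appliedDescriptions = new ArrayList<>();

    EntityState(String fqn) {
      this.fqn = fqn;
    }
  }

  static EntityState acceptAll(List<Suggestion> suggestions) {
    EntityState entity = null;
    for (Suggestion suggestion : suggestions) {
      if (entity == null) {
        entity = new EntityState(suggestion.entityFQN()); // load the entity on first use
      } else if (!entity.fqn.equals(suggestion.entityFQN())) {
        throw new IllegalArgumentException("All suggestions must be for the same entity");
      }
      entity.appliedDescriptions.add(suggestion.description()); // apply to the copy
    }
    return entity; // caller diffs original vs. this state and applies one JSON patch
  }

  public static void main(String[] args) {
    EntityState state =
        acceptAll(
            List.of(
                new Suggestion("svc.db.schema.orders", "Orders fact table"),
                new Suggestion("svc.db.schema.orders", "One row per order line")));
    System.out.println(state.fqn + " -> " + state.appliedDescriptions);
  }
}
```
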
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/SystemRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/SystemRepository.java
index 94c92c91908d..28f308d8dcec 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/SystemRepository.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/SystemRepository.java
@@ -13,16 +13,26 @@
import org.jdbi.v3.sqlobject.transaction.Transaction;
import org.openmetadata.schema.api.configuration.SlackAppConfiguration;
import org.openmetadata.schema.email.SmtpSettings;
+import org.openmetadata.schema.entity.services.ingestionPipelines.PipelineServiceClientResponse;
+import org.openmetadata.schema.services.connections.metadata.OpenMetadataConnection;
import org.openmetadata.schema.settings.Settings;
import org.openmetadata.schema.settings.SettingsType;
+import org.openmetadata.schema.system.StepValidation;
+import org.openmetadata.schema.system.ValidationResponse;
import org.openmetadata.schema.util.EntitiesCount;
import org.openmetadata.schema.util.ServicesCount;
+import org.openmetadata.sdk.PipelineServiceClient;
import org.openmetadata.service.Entity;
+import org.openmetadata.service.OpenMetadataApplicationConfig;
import org.openmetadata.service.exception.CustomExceptionMessage;
import org.openmetadata.service.fernet.Fernet;
import org.openmetadata.service.jdbi3.CollectionDAO.SystemDAO;
+import org.openmetadata.service.migration.MigrationValidationClient;
import org.openmetadata.service.resources.settings.SettingsCache;
+import org.openmetadata.service.search.SearchRepository;
+import org.openmetadata.service.security.JwtFilter;
import org.openmetadata.service.util.JsonUtils;
+import org.openmetadata.service.util.OpenMetadataConnectionBuilder;
import org.openmetadata.service.util.RestUtil;
import org.openmetadata.service.util.ResultList;
@@ -32,10 +42,28 @@ public class SystemRepository {
private static final String FAILED_TO_UPDATE_SETTINGS = "Failed to Update Settings";
public static final String INTERNAL_SERVER_ERROR_WITH_REASON = "Internal Server Error. Reason :";
private final SystemDAO dao;
+ private final MigrationValidationClient migrationValidationClient;
+
+ private enum ValidationStepDescription {
+ DATABASE("Validate that we can properly run a query against the configured database."),
+ SEARCH("Validate that the search client is available."),
+ PIPELINE_SERVICE_CLIENT("Validate that the pipeline service client is available."),
+ JWT_TOKEN("Validate that the ingestion-bot JWT token can be properly decoded."),
+ MIGRATION("Validate that all the necessary migrations have been properly executed.");
+
+ public final String key;
+
+ ValidationStepDescription(String param) {
+ this.key = param;
+ }
+ }
+
+ private static final String INDEX_NAME = "table_search_index";
public SystemRepository() {
this.dao = Entity.getCollectionDAO().systemDAO();
Entity.setSystemRepository(this);
+ migrationValidationClient = MigrationValidationClient.getInstance();
}
public EntitiesCount getAllEntitiesCount(ListFilter filter) {
@@ -210,4 +238,98 @@ public static SlackAppConfiguration decryptSlackAppSetting(String encryptedSetti
}
return JsonUtils.readValue(encryptedSetting, SlackAppConfiguration.class);
}
+
+ public ValidationResponse validateSystem(
+ OpenMetadataApplicationConfig applicationConfig,
+ PipelineServiceClient pipelineServiceClient,
+ JwtFilter jwtFilter) {
+ ValidationResponse validation = new ValidationResponse();
+
+ validation.setDatabase(getDatabaseValidation());
+ validation.setSearchInstance(getSearchValidation());
+ validation.setPipelineServiceClient(getPipelineServiceClientValidation(pipelineServiceClient));
+ validation.setJwks(getJWKsValidation(applicationConfig, jwtFilter));
+ validation.setMigrations(getMigrationValidation(migrationValidationClient));
+
+ return validation;
+ }
+
+ private StepValidation getDatabaseValidation() {
+ try {
+ dao.testConnection();
+ return new StepValidation()
+ .withDescription(ValidationStepDescription.DATABASE.key)
+ .withPassed(Boolean.TRUE);
+ } catch (Exception exc) {
+ return new StepValidation()
+ .withDescription(ValidationStepDescription.DATABASE.key)
+ .withPassed(Boolean.FALSE)
+ .withMessage(exc.getMessage());
+ }
+ }
+
+ private StepValidation getSearchValidation() {
+ SearchRepository searchRepository = Entity.getSearchRepository();
+ if (Boolean.TRUE.equals(searchRepository.getSearchClient().isClientAvailable())
+ && searchRepository.getSearchClient().indexExists(INDEX_NAME)) {
+ return new StepValidation()
+ .withDescription(ValidationStepDescription.SEARCH.key)
+ .withPassed(Boolean.TRUE);
+ } else {
+ return new StepValidation()
+ .withDescription(ValidationStepDescription.SEARCH.key)
+ .withPassed(Boolean.FALSE)
+ .withMessage("Search instance is not reachable or available");
+ }
+ }
+
+ private StepValidation getPipelineServiceClientValidation(
+ PipelineServiceClient pipelineServiceClient) {
+ PipelineServiceClientResponse pipelineResponse = pipelineServiceClient.getServiceStatus();
+ if (pipelineResponse.getCode() == 200) {
+ return new StepValidation()
+ .withDescription(ValidationStepDescription.PIPELINE_SERVICE_CLIENT.key)
+ .withPassed(Boolean.TRUE);
+ } else {
+ return new StepValidation()
+ .withDescription(ValidationStepDescription.PIPELINE_SERVICE_CLIENT.key)
+ .withPassed(Boolean.FALSE)
+ .withMessage(pipelineResponse.getReason());
+ }
+ }
+
+ private StepValidation getJWKsValidation(
+ OpenMetadataApplicationConfig applicationConfig, JwtFilter jwtFilter) {
+ OpenMetadataConnection openMetadataServerConnection =
+ new OpenMetadataConnectionBuilder(applicationConfig).build();
+ try {
+ jwtFilter.validateAndReturnDecodedJwtToken(
+ openMetadataServerConnection.getSecurityConfig().getJwtToken());
+ return new StepValidation()
+ .withDescription(ValidationStepDescription.JWT_TOKEN.key)
+ .withPassed(Boolean.TRUE);
+ } catch (Exception e) {
+ return new StepValidation()
+ .withDescription(ValidationStepDescription.JWT_TOKEN.key)
+ .withPassed(Boolean.FALSE)
+ .withMessage(e.getMessage());
+ }
+ }
+
+ private StepValidation getMigrationValidation(
+ MigrationValidationClient migrationValidationClient) {
+ List<String> currentVersions = migrationValidationClient.getCurrentVersions();
+ if (currentVersions.equals(migrationValidationClient.getExpectedMigrationList())) {
+ return new StepValidation()
+ .withDescription(ValidationStepDescription.MIGRATION.key)
+ .withPassed(Boolean.TRUE);
+ }
+ return new StepValidation()
+ .withDescription(ValidationStepDescription.MIGRATION.key)
+ .withPassed(Boolean.FALSE)
+ .withMessage(
+ String.format(
+ "Found the versions [%s], but expected [%s]",
+ currentVersions, migrationValidationClient.getExpectedMigrationList()));
+ }
}
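
Each validateSystem step above follows the same pattern: run a check and return a StepValidation carrying the step description, a pass flag, and the failure message if any. A compact sketch of that pattern, with StepResult standing in for the generated StepValidation type and a no-op check in place of a real database query:

```java
import java.util.concurrent.Callable;

// Sketch of the per-step validation pattern used by validateSystem: run a check and
// report pass/fail plus a message. StepResult stands in for the generated StepValidation
// type, and the check below is a no-op placeholder rather than a real database query.
public class ValidationStepSketch {
  record StepResult(String description, boolean passed, String message) {}

  static StepResult runStep(String description, Callable<Void> check) {
    try {
      check.call();
      return new StepResult(description, true, null);
    } catch (Exception exc) {
      return new StepResult(description, false, exc.getMessage());
    }
  }

  public static void main(String[] args) {
    StepResult database =
        runStep(
            "Validate that we can properly run a query against the configured database.",
            () -> null); // a real check would run a trivial query against the datasource
    System.out.println(database);
  }
}
```
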
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TableRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TableRepository.java
index 85c64e05b8c3..3c2a457bb100 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TableRepository.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TableRepository.java
@@ -45,7 +45,6 @@
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
-import javax.ws.rs.WebApplicationException;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
@@ -85,6 +84,7 @@
import org.openmetadata.schema.type.csv.CsvFile;
import org.openmetadata.schema.type.csv.CsvHeader;
import org.openmetadata.schema.type.csv.CsvImportResult;
+import org.openmetadata.sdk.exception.SuggestionException;
import org.openmetadata.service.Entity;
import org.openmetadata.service.exception.CatalogExceptionMessage;
import org.openmetadata.service.exception.EntityNotFoundException;
@@ -739,9 +739,14 @@ public TaskWorkflow getTaskWorkflow(ThreadContext threadContext) {
return super.getTaskWorkflow(threadContext);
}
+ @Override
+ public String getSuggestionFields(Suggestion suggestion) {
+ return suggestion.getType() == SuggestionType.SuggestTagLabel ? "columns,tags" : "";
+ }
+
@Override
public Table applySuggestion(EntityInterface entity, String columnFQN, Suggestion suggestion) {
- Table table = Entity.getEntity(TABLE, entity.getId(), "columns,tags", ALL);
+ Table table = (Table) entity;
for (Column col : table.getColumns()) {
if (col.getFullyQualifiedName().equals(columnFQN)) {
if (suggestion.getType().equals(SuggestionType.SuggestTagLabel)) {
@@ -751,7 +756,7 @@ public Table applySuggestion(EntityInterface entity, String columnFQN, Suggestio
} else if (suggestion.getType().equals(SuggestionType.SuggestDescription)) {
col.setDescription(suggestion.getDescription());
} else {
- throw new WebApplicationException("Invalid suggestion Type");
+ throw new SuggestionException("Invalid suggestion Type");
}
}
}
@@ -1231,7 +1236,7 @@ protected void addRecord(CsvFile csvFile, Table entity) {
addRecord(csvFile, recordList, table.getColumns().get(0), false);
for (int i = 1; i < entity.getColumns().size(); i++) {
- addRecord(csvFile, new ArrayList<>(), table.getColumns().get(1), true);
+ addRecord(csvFile, new ArrayList<>(), table.getColumns().get(i), true);
}
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestCaseRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestCaseRepository.java
index ae0d2fdcaa44..11cb5f60741d 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestCaseRepository.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestCaseRepository.java
@@ -409,13 +409,15 @@ private void setTestSuiteSummary(
updateResultSummaries(testCase, isDeleted, resultSummaries, resultSummary);
// Update test case result summary attribute for the test suite
+ TestSuiteRepository testSuiteRepository =
+ (TestSuiteRepository) Entity.getEntityRepository(Entity.TEST_SUITE);
+ TestSuite original =
+ TestSuiteRepository.copyTestSuite(
+ testSuite); // we'll need the original state to update the test suite
testSuite.setTestCaseResultSummary(resultSummaries);
- daoCollection
- .testSuiteDAO()
- .update(
- testSuite.getId(),
- testSuite.getFullyQualifiedName(),
- JsonUtils.pojoToJson(testSuite));
+ EntityRepository.EntityUpdater testSuiteUpdater =
+ testSuiteRepository.getUpdater(original, testSuite, Operation.PUT);
+ testSuiteUpdater.update();
}
}
@@ -652,11 +654,16 @@ private void removeTestCaseFromTestSuiteResultSummary(UUID testSuiteId, String t
testSuite.setSummary(null); // we don't want to store the summary in the database
List<ResultSummary> resultSummaries = testSuite.getTestCaseResultSummary();
resultSummaries.removeIf(summary -> summary.getTestCaseName().equals(testCaseFqn));
+
+ TestSuiteRepository testSuiteRepository =
+ (TestSuiteRepository) Entity.getEntityRepository(Entity.TEST_SUITE);
+ TestSuite original =
+ TestSuiteRepository.copyTestSuite(
+ testSuite); // we'll need the original state to update the test suite
testSuite.setTestCaseResultSummary(resultSummaries);
- daoCollection
- .testSuiteDAO()
- .update(
- testSuite.getId(), testSuite.getFullyQualifiedName(), JsonUtils.pojoToJson(testSuite));
+ EntityRepository.EntityUpdater testSuiteUpdater =
+ testSuiteRepository.getUpdater(original, testSuite, Operation.PUT);
+ testSuiteUpdater.update();
}
@Override
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestSuiteRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestSuiteRepository.java
index bbb507705427..c85e618a4056 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestSuiteRepository.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestSuiteRepository.java
@@ -9,10 +9,14 @@
import static org.openmetadata.service.Entity.TEST_SUITE;
import static org.openmetadata.service.util.FullyQualifiedName.quoteName;
+import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
+import javax.json.JsonArray;
+import javax.json.JsonObject;
+import javax.json.JsonValue;
import javax.ws.rs.core.SecurityContext;
import lombok.extern.slf4j.Slf4j;
import org.jdbi.v3.sqlobject.transaction.Transaction;
@@ -23,7 +27,6 @@
import org.openmetadata.schema.tests.type.TestSummary;
import org.openmetadata.schema.type.EntityReference;
import org.openmetadata.schema.type.EventType;
-import org.openmetadata.schema.type.Include;
import org.openmetadata.schema.type.Relationship;
import org.openmetadata.service.Entity;
import org.openmetadata.service.resources.dqtests.TestSuiteResource;
@@ -55,7 +58,7 @@ public void setFields(TestSuite entity, EntityUtil.Fields fields) {
fields.contains("pipelines") ? getIngestionPipelines(entity) : entity.getPipelines());
entity.setSummary(
fields.contains("summary") ? getTestCasesExecutionSummary(entity) : entity.getSummary());
- entity.withTests(fields.contains("tests") ? getTestCases(entity) : entity.getTests());
+ entity.withTests(fields.contains(UPDATE_FIELDS) ? getTestCases(entity) : entity.getTests());
}
@Override
@@ -71,7 +74,7 @@ public void setInheritedFields(TestSuite testSuite, EntityUtil.Fields fields) {
public void clearFields(TestSuite entity, EntityUtil.Fields fields) {
entity.setPipelines(fields.contains("pipelines") ? entity.getPipelines() : null);
entity.setSummary(fields.contains("summary") ? entity.getSummary() : null);
- entity.withTests(fields.contains("tests") ? entity.getTests() : null);
+ entity.withTests(fields.contains(UPDATE_FIELDS) ? entity.getTests() : null);
}
private TestSummary buildTestSummary(Map<String, Integer> testCaseSummary) {
@@ -117,31 +120,81 @@ private TestSummary getTestCasesExecutionSummary(TestSuite entity) {
return buildTestSummary(testCaseSummary);
}
- private TestSummary getTestCasesExecutionSummary(List<TestSuite> entities) {
- if (entities.isEmpty()) return new TestSummary();
- Map<String, Integer> testsSummary = new HashMap<>();
- for (TestSuite testSuite : entities) {
- Map<String, Integer> testSummary = getResultSummary(testSuite);
- for (Map.Entry<String, Integer> entry : testSummary.entrySet()) {
- testsSummary.put(
- entry.getKey(), testsSummary.getOrDefault(entry.getKey(), 0) + entry.getValue());
+ private TestSummary getTestCasesExecutionSummary(JsonObject aggregation) {
+ // Initialize the test summary with 0 values
+ TestSummary testSummary =
+ new TestSummary().withAborted(0).withFailed(0).withSuccess(0).withQueued(0).withTotal(0);
+ JsonObject summary = aggregation.getJsonObject("nested#testCaseResultSummary");
+ testSummary.setTotal(summary.getJsonNumber("doc_count").intValue());
+
+ JsonObject statusCount = summary.getJsonObject("sterms#status_counts");
+ JsonArray buckets = statusCount.getJsonArray("buckets");
+
+ for (JsonValue bucket : buckets) {
+ String key = ((JsonObject) bucket).getString("key");
+ Integer count = ((JsonObject) bucket).getJsonNumber("doc_count").intValue();
+ switch (key) {
+ case "Success":
+ testSummary.setSuccess(count);
+ break;
+ case "Failed":
+ testSummary.setFailed(count);
+ break;
+ case "Aborted":
+ testSummary.setAborted(count);
+ break;
+ case "Queued":
+ testSummary.setQueued(count);
+ break;
}
- testSuite.getTestCaseResultSummary().size();
}
- return buildTestSummary(testsSummary);
+ return testSummary;
}
- public TestSummary getTestSummary(UUID testSuiteId) {
+ public TestSummary getTestSummary(UUID testSuiteId) throws IOException {
+ String aggregationQuery =
+ """
+ {
+ "aggregations": {
+ "test_case_results": {
+ "nested": {
+ "path": "testCaseResultSummary"
+ },
+ "aggs": {
+ "status_counts": {
+ "terms": {
+ "field": "testCaseResultSummary.status"
+ }
+ }
+ }
+ }
+ }
+ }
+ """;
+ JsonObject aggregationJson = JsonUtils.readJson(aggregationQuery).asJsonObject();
TestSummary testSummary;
if (testSuiteId == null) {
- ListFilter filter = new ListFilter();
- filter.addQueryParam("testSuiteType", "executable");
- List<TestSuite> testSuites = listAll(EntityUtil.Fields.EMPTY_FIELDS, filter);
- testSummary = getTestCasesExecutionSummary(testSuites);
+ JsonObject testCaseResultSummary =
+ searchRepository.aggregate(null, TEST_SUITE, aggregationJson);
+ testSummary = getTestCasesExecutionSummary(testCaseResultSummary);
} else {
+ String query =
+ """
+ {
+ "query": {
+ "bool": {
+ "must": {
+ "term": {"id": "%s"}
+ }
+ }
+ }
+ }
+ """
+ .formatted(testSuiteId);
// don't want to get it from the cache as test results summary may be stale
- TestSuite testSuite = Entity.getEntity(TEST_SUITE, testSuiteId, "", Include.ALL, false);
- testSummary = getTestCasesExecutionSummary(testSuite);
+ JsonObject testCaseResultSummary =
+ searchRepository.aggregate(query, TEST_SUITE, aggregationJson);
+ testSummary = getTestCasesExecutionSummary(testCaseResultSummary);
}
return testSummary;
}
@@ -211,6 +264,26 @@ public RestUtil.DeleteResponse deleteLogicalTestSuite(
return new RestUtil.DeleteResponse<>(updated, changeType);
}
+ public static TestSuite copyTestSuite(TestSuite testSuite) {
+ return new TestSuite()
+ .withConnection(testSuite.getConnection())
+ .withDescription(testSuite.getDescription())
+ .withChangeDescription(testSuite.getChangeDescription())
+ .withDeleted(testSuite.getDeleted())
+ .withDisplayName(testSuite.getDisplayName())
+ .withFullyQualifiedName(testSuite.getFullyQualifiedName())
+ .withHref(testSuite.getHref())
+ .withId(testSuite.getId())
+ .withName(testSuite.getName())
+ .withExecutable(testSuite.getExecutable())
+ .withExecutableEntityReference(testSuite.getExecutableEntityReference())
+ .withServiceType(testSuite.getServiceType())
+ .withOwner(testSuite.getOwner())
+ .withUpdatedBy(testSuite.getUpdatedBy())
+ .withUpdatedAt(testSuite.getUpdatedAt())
+ .withVersion(testSuite.getVersion());
+ }
+
public class TestSuiteUpdater extends EntityUpdater {
public TestSuiteUpdater(TestSuite original, TestSuite updated, Operation operation) {
super(original, updated, operation);
@@ -221,7 +294,13 @@ public TestSuiteUpdater(TestSuite original, TestSuite updated, Operation operati
public void entitySpecificUpdate() {
List<EntityReference> origTests = listOrEmpty(original.getTests());
List<EntityReference> updatedTests = listOrEmpty(updated.getTests());
- recordChange("tests", origTests, updatedTests);
+ List<ResultSummary> origTestCaseResultSummary =
+ listOrEmpty(original.getTestCaseResultSummary());
+ List<ResultSummary> updatedTestCaseResultSummary =
+ listOrEmpty(updated.getTestCaseResultSummary());
+ recordChange(UPDATE_FIELDS, origTests, updatedTests);
+ recordChange(
+ "testCaseResultSummary", origTestCaseResultSummary, updatedTestCaseResultSummary);
}
}
}
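
The test summary is now computed from a search aggregation (a nested aggregation on testCaseResultSummary with a terms sub-aggregation on status) instead of iterating test suites from the database. A sketch of parsing that response shape with javax.json follows; the JSON payload is a hand-written example of what such an aggregation response may look like, not a captured one, and running it requires a javax.json provider on the classpath.

```java
import java.io.StringReader;
import javax.json.Json;
import javax.json.JsonArray;
import javax.json.JsonObject;
import javax.json.JsonValue;

// Sketch of parsing the nested/terms aggregation consumed by getTestCasesExecutionSummary.
// The payload below is a hand-written example of the response shape, not a captured one.
public class AggregationParseSketch {
  public static void main(String[] args) {
    String response =
        """
        {
          "nested#testCaseResultSummary": {
            "doc_count": 5,
            "sterms#status_counts": {
              "buckets": [
                {"key": "Success", "doc_count": 3},
                {"key": "Failed", "doc_count": 2}
              ]
            }
          }
        }
        """;
    JsonObject aggregation = Json.createReader(new StringReader(response)).readObject();
    JsonObject summary = aggregation.getJsonObject("nested#testCaseResultSummary");
    int total = summary.getJsonNumber("doc_count").intValue();
    JsonArray buckets = summary.getJsonObject("sterms#status_counts").getJsonArray("buckets");
    int success = 0;
    int failed = 0;
    for (JsonValue bucket : buckets) {
      JsonObject b = bucket.asJsonObject();
      int count = b.getJsonNumber("doc_count").intValue();
      if ("Success".equals(b.getString("key"))) success = count;
      if ("Failed".equals(b.getString("key"))) failed = count;
    }
    System.out.printf("total=%d success=%d failed=%d%n", total, success, failed);
  }
}
```
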
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TypeRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TypeRepository.java
index 68ad87df8f2b..bce5765a243b 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TypeRepository.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TypeRepository.java
@@ -23,6 +23,7 @@
import static org.openmetadata.service.util.EntityUtil.getCustomField;
import java.util.ArrayList;
+import java.util.HashSet;
import java.util.List;
import java.util.UUID;
import javax.ws.rs.core.UriInfo;
@@ -32,9 +33,11 @@
import org.openmetadata.schema.entity.Type;
import org.openmetadata.schema.entity.type.Category;
import org.openmetadata.schema.entity.type.CustomProperty;
+import org.openmetadata.schema.type.CustomPropertyConfig;
import org.openmetadata.schema.type.EntityReference;
import org.openmetadata.schema.type.Include;
import org.openmetadata.schema.type.Relationship;
+import org.openmetadata.schema.type.customproperties.EnumConfig;
import org.openmetadata.service.Entity;
import org.openmetadata.service.TypeRegistry;
import org.openmetadata.service.resources.types.TypeResource;
@@ -117,6 +120,7 @@ public PutResponse addCustomProperty(
property.setPropertyType(
Entity.getEntityReferenceById(
Entity.TYPE, property.getPropertyType().getId(), NON_DELETED));
+ validateProperty(property);
if (type.getCategory().equals(Category.Field)) {
throw new IllegalArgumentException(
"Only entity types can be extended and field types can't be extended");
@@ -161,6 +165,30 @@ private List getCustomProperties(Type type) {
return customProperties;
}
+ private void validateProperty(CustomProperty customProperty) {
+ switch (customProperty.getPropertyType().getName()) {
+ case "enum" -> {
+ CustomPropertyConfig config = customProperty.getCustomPropertyConfig();
+ if (config != null) {
+ EnumConfig enumConfig = JsonUtils.convertValue(config.getConfig(), EnumConfig.class);
+ if (enumConfig == null
+ || (enumConfig.getValues() != null && enumConfig.getValues().isEmpty())) {
+ throw new IllegalArgumentException(
+ "Enum Custom Property Type must have EnumConfig populated with values.");
+ } else if (enumConfig.getValues() != null
+ && enumConfig.getValues().stream().distinct().count()
+ != enumConfig.getValues().size()) {
+ throw new IllegalArgumentException(
+ "Enum Custom Property values cannot have duplicates.");
+ }
+ } else {
+ throw new IllegalArgumentException("Enum Custom Property Type must have EnumConfig.");
+ }
+ }
+ case "int", "string" -> {}
+ }
+ }
+
/** Handles entity updated from PUT and POST operation. */
public class TypeUpdater extends EntityUpdater {
public TypeUpdater(Type original, Type updated, Operation operation) {
@@ -199,6 +227,7 @@ private void updateCustomProperties() {
continue;
}
updateCustomPropertyDescription(updated, storedProperty, updateProperty);
+ updateCustomPropertyConfig(updated, storedProperty, updateProperty);
}
}
@@ -270,5 +299,55 @@ private void updateCustomPropertyDescription(
customPropertyJson);
}
}
+
+ private void updateCustomPropertyConfig(
+ Type entity, CustomProperty origProperty, CustomProperty updatedProperty) {
+ String fieldName = getCustomField(origProperty, "customPropertyConfig");
+ if (previous == null || !previous.getVersion().equals(updated.getVersion())) {
+ validatePropertyConfigUpdate(entity, origProperty, updatedProperty);
+ }
+ if (recordChange(
+ fieldName,
+ origProperty.getCustomPropertyConfig(),
+ updatedProperty.getCustomPropertyConfig())) {
+ String customPropertyFQN =
+ getCustomPropertyFQN(entity.getName(), updatedProperty.getName());
+ EntityReference propertyType =
+ updatedProperty.getPropertyType(); // Don't store entity reference
+ String customPropertyJson = JsonUtils.pojoToJson(updatedProperty.withPropertyType(null));
+ updatedProperty.withPropertyType(propertyType); // Restore entity reference
+ daoCollection
+ .fieldRelationshipDAO()
+ .upsert(
+ customPropertyFQN,
+ updatedProperty.getPropertyType().getName(),
+ customPropertyFQN,
+ updatedProperty.getPropertyType().getName(),
+ Entity.TYPE,
+ Entity.TYPE,
+ Relationship.HAS.ordinal(),
+ "customProperty",
+ customPropertyJson);
+ }
+ }
+
+ private void validatePropertyConfigUpdate(
+ Type entity, CustomProperty origProperty, CustomProperty updatedProperty) {
+ if (origProperty.getPropertyType().getName().equals("enum")) {
+ EnumConfig origConfig =
+ JsonUtils.convertValue(
+ origProperty.getCustomPropertyConfig().getConfig(), EnumConfig.class);
+ EnumConfig updatedConfig =
+ JsonUtils.convertValue(
+ updatedProperty.getCustomPropertyConfig().getConfig(), EnumConfig.class);
+ HashSet<String> updatedValues = new HashSet<>(updatedConfig.getValues());
+ if (updatedValues.size() != updatedConfig.getValues().size()) {
+ throw new IllegalArgumentException("Enum Custom Property values cannot have duplicates.");
+ } else if (!updatedValues.containsAll(origConfig.getValues())) {
+ throw new IllegalArgumentException(
+ "Existing Enum Custom Property values cannot be removed.");
+ }
+ }
+ }
}
}
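
validatePropertyConfigUpdate above enforces two rules for enum custom properties: the updated value list must be free of duplicates, and it must still contain every previously configured value. A small standalone sketch of those checks on plain string lists:

```java
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Standalone sketch of the enum config update rules: the new value list must be
// duplicate-free and must still contain every previously configured value.
public class EnumConfigUpdateSketch {
  static void validateEnumUpdate(List<String> original, List<String> updated) {
    Set<String> updatedValues = new HashSet<>(updated);
    if (updatedValues.size() != updated.size()) {
      throw new IllegalArgumentException("Enum Custom Property values cannot have duplicates.");
    }
    if (!updatedValues.containsAll(original)) {
      throw new IllegalArgumentException("Existing Enum Custom Property values cannot be removed.");
    }
  }

  public static void main(String[] args) {
    validateEnumUpdate(List.of("bronze", "silver"), List.of("bronze", "silver", "gold")); // ok
    validateEnumUpdate(List.of("bronze", "silver"), List.of("gold")); // throws
  }
}
```
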
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/UserRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/UserRepository.java
index 84f20bf36e8e..7389866ce275 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/UserRepository.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/UserRepository.java
@@ -63,6 +63,7 @@
import org.openmetadata.service.secrets.SecretsManager;
import org.openmetadata.service.secrets.SecretsManagerFactory;
import org.openmetadata.service.security.SecurityUtil;
+import org.openmetadata.service.security.auth.BotTokenCache;
import org.openmetadata.service.security.policyevaluator.SubjectContext;
import org.openmetadata.service.util.EntityUtil;
import org.openmetadata.service.util.EntityUtil.Fields;
@@ -509,6 +510,14 @@ public static String invalidTeam(int field, String team, String user, String use
}
}
+ @Override
+ protected void postDelete(User entity) {
+ // If the user is a bot, its token needs to be invalidated
+ if (Boolean.TRUE.equals(entity.getIsBot())) {
+ BotTokenCache.invalidateToken(entity.getName());
+ }
+ }
+
/** Handles entity updated from PUT and POST operation. */
public class UserUpdater extends EntityUpdater {
public UserUpdater(User original, User updated, Operation operation) {
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/migration/MigrationValidationClient.java b/openmetadata-service/src/main/java/org/openmetadata/service/migration/MigrationValidationClient.java
new file mode 100644
index 000000000000..d803ebc7dd9e
--- /dev/null
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/migration/MigrationValidationClient.java
@@ -0,0 +1,72 @@
+package org.openmetadata.service.migration;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+import java.util.stream.Stream;
+import lombok.Getter;
+import lombok.extern.slf4j.Slf4j;
+import org.openmetadata.service.OpenMetadataApplicationConfig;
+import org.openmetadata.service.jdbi3.MigrationDAO;
+
+@Slf4j
+public class MigrationValidationClient {
+ @Getter public static MigrationValidationClient instance;
+
+ private final MigrationDAO migrationDAO;
+ private final OpenMetadataApplicationConfig config;
+ @Getter private final List<String> expectedMigrationList;
+
+ private MigrationValidationClient(
+ MigrationDAO migrationDAO, OpenMetadataApplicationConfig config) {
+ this.migrationDAO = migrationDAO;
+ this.config = config;
+ this.expectedMigrationList = loadExpectedMigrationList();
+ }
+
+ public static MigrationValidationClient initialize(
+ MigrationDAO migrationDAO, OpenMetadataApplicationConfig config) {
+
+ if (instance == null) {
+ instance = new MigrationValidationClient(migrationDAO, config);
+ }
+ return instance;
+ }
+
+ public List<String> getCurrentVersions() {
+ return migrationDAO.getMigrationVersions();
+ }
+
+ private List<String> loadExpectedMigrationList() {
+ try {
+ String nativePath = config.getMigrationConfiguration().getNativePath();
+ String extensionPath = config.getMigrationConfiguration().getExtensionPath();
+
+ List<String> availableOMNativeMigrations = getMigrationFilesFromPath(nativePath);
+
+ // If we only have OM migrations, return them
+ if (extensionPath == null || extensionPath.isEmpty()) {
+ return availableOMNativeMigrations;
+ }
+
+ // Otherwise, fetch the extension migration and sort the results
+ List<String> availableOMExtensionMigrations = getMigrationFilesFromPath(extensionPath);
+
+ return Stream.concat(
+ availableOMNativeMigrations.stream(), availableOMExtensionMigrations.stream())
+ .sorted()
+ .toList();
+ } catch (Exception e) {
+ LOG.error("Error loading expected migration list", e);
+ return List.of();
+ }
+ }
+
+ private List<String> getMigrationFilesFromPath(String path) {
+ return Arrays.stream(Objects.requireNonNull(new File(path).listFiles(File::isDirectory)))
+ .map(File::getName)
+ .sorted()
+ .toList();
+ }
+}
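
MigrationValidationClient compares the versions recorded in SERVER_CHANGE_LOG with the migration folder names found under the configured native (and optional extension) paths. A sketch of that comparison; the path and the applied list in main are examples only, since the applied versions would normally come from MigrationDAO:

```java
import java.io.File;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;

// Sketch of the expected-vs-applied comparison done by MigrationValidationClient:
// expected versions are the migration folder names on disk, applied versions come from
// the SERVER_CHANGE_LOG table. The path and the applied list below are examples only.
public class MigrationCheckSketch {
  static List<String> expectedVersions(String migrationRoot) {
    File[] dirs = Objects.requireNonNull(new File(migrationRoot).listFiles(File::isDirectory));
    return Arrays.stream(dirs).map(File::getName).sorted().toList();
  }

  static boolean migrationsUpToDate(List<String> applied, List<String> expected) {
    return applied.equals(expected); // same versions, same order
  }

  public static void main(String[] args) {
    List<String> expected = expectedVersions("bootstrap/sql/migrations/native");
    List<String> applied = List.of("1.3.1", "1.3.2"); // would normally come from MigrationDAO
    System.out.println(migrationsUpToDate(applied, expected));
  }
}
```
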
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/migration/MigrationValidationClientException.java b/openmetadata-service/src/main/java/org/openmetadata/service/migration/MigrationValidationClientException.java
new file mode 100644
index 000000000000..2636d2839f83
--- /dev/null
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/migration/MigrationValidationClientException.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2021 Collate
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.openmetadata.service.migration;
+
+import javax.ws.rs.core.Response;
+import org.openmetadata.sdk.exception.WebServiceException;
+
+public class MigrationValidationClientException extends WebServiceException {
+ private static final String BY_NAME_MESSAGE = "Migration Validation Exception [%s] due to [%s].";
+ private static final String ERROR_TYPE = "MIGRATION_VALIDATION";
+
+ public MigrationValidationClientException(String message) {
+ super(Response.Status.BAD_REQUEST, ERROR_TYPE, message);
+ }
+
+ private MigrationValidationClientException(Response.Status status, String message) {
+ super(status, ERROR_TYPE, message);
+ }
+
+ public static MigrationValidationClientException byMessage(
+ String name, String errorMessage, Response.Status status) {
+ return new MigrationValidationClientException(status, buildMessageByName(name, errorMessage));
+ }
+
+ public static MigrationValidationClientException byMessage(String name, String errorMessage) {
+ return new MigrationValidationClientException(
+ Response.Status.BAD_REQUEST, buildMessageByName(name, errorMessage));
+ }
+
+ private static String buildMessageByName(String name, String errorMessage) {
+ return String.format(BY_NAME_MESSAGE, name, errorMessage);
+ }
+}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/migration/mysql/v132/Migration.java b/openmetadata-service/src/main/java/org/openmetadata/service/migration/mysql/v132/Migration.java
new file mode 100644
index 000000000000..a863138e504f
--- /dev/null
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/migration/mysql/v132/Migration.java
@@ -0,0 +1,37 @@
+package org.openmetadata.service.migration.mysql.v132;
+
+import static org.openmetadata.service.migration.utils.v132.MigrationUtil.migrateDbtConfigType;
+
+import lombok.SneakyThrows;
+import org.jdbi.v3.core.Handle;
+import org.openmetadata.service.jdbi3.CollectionDAO;
+import org.openmetadata.service.migration.api.MigrationProcessImpl;
+import org.openmetadata.service.migration.utils.MigrationFile;
+
+public class Migration extends MigrationProcessImpl {
+ private CollectionDAO collectionDAO;
+ private Handle handle;
+
+ public Migration(MigrationFile migrationFile) {
+ super(migrationFile);
+ }
+
+ @Override
+ public void initialize(Handle handle) {
+ super.initialize(handle);
+ this.handle = handle;
+ this.collectionDAO = handle.attach(CollectionDAO.class);
+ }
+
+ @Override
+ @SneakyThrows
+ public void runDataMigration() {
+ String getDbtPipelinesQuery =
+ "SELECT * from ingestion_pipeline_entity ipe WHERE JSON_EXTRACT(json, '$.pipelineType') = 'dbt'";
+ String updateSqlQuery =
+ "UPDATE ingestion_pipeline_entity ipe SET json = :json "
+ + "WHERE JSON_EXTRACT(json, '$.pipelineType') = 'dbt'"
+ + "AND id = :id";
+ migrateDbtConfigType(handle, updateSqlQuery, getDbtPipelinesQuery);
+ }
+}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/migration/mysql/v131/Migration.java b/openmetadata-service/src/main/java/org/openmetadata/service/migration/mysql/v133/Migration.java
similarity index 93%
rename from openmetadata-service/src/main/java/org/openmetadata/service/migration/mysql/v131/Migration.java
rename to openmetadata-service/src/main/java/org/openmetadata/service/migration/mysql/v133/Migration.java
index 04a27b28dec1..8192db1a66f0 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/migration/mysql/v131/Migration.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/migration/mysql/v133/Migration.java
@@ -1,4 +1,4 @@
-package org.openmetadata.service.migration.mysql.v131;
+package org.openmetadata.service.migration.mysql.v133;
import static org.openmetadata.service.migration.utils.v131.MigrationUtil.migrateCronExpression;
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/migration/postgres/v132/Migration.java b/openmetadata-service/src/main/java/org/openmetadata/service/migration/postgres/v132/Migration.java
new file mode 100644
index 000000000000..f7f5950e830b
--- /dev/null
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/migration/postgres/v132/Migration.java
@@ -0,0 +1,37 @@
+package org.openmetadata.service.migration.postgres.v132;
+
+import static org.openmetadata.service.migration.utils.v132.MigrationUtil.migrateDbtConfigType;
+
+import lombok.SneakyThrows;
+import org.jdbi.v3.core.Handle;
+import org.openmetadata.service.jdbi3.CollectionDAO;
+import org.openmetadata.service.migration.api.MigrationProcessImpl;
+import org.openmetadata.service.migration.utils.MigrationFile;
+
+public class Migration extends MigrationProcessImpl {
+ private CollectionDAO collectionDAO;
+ private Handle handle;
+
+ public Migration(MigrationFile migrationFile) {
+ super(migrationFile);
+ }
+
+ @Override
+ public void initialize(Handle handle) {
+ super.initialize(handle);
+ this.handle = handle;
+ this.collectionDAO = handle.attach(CollectionDAO.class);
+ }
+
+ @Override
+ @SneakyThrows
+ public void runDataMigration() {
+ String getDbtPipelinesQuery =
+ "SELECT * from ingestion_pipeline_entity ipe WHERE json #>> '{pipelineType}' = 'dbt'";
+ String updateSqlQuery =
+ "UPDATE ingestion_pipeline_entity ipe SET json = :json::jsonb "
+ + "WHERE json #>> '{pipelineType}' = 'dbt'"
+ + "AND id = :id";
+ migrateDbtConfigType(handle, updateSqlQuery, getDbtPipelinesQuery);
+ }
+}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/migration/postgres/v131/Migration.java b/openmetadata-service/src/main/java/org/openmetadata/service/migration/postgres/v133/Migration.java
similarity index 93%
rename from openmetadata-service/src/main/java/org/openmetadata/service/migration/postgres/v131/Migration.java
rename to openmetadata-service/src/main/java/org/openmetadata/service/migration/postgres/v133/Migration.java
index b5570372ba37..82325342ebd4 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/migration/postgres/v131/Migration.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/migration/postgres/v133/Migration.java
@@ -1,4 +1,4 @@
-package org.openmetadata.service.migration.postgres.v131;
+package org.openmetadata.service.migration.postgres.v133;
import static org.openmetadata.service.migration.utils.v131.MigrationUtil.migrateCronExpression;
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/migration/utils/v132/MigrationUtil.java b/openmetadata-service/src/main/java/org/openmetadata/service/migration/utils/v132/MigrationUtil.java
new file mode 100644
index 000000000000..27fe3e7c62c0
--- /dev/null
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/migration/utils/v132/MigrationUtil.java
@@ -0,0 +1,102 @@
+package org.openmetadata.service.migration.utils.v132;
+
+import java.util.LinkedHashMap;
+import lombok.extern.slf4j.Slf4j;
+import org.jdbi.v3.core.Handle;
+import org.json.JSONObject;
+import org.openmetadata.schema.entity.services.ingestionPipelines.IngestionPipeline;
+import org.openmetadata.schema.metadataIngestion.dbtconfig.DbtAzureConfig;
+import org.openmetadata.schema.metadataIngestion.dbtconfig.DbtCloudConfig;
+import org.openmetadata.schema.metadataIngestion.dbtconfig.DbtGCSConfig;
+import org.openmetadata.schema.metadataIngestion.dbtconfig.DbtHttpConfig;
+import org.openmetadata.schema.metadataIngestion.dbtconfig.DbtLocalConfig;
+import org.openmetadata.schema.metadataIngestion.dbtconfig.DbtS3Config;
+import org.openmetadata.service.exception.UnhandledServerException;
+import org.openmetadata.service.util.JsonUtils;
+
+@Slf4j
+public class MigrationUtil {
+
+ private MigrationUtil() {
+ /* Utility class; not meant to be instantiated */
+ }
+
+ public static void migrateDbtConfigType(
+ Handle handle, String updateSqlQuery, String dbtGetDbtPipelinesQuery) {
+ handle
+ .createQuery(dbtGetDbtPipelinesQuery)
+ .mapToMap()
+ .forEach(
+ row -> {
+ try {
+ IngestionPipeline ingestionPipeline =
+ JsonUtils.readValue(row.get("json").toString(), IngestionPipeline.class);
+ String id = row.get("id").toString();
+ LinkedHashMap sourceConfig =
+ (LinkedHashMap) ingestionPipeline.getSourceConfig().getConfig();
+ LinkedHashMap dbtConfigSource = (LinkedHashMap) sourceConfig.get("dbtConfigSource");
+
+ sourceConfig.put("dbtConfigSource", addDbtConfigType(dbtConfigSource));
+ String json = JsonUtils.pojoToJson(ingestionPipeline);
+
+ handle.createUpdate(updateSqlQuery).bind("json", json).bind("id", id).execute();
+
+ } catch (Exception ex) {
+ LOG.warn("Error during the dbt type migration due to ", ex);
+ }
+ });
+ }
+
+ public static Object addDbtConfigType(LinkedHashMap dbtConfigSource) {
+ String jsonString = new JSONObject(dbtConfigSource).toString();
+
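+ // The payload is tried against each dbt config schema in turn: a payload that does not match
+ // is expected to throw UnhandledServerException, and we simply fall through to the next type.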
+ // For adding s3 type
+ try {
+ DbtS3Config dbtS3Config = JsonUtils.readValue(jsonString, DbtS3Config.class);
+ dbtS3Config.setDbtConfigType(DbtS3Config.DbtConfigType.S_3);
+ return dbtS3Config;
+ } catch (UnhandledServerException ex) {
+ }
+
+ // For adding GCS type
+ try {
+ DbtGCSConfig dbtGCSConfig = JsonUtils.readValue(jsonString, DbtGCSConfig.class);
+ dbtGCSConfig.setDbtConfigType(DbtGCSConfig.DbtConfigType.GCS);
+ return dbtGCSConfig;
+ } catch (UnhandledServerException ex) {
+ }
+
+ // For adding Azure type
+ try {
+ DbtAzureConfig dbtAzureConfig = JsonUtils.readValue(jsonString, DbtAzureConfig.class);
+ dbtAzureConfig.setDbtConfigType(DbtAzureConfig.DbtConfigType.AZURE);
+ return dbtAzureConfig;
+ } catch (UnhandledServerException ex) {
+ }
+
+ // For adding cloud type
+ try {
+ DbtCloudConfig dbtCloudConfig = JsonUtils.readValue(jsonString, DbtCloudConfig.class);
+ dbtCloudConfig.setDbtConfigType(DbtCloudConfig.DbtConfigType.CLOUD);
+ return dbtCloudConfig;
+ } catch (UnhandledServerException ex) {
+ }
+
+ // For adding local type
+ try {
+ DbtLocalConfig dbtLocalConfig = JsonUtils.readValue(jsonString, DbtLocalConfig.class);
+ dbtLocalConfig.setDbtConfigType(DbtLocalConfig.DbtConfigType.LOCAL);
+ return dbtLocalConfig;
+ } catch (UnhandledServerException ex) {
+ }
+
+ // For adding http type
+ try {
+ DbtHttpConfig dbtHttpConfig = JsonUtils.readValue(jsonString, DbtHttpConfig.class);
+ dbtHttpConfig.setDbtConfigType(DbtHttpConfig.DbtConfigType.HTTP);
+ return dbtHttpConfig;
+ } catch (UnhandledServerException ex) {
+ }
+ return null;
+ }
+}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/apps/AppMarketPlaceResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/apps/AppMarketPlaceResource.java
index 423b0b6360e3..6970ef4fa2e5 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/apps/AppMarketPlaceResource.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/apps/AppMarketPlaceResource.java
@@ -47,6 +47,7 @@
import org.openmetadata.sdk.PipelineServiceClient;
import org.openmetadata.service.Entity;
import org.openmetadata.service.OpenMetadataApplicationConfig;
+import org.openmetadata.service.apps.ApplicationHandler;
import org.openmetadata.service.clients.pipeline.PipelineServiceClientFactory;
import org.openmetadata.service.jdbi3.AppMarketPlaceRepository;
import org.openmetadata.service.jdbi3.ListFilter;
@@ -211,7 +212,10 @@ public AppMarketPlaceDefinition get(
@QueryParam("include")
@DefaultValue("non-deleted")
Include include) {
- return getInternal(uriInfo, securityContext, id, fieldsParam, include);
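+ // Enrich the response with whether this marketplace app is registered as a preview application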
+ AppMarketPlaceDefinition definition =
+ getInternal(uriInfo, securityContext, id, fieldsParam, include);
+ definition.setPreview(ApplicationHandler.getInstance().isPreview(definition.getName()));
+ return definition;
}
@GET
@@ -247,7 +251,10 @@ public AppMarketPlaceDefinition getByName(
@QueryParam("include")
@DefaultValue("non-deleted")
Include include) {
- return getByNameInternal(uriInfo, securityContext, name, fieldsParam, include);
+ AppMarketPlaceDefinition definition =
+ getByNameInternal(uriInfo, securityContext, name, fieldsParam, include);
+ definition.setPreview(ApplicationHandler.getInstance().isPreview(definition.getName()));
+ return definition;
}
@GET
@@ -442,7 +449,8 @@ private AppMarketPlaceDefinition getApplicationDefinition(
.withAppScreenshots(create.getAppScreenshots())
.withFeatures(create.getFeatures())
.withSourcePythonClass(create.getSourcePythonClass())
- .withAllowConfiguration(create.getAllowConfiguration());
+ .withAllowConfiguration(create.getAllowConfiguration())
+ .withSystem(create.getSystem());
// Validate App
validateApplication(app);
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/apps/AppResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/apps/AppResource.java
index 3cf18bb2d130..c4c600ef806f 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/apps/AppResource.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/apps/AppResource.java
@@ -45,8 +45,6 @@
import lombok.extern.slf4j.Slf4j;
import org.openmetadata.common.utils.CommonUtil;
import org.openmetadata.schema.ServiceEntityInterface;
-import org.openmetadata.schema.api.configuration.apps.AppPrivateConfig;
-import org.openmetadata.schema.api.configuration.apps.AppsPrivateConfiguration;
import org.openmetadata.schema.api.data.RestoreEntity;
import org.openmetadata.schema.entity.app.App;
import org.openmetadata.schema.entity.app.AppMarketPlaceDefinition;
@@ -68,6 +66,7 @@
import org.openmetadata.service.apps.ApplicationHandler;
import org.openmetadata.service.apps.scheduler.AppScheduler;
import org.openmetadata.service.clients.pipeline.PipelineServiceClientFactory;
+import org.openmetadata.service.exception.CatalogExceptionMessage;
import org.openmetadata.service.exception.EntityNotFoundException;
import org.openmetadata.service.jdbi3.AppRepository;
import org.openmetadata.service.jdbi3.CollectionDAO;
@@ -98,7 +97,6 @@
public class AppResource extends EntityResource<App, CreateApp> {
public static final String COLLECTION_PATH = "v1/apps/";
private OpenMetadataApplicationConfig openMetadataApplicationConfig;
- private AppsPrivateConfiguration privateConfiguration;
private PipelineServiceClient pipelineServiceClient;
static final String FIELDS = "owner";
private SearchRepository searchRepository;
@@ -107,7 +105,6 @@ public class AppResource extends EntityResource {
public void initialize(OpenMetadataApplicationConfig config) {
try {
this.openMetadataApplicationConfig = config;
- this.privateConfiguration = config.getAppsPrivateConfiguration();
this.pipelineServiceClient =
PipelineServiceClientFactory.createPipelineServiceClient(
config.getPipelineServiceClientConfiguration());
@@ -139,23 +136,8 @@ public void initialize(OpenMetadataApplicationConfig config) {
// Schedule
if (app.getScheduleType().equals(ScheduleType.Scheduled)) {
- setAppRuntimeProperties(app);
- ApplicationHandler.installApplication(app, Entity.getCollectionDAO(), searchRepository);
- }
- }
-
- // Initialize installed applications
- for (App installedApp : repository.listAll()) {
- App appWithBot = getAppForInit(installedApp.getName());
- if (appWithBot == null) {
- LOG.error(
- String.format(
- "Failed to init app [%s]. GET should return the installed app",
- installedApp.getName()));
- } else {
- setAppRuntimeProperties(appWithBot);
- ApplicationHandler.runAppInit(appWithBot, dao, searchRepository);
- LOG.info(String.format("Initialized installed app [%s]", installedApp.getName()));
+ ApplicationHandler.getInstance()
+ .installApplication(app, Entity.getCollectionDAO(), searchRepository);
}
}
} catch (Exception ex) {
@@ -183,24 +165,6 @@ public static class AppRunList extends ResultList {
/* Required for serde */
}
- /**
- * Load the apps' OM configuration and private parameters
- */
- private void setAppRuntimeProperties(App app) {
- app.setOpenMetadataServerConnection(
- new OpenMetadataConnectionBuilder(openMetadataApplicationConfig, app.getBot().getName())
- .build());
-
- if (privateConfiguration != null
- && !nullOrEmpty(privateConfiguration.getAppsPrivateConfiguration())) {
- for (AppPrivateConfig appPrivateConfig : privateConfiguration.getAppsPrivateConfiguration()) {
- if (app.getName().equals(appPrivateConfig.getName())) {
- app.setPrivateConfiguration(appPrivateConfig.getParameters());
- }
- }
- }
- }
-
/**
* We don't want to store runtime information into the DB
*/
@@ -580,10 +544,11 @@ public Response create(
create.getName(),
new EntityUtil.Fields(repository.getMarketPlace().getAllowedFields()));
App app = getApplication(definition, create, securityContext.getUserPrincipal().getName());
- setAppRuntimeProperties(app);
if (app.getScheduleType().equals(ScheduleType.Scheduled)) {
- ApplicationHandler.installApplication(app, Entity.getCollectionDAO(), searchRepository);
- ApplicationHandler.configureApplication(app, Entity.getCollectionDAO(), searchRepository);
+ ApplicationHandler.getInstance()
+ .installApplication(app, Entity.getCollectionDAO(), searchRepository);
+ ApplicationHandler.getInstance()
+ .configureApplication(app, Entity.getCollectionDAO(), searchRepository);
}
// We don't want to store this information
unsetAppRuntimeProperties(app);
@@ -617,13 +582,16 @@ public Response patchApplication(
JsonPatch patch)
throws SchedulerException {
App app = repository.get(null, id, repository.getFields("bot,pipelines"));
+ if (app.getSystem()) {
+ throw new IllegalArgumentException(
+ CatalogExceptionMessage.systemEntityModifyNotAllowed(app.getName(), "SystemApp"));
+ }
AppScheduler.getInstance().deleteScheduledApplication(app);
Response response = patchInternal(uriInfo, securityContext, id, patch);
App updatedApp = (App) response.getEntity();
- setAppRuntimeProperties(updatedApp);
if (app.getScheduleType().equals(ScheduleType.Scheduled)) {
- ApplicationHandler.installApplication(
- updatedApp, Entity.getCollectionDAO(), searchRepository);
+ ApplicationHandler.getInstance()
+ .installApplication(updatedApp, Entity.getCollectionDAO(), searchRepository);
}
// We don't want to store this information
unsetAppRuntimeProperties(updatedApp);
@@ -656,9 +624,9 @@ public Response createOrUpdate(
new EntityUtil.Fields(repository.getMarketPlace().getAllowedFields()));
App app = getApplication(definition, create, securityContext.getUserPrincipal().getName());
AppScheduler.getInstance().deleteScheduledApplication(app);
- setAppRuntimeProperties(app);
if (app.getScheduleType().equals(ScheduleType.Scheduled)) {
- ApplicationHandler.installApplication(app, Entity.getCollectionDAO(), searchRepository);
+ ApplicationHandler.getInstance()
+ .installApplication(app, Entity.getCollectionDAO(), searchRepository);
}
// We don't want to store this information
unsetAppRuntimeProperties(app);
@@ -673,6 +641,9 @@ public Response createOrUpdate(
description = "Delete a App by `name`.",
responses = {
@ApiResponse(responseCode = "200", description = "OK"),
+ @ApiResponse(
+ responseCode = "400",
+ description = "System entity {name} of type SystemApp can not be deleted."),
@ApiResponse(responseCode = "404", description = "App for instance {name} is not found")
})
public Response delete(
@@ -686,6 +657,10 @@ public Response delete(
@PathParam("name")
String name) {
App app = repository.getByName(null, name, repository.getFields("bot,pipelines"));
+ if (app.getSystem()) {
+ throw new IllegalArgumentException(
+ CatalogExceptionMessage.systemEntityDeleteNotAllowed(app.getName(), "SystemApp"));
+ }
// Remove from Pipeline Service
deleteApp(securityContext, app, hardDelete);
return deleteByName(uriInfo, securityContext, name, true, hardDelete);
@@ -699,6 +674,9 @@ public Response delete(
description = "Delete a App by `Id`.",
responses = {
@ApiResponse(responseCode = "200", description = "OK"),
+ @ApiResponse(
+ responseCode = "400",
+ description = "System entity {name} of type SystemApp can not be deleted."),
@ApiResponse(responseCode = "404", description = "App for instance {id} is not found")
})
public Response delete(
@@ -711,6 +689,10 @@ public Response delete(
@Parameter(description = "Id of the App", schema = @Schema(type = "UUID")) @PathParam("id")
UUID id) {
App app = repository.get(null, id, repository.getFields("bot,pipelines"));
+ if (app.getSystem()) {
+ throw new IllegalArgumentException(
+ CatalogExceptionMessage.systemEntityDeleteNotAllowed(app.getName(), "SystemApp"));
+ }
// Remove from Pipeline Service
deleteApp(securityContext, app, hardDelete);
// Remove from repository
@@ -739,9 +721,9 @@ public Response restoreApp(
Response response = restoreEntity(uriInfo, securityContext, restore.getId());
if (response.getStatus() == Response.Status.OK.getStatusCode()) {
App app = (App) response.getEntity();
- setAppRuntimeProperties(app);
if (app.getScheduleType().equals(ScheduleType.Scheduled)) {
- ApplicationHandler.installApplication(app, Entity.getCollectionDAO(), searchRepository);
+ ApplicationHandler.getInstance()
+ .installApplication(app, Entity.getCollectionDAO(), searchRepository);
}
// We don't want to store this information
unsetAppRuntimeProperties(app);
@@ -775,9 +757,9 @@ public Response scheduleApplication(
@Context SecurityContext securityContext) {
App app =
repository.getByName(uriInfo, name, new EntityUtil.Fields(repository.getAllowedFields()));
- setAppRuntimeProperties(app);
if (app.getScheduleType().equals(ScheduleType.Scheduled)) {
- ApplicationHandler.installApplication(app, repository.getDaoCollection(), searchRepository);
+ ApplicationHandler.getInstance()
+ .installApplication(app, repository.getDaoCollection(), searchRepository);
return Response.status(Response.Status.OK).entity("App is Scheduled.").build();
}
throw new IllegalArgumentException("App is not of schedule type Scheduled.");
@@ -811,9 +793,9 @@ public Response configureApplication(
repository.getByName(uriInfo, name, new EntityUtil.Fields(repository.getAllowedFields()));
// The application will have the updated appConfiguration we can use to run the `configure`
// logic
- setAppRuntimeProperties(app);
try {
- ApplicationHandler.configureApplication(app, repository.getDaoCollection(), searchRepository);
+ ApplicationHandler.getInstance()
+ .configureApplication(app, repository.getDaoCollection(), searchRepository);
return Response.status(Response.Status.OK).entity("App has been configured.").build();
} catch (RuntimeException e) {
return Response.status(Response.Status.INTERNAL_SERVER_ERROR)
@@ -845,10 +827,9 @@ public Response triggerApplicationRun(
String name) {
EntityUtil.Fields fields = getFields(String.format("%s,bot,pipelines", FIELD_OWNER));
App app = repository.getByName(uriInfo, name, fields);
- setAppRuntimeProperties(app);
if (app.getAppType().equals(AppType.Internal)) {
- ApplicationHandler.triggerApplicationOnDemand(
- app, Entity.getCollectionDAO(), searchRepository);
+ ApplicationHandler.getInstance()
+ .triggerApplicationOnDemand(app, Entity.getCollectionDAO(), searchRepository);
return Response.status(Response.Status.OK).entity("Application Triggered").build();
} else {
if (!app.getPipelines().isEmpty()) {
@@ -894,9 +875,9 @@ public Response deployApplicationFlow(
String name) {
EntityUtil.Fields fields = getFields(String.format("%s,bot,pipelines", FIELD_OWNER));
App app = repository.getByName(uriInfo, name, fields);
- setAppRuntimeProperties(app);
if (app.getAppType().equals(AppType.Internal)) {
- ApplicationHandler.installApplication(app, Entity.getCollectionDAO(), searchRepository);
+ ApplicationHandler.getInstance()
+ .installApplication(app, Entity.getCollectionDAO(), searchRepository);
return Response.status(Response.Status.OK).entity("Application Deployed").build();
} else {
if (!app.getPipelines().isEmpty()) {
@@ -978,7 +959,8 @@ private App getApplication(
.withAppScreenshots(marketPlaceDefinition.getAppScreenshots())
.withFeatures(marketPlaceDefinition.getFeatures())
.withSourcePythonClass(marketPlaceDefinition.getSourcePythonClass())
- .withAllowConfiguration(marketPlaceDefinition.getAllowConfiguration());
+ .withAllowConfiguration(marketPlaceDefinition.getAllowConfiguration())
+ .withSystem(marketPlaceDefinition.getSystem());
// validate Bot if provided
validateAndAddBot(app, createAppRequest.getBot());
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/dqtests/TestCaseResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/dqtests/TestCaseResource.java
index a448fd16db65..4ad718b6c70a 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/dqtests/TestCaseResource.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/dqtests/TestCaseResource.java
@@ -174,12 +174,31 @@ public ResultList list(
schema = @Schema(implementation = Include.class))
@QueryParam("include")
@DefaultValue("non-deleted")
- Include include) {
+ Include include,
+ @Parameter(
+ description = "Filter test case by status",
+ schema =
+ @Schema(
+ type = "string",
+ allowableValues = {"Success", "Failed", "Aborted", "Queued"}))
+ @QueryParam("testCaseStatus")
+ String status,
+ @Parameter(
+ description = "Filter for test case type (e.g. column, table, all",
+ schema =
+ @Schema(
+ type = "string",
+ allowableValues = {"column", "table", "all"}))
+ @QueryParam("testCaseType")
+ @DefaultValue("all")
+ String type) {
ListFilter filter =
new ListFilter(include)
.addQueryParam("testSuiteId", testSuiteId)
.addQueryParam("includeAllTests", includeAllTests.toString())
- .addQueryParam("orderByLastExecutionDate", orderByLastExecutionDate.toString());
+ .addQueryParam("orderByLastExecutionDate", orderByLastExecutionDate.toString())
+ .addQueryParam("testCaseStatus", status)
+ .addQueryParam("testCaseType", type);
ResourceContextInterface resourceContext;
if (entityLink != null) {
EntityLink entityLinkParsed = EntityLink.parse(entityLink);
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/dqtests/TestSuiteResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/dqtests/TestSuiteResource.java
index 7bf403ae1fe9..553577e9b4c7 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/dqtests/TestSuiteResource.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/dqtests/TestSuiteResource.java
@@ -11,6 +11,7 @@
import io.swagger.v3.oas.annotations.parameters.RequestBody;
import io.swagger.v3.oas.annotations.responses.ApiResponse;
import io.swagger.v3.oas.annotations.tags.Tag;
+import java.io.IOException;
import java.util.List;
import java.util.UUID;
import javax.json.JsonPatch;
@@ -321,7 +322,8 @@ public TestSummary getTestsExecutionSummary(
description = "get summary for a specific test suite",
schema = @Schema(type = "String", format = "uuid"))
@QueryParam("testSuiteId")
- UUID testSuiteId) {
+ UUID testSuiteId)
+ throws IOException {
ResourceContext<?> resourceContext = getResourceContext();
OperationContext operationContext =
new OperationContext(Entity.TABLE, MetadataOperation.VIEW_TESTS);
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/feeds/FeedResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/feeds/FeedResource.java
index be0649a6e0b7..976799e1d0f1 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/feeds/FeedResource.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/feeds/FeedResource.java
@@ -297,7 +297,7 @@ public Response resolveTask(
String id,
@Valid ResolveTask resolveTask) {
Thread task = dao.getTask(Integer.parseInt(id));
- dao.checkPermissionsForResolveTask(task, false, securityContext);
+ dao.checkPermissionsForResolveTask(authorizer, task, false, securityContext);
return dao.resolveTask(uriInfo, task, securityContext.getUserPrincipal().getName(), resolveTask)
.toResponse();
}
@@ -326,7 +326,7 @@ public Response closeTask(
String id,
@Valid CloseTask closeTask) {
Thread task = dao.getTask(Integer.parseInt(id));
- dao.checkPermissionsForResolveTask(task, true, securityContext);
+ dao.checkPermissionsForResolveTask(authorizer, task, true, securityContext);
return dao.closeTask(uriInfo, task, securityContext.getUserPrincipal().getName(), closeTask)
.toResponse();
}
@@ -590,6 +590,7 @@ private Thread getThread(SecurityContext securityContext, CreateThread create) {
.withType(create.getType())
.withTask(getTaskDetails(create.getTaskDetails()))
.withAnnouncement(create.getAnnouncementDetails())
+ .withChatbot(create.getChatbotDetails())
.withUpdatedBy(securityContext.getUserPrincipal().getName())
.withUpdatedAt(System.currentTimeMillis());
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/feeds/SuggestionsResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/feeds/SuggestionsResource.java
index 6e74397708d8..29056eb367db 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/feeds/SuggestionsResource.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/feeds/SuggestionsResource.java
@@ -14,7 +14,9 @@
package org.openmetadata.service.resources.feeds;
import static org.openmetadata.common.utils.CommonUtil.listOrEmpty;
+import static org.openmetadata.common.utils.CommonUtil.nullOrEmpty;
import static org.openmetadata.schema.type.EventType.SUGGESTION_CREATED;
+import static org.openmetadata.schema.type.EventType.SUGGESTION_REJECTED;
import static org.openmetadata.schema.type.EventType.SUGGESTION_UPDATED;
import static org.openmetadata.service.util.RestUtil.CHANGE_CUSTOM_HEADER;
@@ -206,12 +208,12 @@ public Suggestion get(
@Path("/{id}/accept")
@Operation(
operationId = "acceptSuggestion",
- summary = "Close a task",
- description = "Close a task without making any changes to the entity.",
+ summary = "Accept a Suggestion",
+ description = "Accept a Suggestion and apply the changes to the entity.",
responses = {
@ApiResponse(
responseCode = "200",
- description = "The task thread.",
+ description = "The suggestion.",
content =
@Content(
mediaType = "application/json",
@@ -259,6 +261,107 @@ public Response rejectSuggestion(
.toResponse();
}
+ @PUT
+ @Path("accept-all")
+ @Operation(
+ operationId = "acceptAllSuggestion",
+ summary = "Accept all Suggestions from a user and an Entity",
+ description = "Accept a Suggestion and apply the changes to the entity.",
+ responses = {
+ @ApiResponse(
+ responseCode = "200",
+ description = "The suggestion.",
+ content =
+ @Content(
+ mediaType = "application/json",
+ schema = @Schema(implementation = Suggestion.class))),
+ @ApiResponse(responseCode = "400", description = "Bad request")
+ })
+ public RestUtil.PutResponse<List<Suggestion>> acceptAllSuggestions(
+ @Context UriInfo uriInfo,
+ @Context SecurityContext securityContext,
+ @Parameter(description = "user id", schema = @Schema(type = "string")) @QueryParam("userId")
+ UUID userId,
+ @Parameter(description = "fullyQualifiedName of entity", schema = @Schema(type = "string"))
+ @QueryParam("entityFQN")
+ String entityFQN,
+ @Parameter(description = "Suggestion type being accepted", schema = @Schema(type = "string"))
+ @QueryParam("suggestionType")
+ @DefaultValue("SuggestDescription")
+ SuggestionType suggestionType) {
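+ // Gather every open suggestion of the requested type raised by this user on the entity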
+ SuggestionFilter filter =
+ SuggestionFilter.builder()
+ .suggestionStatus(SuggestionStatus.Open)
+ .entityFQN(entityFQN)
+ .createdBy(userId)
+ .suggestionType(suggestionType)
+ .build();
+ List<Suggestion> suggestions = dao.listAll(filter);
+ if (!nullOrEmpty(suggestions)) {
+ // Validate the permissions for one suggestion
+ Suggestion suggestion = dao.get(suggestions.get(0).getId());
+ dao.checkPermissionsForAcceptOrRejectSuggestion(
+ suggestion, SuggestionStatus.Rejected, securityContext);
+ dao.checkPermissionsForEditEntity(suggestion, suggestionType, securityContext, authorizer);
+ return dao.acceptSuggestionList(
+ uriInfo, suggestions, suggestionType, securityContext, authorizer);
+ } else {
+ // No suggestions found
+ return new RestUtil.PutResponse<>(
+ Response.Status.BAD_REQUEST, List.of(), SUGGESTION_REJECTED);
+ }
+ }
+
+ @PUT
+ @Path("reject-all")
+ @Operation(
+ operationId = "rejectAllSuggestion",
+ summary = "Reject all Suggestions from a user and an Entity",
+ description = "Reject all Suggestions from a user and an Entity",
+ responses = {
+ @ApiResponse(
+ responseCode = "200",
+ description = "The suggestion.",
+ content =
+ @Content(
+ mediaType = "application/json",
+ schema = @Schema(implementation = Suggestion.class))),
+ @ApiResponse(responseCode = "400", description = "Bad request")
+ })
+ public RestUtil.PutResponse<List<Suggestion>> rejectAllSuggestions(
+ @Context UriInfo uriInfo,
+ @Context SecurityContext securityContext,
+ @Parameter(description = "user id", schema = @Schema(type = "string")) @QueryParam("userId")
+ UUID userId,
+ @Parameter(description = "fullyQualifiedName of entity", schema = @Schema(type = "string"))
+ @QueryParam("entityFQN")
+ String entityFQN,
+ @Parameter(description = "Suggestion type being rejected", schema = @Schema(type = "string"))
+ @QueryParam("suggestionType")
+ @DefaultValue("SuggestDescription")
+ SuggestionType suggestionType) {
+ SuggestionFilter filter =
+ SuggestionFilter.builder()
+ .suggestionStatus(SuggestionStatus.Open)
+ .entityFQN(entityFQN)
+ .createdBy(userId)
+ .suggestionType(suggestionType)
+ .build();
+ List<Suggestion> suggestions = dao.listAll(filter);
+ if (!nullOrEmpty(suggestions)) {
+ // Validate the permissions for one suggestion
+ Suggestion suggestion = dao.get(suggestions.get(0).getId());
+ dao.checkPermissionsForAcceptOrRejectSuggestion(
+ suggestion, SuggestionStatus.Rejected, securityContext);
+ return dao.rejectSuggestionList(
+ uriInfo, suggestions, securityContext.getUserPrincipal().getName());
+ } else {
+ // No suggestions found
+ return new RestUtil.PutResponse<>(
+ Response.Status.BAD_REQUEST, List.of(), SUGGESTION_REJECTED);
+ }
+ }
+
@PUT
@Path("/{id}")
@Operation(
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/lineage/LineageResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/lineage/LineageResource.java
index 5472de2d7102..5d81fc924352 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/lineage/LineageResource.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/lineage/LineageResource.java
@@ -273,6 +273,53 @@ public Response deleteLineage(
return Response.status(Status.OK).build();
}
+ @DELETE
+ @Path("/{fromEntity}/name/{fromFQN}/{toEntity}/name/{toFQN}")
+ @Operation(
+ operationId = "deleteLineageEdgeByName",
+ summary = "Delete a lineage edge by FQNs",
+ description =
+ "Delete a lineage edge with from entity as upstream node and to entity as downstream node.",
+ responses = {
+ @ApiResponse(responseCode = "200"),
+ @ApiResponse(
+ responseCode = "404",
+ description = "Entity for instance {fromFQN} is not found")
+ })
+ public Response deleteLineageByName(
+ @Context UriInfo uriInfo,
+ @Context SecurityContext securityContext,
+ @Parameter(
+ description = "Entity type of upstream entity of the edge",
+ required = true,
+ schema = @Schema(type = "string", example = "table, report, metrics, or dashboard"))
+ @PathParam("fromEntity")
+ String fromEntity,
+ @Parameter(description = "Entity FQN", required = true, schema = @Schema(type = "string"))
+ @PathParam("fromFQN")
+ String fromFQN,
+ @Parameter(
+ description = "Entity type for downstream entity of the edge",
+ required = true,
+ schema = @Schema(type = "string", example = "table, report, metrics, or dashboard"))
+ @PathParam("toEntity")
+ String toEntity,
+ @Parameter(description = "Entity FQN", required = true, schema = @Schema(type = "string"))
+ @PathParam("toFQN")
+ String toFQN) {
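+ // Deleting an edge is authorized as an EditLineage operation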
+ authorizer.authorize(
+ securityContext,
+ new OperationContext(LINEAGE_FIELD, MetadataOperation.EDIT_LINEAGE),
+ new LineageResourceContext());
+ boolean deleted = dao.deleteLineageByFQN(fromEntity, fromFQN, toEntity, toFQN);
+ if (!deleted) {
+ return Response.status(NOT_FOUND)
+ .entity(new ErrorMessage(NOT_FOUND.getStatusCode(), "Lineage edge not found"))
+ .build();
+ }
+ return Response.status(Status.OK).build();
+ }
+
private EntityLineage addHref(UriInfo uriInfo, EntityLineage lineage) {
Entity.withHref(uriInfo, lineage.getEntity());
Entity.withHref(uriInfo, lineage.getNodes());
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/policies/PolicyResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/policies/PolicyResource.java
index 0734bd2b2a7f..879e5ecf028d 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/policies/PolicyResource.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/policies/PolicyResource.java
@@ -498,6 +498,7 @@ public void validateCondition(
@Parameter(description = "Expression of validating rule", schema = @Schema(type = "string"))
@PathParam("expression")
String expression) {
+ authorizer.authorizeAdmin(securityContext);
CompiledRule.validateExpression(expression, Boolean.class);
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/system/ConfigResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/system/ConfigResource.java
index 4b7785a11cd5..79e9a76fbb70 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/system/ConfigResource.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/system/ConfigResource.java
@@ -84,6 +84,8 @@ public AuthenticationConfiguration getAuthConfig() {
authenticationConfiguration.getSamlConfiguration().getIdp().getAuthorityUrl()));
authenticationConfiguration.setSamlConfiguration(ssoClientConfig);
}
+
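+ // Strip the OIDC client configuration so it is never returned by this public endpoint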
+ authenticationConfiguration.setOidcConfiguration(null);
}
return authenticationConfiguration;
}
@@ -150,12 +152,12 @@ public LoginConfiguration getLoginConfiguration() {
@GET
@Path(("/pipeline-service-client"))
@Operation(
- operationId = "getAirflowConfiguration",
- summary = "Get airflow configuration",
+ operationId = "getPipelineServiceConfiguration",
+ summary = "Get Pipeline Service Client configuration",
responses = {
@ApiResponse(
responseCode = "200",
- description = "Airflow configuration",
+ description = "Pipeline Service Client configuration",
content =
@Content(
mediaType = "application/json",
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/system/SystemResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/system/SystemResource.java
index 93cfd5d89b51..d7193b16d0cf 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/system/SystemResource.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/system/SystemResource.java
@@ -34,16 +34,20 @@
import org.openmetadata.schema.auth.EmailRequest;
import org.openmetadata.schema.settings.Settings;
import org.openmetadata.schema.settings.SettingsType;
+import org.openmetadata.schema.system.ValidationResponse;
import org.openmetadata.schema.type.Include;
import org.openmetadata.schema.util.EntitiesCount;
import org.openmetadata.schema.util.ServicesCount;
+import org.openmetadata.sdk.PipelineServiceClient;
import org.openmetadata.service.Entity;
import org.openmetadata.service.OpenMetadataApplicationConfig;
+import org.openmetadata.service.clients.pipeline.PipelineServiceClientFactory;
import org.openmetadata.service.exception.UnhandledServerException;
import org.openmetadata.service.jdbi3.ListFilter;
import org.openmetadata.service.jdbi3.SystemRepository;
import org.openmetadata.service.resources.Collection;
import org.openmetadata.service.security.Authorizer;
+import org.openmetadata.service.security.JwtFilter;
import org.openmetadata.service.util.EmailUtil;
import org.openmetadata.service.util.ResultList;
@@ -55,19 +59,26 @@
@Collection(name = "system")
@Slf4j
public class SystemResource {
- public static final String COLLECTION_PATH = "/v1/util";
+ public static final String COLLECTION_PATH = "/v1/system";
private final SystemRepository systemRepository;
private final Authorizer authorizer;
private OpenMetadataApplicationConfig applicationConfig;
+ private PipelineServiceClient pipelineServiceClient;
+ private JwtFilter jwtFilter;
public SystemResource(Authorizer authorizer) {
this.systemRepository = Entity.getSystemRepository();
this.authorizer = authorizer;
}
- @SuppressWarnings("unused") // Method used for reflection
public void initialize(OpenMetadataApplicationConfig config) {
this.applicationConfig = config;
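+ // The pipeline service client and JWT filter below back the new /v1/system/status validation endpoint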
+ this.pipelineServiceClient =
+ PipelineServiceClientFactory.createPipelineServiceClient(
+ config.getPipelineServiceClientConfiguration());
+
+ this.jwtFilter =
+ new JwtFilter(config.getAuthenticationConfiguration(), config.getAuthorizerConfiguration());
}
public static class SettingsList extends ResultList<Settings> {
@@ -287,4 +298,24 @@ public ServicesCount listServicesCount(
ListFilter filter = new ListFilter(include);
return systemRepository.getAllServicesCount(filter);
}
+
+ @GET
+ @Path("/status")
+ @Operation(
+ operationId = "validateDeployment",
+ summary = "Validate the OpenMetadata deployment",
+ description =
+ "Check connectivity against your database, elasticsearch/opensearch, migrations,...",
+ responses = {
+ @ApiResponse(
+ responseCode = "200",
+ description = "validation OK",
+ content =
+ @Content(
+ mediaType = "application/json",
+ schema = @Schema(implementation = ServicesCount.class)))
+ })
+ public ValidationResponse validate() {
+ return systemRepository.validateSystem(applicationConfig, pipelineServiceClient, jwtFilter);
+ }
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchClient.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchClient.java
index d67843c3f5ad..b6ffee7c7adb 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchClient.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchClient.java
@@ -8,6 +8,7 @@
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
+import javax.json.JsonObject;
import javax.net.ssl.SSLContext;
import javax.ws.rs.core.Response;
import org.apache.commons.lang3.tuple.Pair;
@@ -87,6 +88,8 @@ Response searchLineage(
Response aggregate(String index, String fieldName, String value, String query) throws IOException;
+ JsonObject aggregate(String query, String index, JsonObject aggregationJson) throws IOException;
+
Response suggest(SearchRequest request) throws IOException;
void createEntity(String indexName, String docId, String doc);
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchIndexUtils.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchIndexUtils.java
index f8fc750ebbc8..cffaa6a1f916 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchIndexUtils.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchIndexUtils.java
@@ -3,7 +3,7 @@
import java.util.Collections;
import java.util.List;
import java.util.Map;
-import java.util.stream.Collectors;
+import java.util.Set;
import org.openmetadata.schema.type.EntityReference;
import org.openmetadata.schema.type.TagLabel;
@@ -15,15 +15,39 @@ public static List parseFollowers(List followersRef) {
if (followersRef == null) {
return Collections.emptyList();
}
- return followersRef.stream().map(item -> item.getId().toString()).collect(Collectors.toList());
+ return followersRef.stream().map(item -> item.getId().toString()).toList();
}
- public static void removeNonIndexableFields(Map<String, Object> doc, List<String> fields) {
+ public static void removeNonIndexableFields(Map<String, Object> doc, Set<String> fields) {
for (String key : fields) {
- doc.remove(key);
+ if (key.contains(".")) {
+ removeFieldByPath(doc, key);
+ } else {
+ doc.remove(key);
+ }
}
}
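+ // Removes a nested key addressed by a dot-separated path, e.g. a hypothetical
+ // "connection.config.password" walks connection -> config and drops "password" if the path exists.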
+ public static void removeFieldByPath(Map<String, Object> jsonMap, String path) {
+ String[] pathElements = path.split("\\.");
+ Map<String, Object> currentMap = jsonMap;
+
+ for (int i = 0; i < pathElements.length - 1; i++) {
+ String key = pathElements[i];
+ Object value = currentMap.get(key);
+ if (value instanceof Map) {
+ currentMap = (Map<String, Object>) value;
+ } else {
+ // Path Not Found
+ return;
+ }
+ }
+
+ // Remove the field at the last path element
+ String lastKey = pathElements[pathElements.length - 1];
+ currentMap.remove(lastKey);
+ }
+
public static List parseTags(List tags) {
if (tags == null) {
return Collections.emptyList();
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchRepository.java
index 36d22d87787f..220bcc788cf3 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchRepository.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchRepository.java
@@ -252,7 +252,7 @@ public void createEntity(EntityInterface entity) {
try {
IndexMapping indexMapping = entityIndexMap.get(entityType);
SearchIndex index = searchIndexFactory.buildIndex(entityType, entity);
- String doc = JsonUtils.pojoToJson(index.buildESDoc());
+ String doc = JsonUtils.pojoToJson(index.buildSearchIndexDoc());
searchClient.createEntity(indexMapping.getIndexName(clusterAlias), entityId, doc);
} catch (Exception ie) {
LOG.error(
@@ -280,7 +280,7 @@ public void createTimeSeriesEntity(EntityTimeSeriesInterface entity) {
try {
IndexMapping indexMapping = entityIndexMap.get(entityType);
SearchIndex index = searchIndexFactory.buildIndex(entityType, entity);
- String doc = JsonUtils.pojoToJson(index.buildESDoc());
+ String doc = JsonUtils.pojoToJson(index.buildSearchIndexDoc());
searchClient.createTimeSeriesEntity(indexMapping.getIndexName(clusterAlias), entityId, doc);
} catch (Exception ie) {
LOG.error(
@@ -309,7 +309,7 @@ public void updateEntity(EntityInterface entity) {
scriptTxt = getScriptWithParams(entity, doc);
} else {
SearchIndex elasticSearchIndex = searchIndexFactory.buildIndex(entityType, entity);
- doc = elasticSearchIndex.buildESDoc();
+ doc = elasticSearchIndex.buildSearchIndexDoc();
}
searchClient.updateEntity(
indexMapping.getIndexName(clusterAlias), entityId, doc, scriptTxt);
@@ -669,6 +669,11 @@ public Response aggregate(String index, String fieldName, String value, String q
return searchClient.aggregate(index, fieldName, value, query);
}
+ public JsonObject aggregate(String query, String index, JsonObject aggregationJson)
+ throws IOException {
+ return searchClient.aggregate(query, index, aggregationJson);
+ }
+
public Response suggest(SearchRequest request) throws IOException {
return searchClient.suggest(request);
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchClient.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchClient.java
index 4d7bc8cb68c2..ed704139686b 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchClient.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchClient.java
@@ -105,6 +105,7 @@
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;
+import javax.json.JsonObject;
import javax.net.ssl.SSLContext;
import javax.ws.rs.core.Response;
import lombok.extern.slf4j.Slf4j;
@@ -704,6 +705,82 @@ public Response aggregate(String index, String fieldName, String value, String q
return Response.status(OK).entity(response).build();
}
+ /*
+ Build dynamic aggregations from an Elasticsearch-style JSON aggregation query.
+ See TestSuiteResourceTest (ln. 506) for an example of a tested aggregation query.
+
+ @param aggregations - JsonObject containing the aggregation query
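+
+ A hypothetical example of the expected shape (field names are illustrative only):
+ {"byStatus": {"terms": {"field": "testCaseStatus"}},
+ "byTestCase": {"nested": {"path": "testCases"},
+ "aggs": {"byName": {"terms": {"field": "testCases.name"}}}}}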
+ */
+ public static List<AggregationBuilder> buildAggregation(JsonObject aggregations) {
+ List<AggregationBuilder> aggregationBuilders = new ArrayList<>();
+ for (String key : aggregations.keySet()) {
+ JsonObject aggregation = aggregations.getJsonObject(key);
+ for (String aggregationType : aggregation.keySet()) {
+ switch (aggregationType) {
+ case "terms":
+ JsonObject termAggregation = aggregation.getJsonObject(aggregationType);
+ TermsAggregationBuilder termsAggregationBuilder =
+ AggregationBuilders.terms(key).field(termAggregation.getString("field"));
+ aggregationBuilders.add(termsAggregationBuilder);
+ break;
+ case "nested":
+ JsonObject nestedAggregation = aggregation.getJsonObject("nested");
+ AggregationBuilder nestedAggregationBuilder =
+ AggregationBuilders.nested(
+ nestedAggregation.getString("path"), nestedAggregation.getString("path"));
+ JsonObject nestedAggregations = aggregation.getJsonObject("aggs");
+
+ List<AggregationBuilder> nestedAggregationBuilders =
+ buildAggregation(nestedAggregations);
+ for (AggregationBuilder nestedAggregationBuilder1 : nestedAggregationBuilders) {
+ nestedAggregationBuilder.subAggregation(nestedAggregationBuilder1);
+ }
+ aggregationBuilders.add(nestedAggregationBuilder);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ return aggregationBuilders;
+ }
+
+ @Override
+ public JsonObject aggregate(String query, String index, JsonObject aggregationJson)
+ throws IOException {
+ JsonObject aggregations = aggregationJson.getJsonObject("aggregations");
+ if (aggregations == null) {
+ return null;
+ }
+
+ List<AggregationBuilder> aggregationBuilder = buildAggregation(aggregations);
+ es.org.elasticsearch.action.search.SearchRequest searchRequest =
+ new es.org.elasticsearch.action.search.SearchRequest(
+ Entity.getSearchRepository().getIndexOrAliasName(index));
+ SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
+ if (query != null) {
+ XContentParser queryParser =
+ XContentType.JSON
+ .xContent()
+ .createParser(xContentRegistry, LoggingDeprecationHandler.INSTANCE, query);
+ QueryBuilder parsedQuery = SearchSourceBuilder.fromXContent(queryParser).query();
+ BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery().must(parsedQuery);
+ searchSourceBuilder.query(boolQueryBuilder);
+ }
+
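+ // Only the aggregation buckets are needed, so no search hits are returned (size 0)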
+ searchSourceBuilder.size(0).timeout(new TimeValue(30, TimeUnit.SECONDS));
+
+ for (AggregationBuilder aggregation : aggregationBuilder) {
+ searchSourceBuilder.aggregation(aggregation);
+ }
+
+ searchRequest.source(searchSourceBuilder);
+
+ String response = client.search(searchRequest, RequestOptions.DEFAULT).toString();
+ JsonObject jsonResponse = JsonUtils.readJson(response).asJsonObject();
+ return jsonResponse.getJsonObject("aggregations");
+ }
+
private static ScriptScoreFunctionBuilder boostScore() {
return ScoreFunctionBuilders.scriptFunction(
"double score = _score;"
@@ -819,6 +896,7 @@ private static SearchSourceBuilder buildDashboardSearchBuilder(String query, int
.aggregation(
AggregationBuilders.terms("dataModels.displayName.keyword")
.field("dataModels.displayName.keyword"))
+ .aggregation(AggregationBuilders.terms("project.keyword").field("project.keyword"))
.aggregation(
AggregationBuilders.terms("charts.displayName.keyword")
.field("charts.displayName.keyword"));
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchDataInsightProcessor.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchDataInsightProcessor.java
index c4fe74ee6ef7..d10afe6a18bd 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchDataInsightProcessor.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchDataInsightProcessor.java
@@ -86,7 +86,8 @@ private UpdateRequest getUpdateRequest(String entityType, ReportData reportData)
indexMapping.getIndexName(Entity.getSearchRepository().getClusterAlias()),
reportData.getId().toString());
updateRequest.doc(
- JsonUtils.pojoToJson(new ReportDataIndexes(reportData).buildESDoc()), XContentType.JSON);
+ JsonUtils.pojoToJson(new ReportDataIndexes(reportData).buildSearchIndexDoc()),
+ XContentType.JSON);
updateRequest.docAsUpsert(true);
return updateRequest;
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchEntitiesProcessor.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchEntitiesProcessor.java
index 7d07972c8900..13313aeb034e 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchEntitiesProcessor.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchEntitiesProcessor.java
@@ -89,7 +89,8 @@ public static UpdateRequest getUpdateRequest(String entityType, EntityInterface
entity.getId().toString());
updateRequest.doc(
JsonUtils.pojoToJson(
- Objects.requireNonNull(Entity.buildSearchIndex(entityType, entity)).buildESDoc()),
+ Objects.requireNonNull(Entity.buildSearchIndex(entityType, entity))
+ .buildSearchIndexDoc()),
XContentType.JSON);
updateRequest.docAsUpsert(true);
return updateRequest;
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/AggregatedCostAnalysisReportDataIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/AggregatedCostAnalysisReportDataIndex.java
index 8341391d20fc..354105c19406 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/AggregatedCostAnalysisReportDataIndex.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/AggregatedCostAnalysisReportDataIndex.java
@@ -2,13 +2,16 @@
import java.util.Map;
import org.openmetadata.schema.analytics.ReportData;
-import org.openmetadata.service.util.JsonUtils;
public record AggregatedCostAnalysisReportDataIndex(ReportData reportData) implements SearchIndex {
@Override
- public Map<String, Object> buildESDoc() {
- Map<String, Object> doc = JsonUtils.getMap(reportData);
+ public Object getEntity() {
+ return reportData;
+ }
+
+ @Override
+ public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
doc.put("entityType", "aggregatedCostAnalysisReportData");
return doc;
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ChartIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ChartIndex.java
index fca43e42386e..721f57e4b83a 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ChartIndex.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ChartIndex.java
@@ -8,14 +8,11 @@
import org.openmetadata.service.Entity;
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.models.SearchSuggest;
-import org.openmetadata.service.util.JsonUtils;
public record ChartIndex(Chart chart) implements SearchIndex {
- private static final List excludeFields = List.of("changeDescription");
- public Map buildESDoc() {
- Map doc = JsonUtils.getMap(chart);
- SearchIndexUtils.removeNonIndexableFields(doc, excludeFields);
+ @Override
+ public Map buildSearchIndexDocInternal(Map doc) {
List suggest = new ArrayList<>();
suggest.add(SearchSuggest.builder().input(chart.getName()).weight(10).build());
suggest.add(SearchSuggest.builder().input(chart.getFullyQualifiedName()).weight(5).build());
@@ -27,6 +24,7 @@ public Map buildESDoc() {
doc.put("entityType", Entity.CHART);
doc.put("owner", getEntityWithDisplayName(chart.getOwner()));
doc.put("domain", getEntityWithDisplayName(chart.getDomain()));
+ doc.put("followers", SearchIndexUtils.parseFollowers(chart.getFollowers()));
doc.put(
"totalVotes",
CommonUtil.nullOrEmpty(chart.getVotes())
@@ -34,4 +32,9 @@ public Map buildESDoc() {
: chart.getVotes().getUpVotes() - chart.getVotes().getDownVotes());
return doc;
}
+
+ @Override
+ public Object getEntity() {
+ return chart;
+ }
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ClassificationIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ClassificationIndex.java
index ce6a80712929..b2215af369ef 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ClassificationIndex.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ClassificationIndex.java
@@ -3,19 +3,15 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
-import java.util.stream.Collectors;
import org.openmetadata.schema.entity.classification.Classification;
import org.openmetadata.service.Entity;
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.models.SearchSuggest;
-import org.openmetadata.service.util.JsonUtils;
public record ClassificationIndex(Classification classification) implements SearchIndex {
- private static final List excludeFields = List.of("changeDescription");
- public Map buildESDoc() {
- Map doc = JsonUtils.getMap(classification);
- SearchIndexUtils.removeNonIndexableFields(doc, excludeFields);
+ @Override
+ public Map buildSearchIndexDocInternal(Map doc) {
List suggest = new ArrayList<>();
suggest.add(SearchSuggest.builder().input(classification.getName()).weight(10).build());
suggest.add(
@@ -24,10 +20,16 @@ public Map buildESDoc() {
"fqnParts",
getFQNParts(
classification.getFullyQualifiedName(),
- suggest.stream().map(SearchSuggest::getInput).collect(Collectors.toList())));
+ suggest.stream().map(SearchSuggest::getInput).toList()));
doc.put("suggest", suggest);
doc.put("entityType", Entity.CLASSIFICATION);
doc.put("owner", getEntityWithDisplayName(classification.getOwner()));
+ doc.put("followers", SearchIndexUtils.parseFollowers(classification.getFollowers()));
return doc;
}
+
+ @Override
+ public Object getEntity() {
+ return classification;
+ }
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ContainerIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ContainerIndex.java
index 97b200caf9dc..a3694ea8655a 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ContainerIndex.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ContainerIndex.java
@@ -15,19 +15,21 @@
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.models.FlattenColumn;
import org.openmetadata.service.search.models.SearchSuggest;
-import org.openmetadata.service.util.JsonUtils;
public record ContainerIndex(Container container) implements ColumnIndex {
- private static final List excludeFields = List.of("changeDescription");
- public Map buildESDoc() {
- Map doc = JsonUtils.getMap(container);
+ @Override
+ public Object getEntity() {
+ return container;
+ }
+
+ @Override
+ public Map buildSearchIndexDocInternal(Map doc) {
List suggest = new ArrayList<>();
List columnSuggest = new ArrayList<>();
List serviceSuggest = new ArrayList<>();
Set> tagsWithChildren = new HashSet<>();
List columnsWithChildrenName = new ArrayList<>();
- SearchIndexUtils.removeNonIndexableFields(doc, excludeFields);
suggest.add(SearchSuggest.builder().input(container.getFullyQualifiedName()).weight(5).build());
suggest.add(SearchSuggest.builder().input(container.getName()).weight(10).build());
if (container.getDataModel() != null && container.getDataModel().getColumns() != null) {
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DashboardDataModelIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DashboardDataModelIndex.java
index 21f136c0602c..538688f89b64 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DashboardDataModelIndex.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DashboardDataModelIndex.java
@@ -15,15 +15,17 @@
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.models.FlattenColumn;
import org.openmetadata.service.search.models.SearchSuggest;
-import org.openmetadata.service.util.JsonUtils;
public record DashboardDataModelIndex(DashboardDataModel dashboardDataModel)
implements ColumnIndex {
- private static final List excludeFields = List.of("changeDescription");
- public Map buildESDoc() {
- Map doc = JsonUtils.getMap(dashboardDataModel);
- SearchIndexUtils.removeNonIndexableFields(doc, excludeFields);
+ @Override
+ public Object getEntity() {
+ return dashboardDataModel;
+ }
+
+ @Override
+ public Map buildSearchIndexDocInternal(Map doc) {
List suggest = new ArrayList<>();
List columnSuggest = new ArrayList<>();
suggest.add(SearchSuggest.builder().input(dashboardDataModel.getName()).weight(10).build());
@@ -34,7 +36,6 @@ public Map buildESDoc() {
.build());
Set<List<TagLabel>> tagsWithChildren = new HashSet<>();
List<String> columnsWithChildrenName = new ArrayList<>();
- SearchIndexUtils.removeNonIndexableFields(doc, excludeFields);
if (dashboardDataModel.getColumns() != null) {
List cols = new ArrayList<>();
parseColumns(dashboardDataModel.getColumns(), cols, null);
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DashboardIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DashboardIndex.java
index 635767c1c675..5ce774c99651 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DashboardIndex.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DashboardIndex.java
@@ -12,19 +12,21 @@
import org.openmetadata.service.search.ParseTags;
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.models.SearchSuggest;
-import org.openmetadata.service.util.JsonUtils;
public class DashboardIndex implements SearchIndex {
final Dashboard dashboard;
- final List excludeFields = List.of("changeDescription");
public DashboardIndex(Dashboard dashboard) {
this.dashboard = dashboard;
}
- public Map<String, Object> buildESDoc() {
- Map<String, Object> doc = JsonUtils.getMap(dashboard);
- SearchIndexUtils.removeNonIndexableFields(doc, excludeFields);
+ @Override
+ public Object getEntity() {
+ return dashboard;
+ }
+
+ @Override
+ public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
List suggest = new ArrayList<>();
List serviceSuggest = new ArrayList<>();
List chartSuggest = new ArrayList<>();
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DashboardServiceIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DashboardServiceIndex.java
index 3c0e0729da51..cdabefd7d96a 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DashboardServiceIndex.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DashboardServiceIndex.java
@@ -7,15 +7,16 @@
import org.openmetadata.service.Entity;
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.models.SearchSuggest;
-import org.openmetadata.service.util.JsonUtils;
public record DashboardServiceIndex(DashboardService dashboardService) implements SearchIndex {
- private static final List excludeFields = List.of("changeDescription");
+ @Override
+ public Object getEntity() {
+ return dashboardService;
+ }
- public Map buildESDoc() {
- Map doc = JsonUtils.getMap(dashboardService);
- SearchIndexUtils.removeNonIndexableFields(doc, excludeFields);
+ @Override
+ public Map buildSearchIndexDocInternal(Map doc) {
List suggest = new ArrayList<>();
suggest.add(SearchSuggest.builder().input(dashboardService.getName()).weight(5).build());
suggest.add(
@@ -29,6 +30,7 @@ public Map buildESDoc() {
doc.put("entityType", Entity.DASHBOARD_SERVICE);
doc.put("owner", getEntityWithDisplayName(dashboardService.getOwner()));
doc.put("domain", getEntityWithDisplayName(dashboardService.getDomain()));
+ doc.put("followers", SearchIndexUtils.parseFollowers(dashboardService.getFollowers()));
return doc;
}
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DataProductIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DataProductIndex.java
index 044e7aeeab72..310d7ca3907e 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DataProductIndex.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DataProductIndex.java
@@ -7,14 +7,16 @@
import org.openmetadata.service.Entity;
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.models.SearchSuggest;
-import org.openmetadata.service.util.JsonUtils;
public record DataProductIndex(DataProduct dataProduct) implements SearchIndex {
- private static final List excludeFields = List.of("changeDescription");
- public Map buildESDoc() {
- Map doc = JsonUtils.getMap(dataProduct);
- SearchIndexUtils.removeNonIndexableFields(doc, excludeFields);
+ @Override
+ public Object getEntity() {
+ return dataProduct;
+ }
+
+ @Override
+ public Map buildSearchIndexDocInternal(Map doc) {
List suggest = new ArrayList<>();
suggest.add(SearchSuggest.builder().input(dataProduct.getName()).weight(5).build());
suggest.add(
@@ -27,6 +29,7 @@ public Map buildESDoc() {
doc.put("entityType", Entity.DATA_PRODUCT);
doc.put("owner", getEntityWithDisplayName(dataProduct.getOwner()));
doc.put("domain", getEntityWithDisplayName(dataProduct.getDomain()));
+ doc.put("followers", SearchIndexUtils.parseFollowers(dataProduct.getFollowers()));
return doc;
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DatabaseIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DatabaseIndex.java
index 5c8623c111d0..0ed613b05493 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DatabaseIndex.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DatabaseIndex.java
@@ -8,14 +8,16 @@
import org.openmetadata.service.Entity;
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.models.SearchSuggest;
-import org.openmetadata.service.util.JsonUtils;
public record DatabaseIndex(Database database) implements SearchIndex {
- private static final List<String> excludeFields = List.of("changeDescription");
- public Map<String, Object> buildESDoc() {
- Map<String, Object> doc = JsonUtils.getMap(database);
- SearchIndexUtils.removeNonIndexableFields(doc, excludeFields);
+ @Override
+ public Object getEntity() {
+ return database;
+ }
+
+ @Override
+ public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
List<SearchSuggest> suggest = new ArrayList<>();
suggest.add(SearchSuggest.builder().input(database.getName()).weight(5).build());
suggest.add(SearchSuggest.builder().input(database.getFullyQualifiedName()).weight(5).build());
@@ -33,6 +35,7 @@ public Map<String, Object> buildESDoc() {
: database.getVotes().getUpVotes() - database.getVotes().getDownVotes());
doc.put("owner", getEntityWithDisplayName(database.getOwner()));
doc.put("domain", getEntityWithDisplayName(database.getDomain()));
+ doc.put("followers", SearchIndexUtils.parseFollowers(database.getFollowers()));
return doc;
}
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DatabaseSchemaIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DatabaseSchemaIndex.java
index 0dde36a0546b..39984999f845 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DatabaseSchemaIndex.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DatabaseSchemaIndex.java
@@ -8,14 +8,16 @@
import org.openmetadata.service.Entity;
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.models.SearchSuggest;
-import org.openmetadata.service.util.JsonUtils;
public record DatabaseSchemaIndex(DatabaseSchema databaseSchema) implements SearchIndex {
- private static final List<String> excludeFields = List.of("changeDescription");
- public Map<String, Object> buildESDoc() {
- Map<String, Object> doc = JsonUtils.getMap(databaseSchema);
- SearchIndexUtils.removeNonIndexableFields(doc, excludeFields);
+ @Override
+ public Object getEntity() {
+ return databaseSchema;
+ }
+
+ @Override
+ public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
List<SearchSuggest> suggest = new ArrayList<>();
suggest.add(SearchSuggest.builder().input(databaseSchema.getName()).weight(5).build());
suggest.add(
@@ -34,6 +36,7 @@ public Map<String, Object> buildESDoc() {
? 0
: databaseSchema.getVotes().getUpVotes() - databaseSchema.getVotes().getDownVotes());
doc.put("domain", getEntityWithDisplayName(databaseSchema.getDomain()));
+ doc.put("followers", SearchIndexUtils.parseFollowers(databaseSchema.getFollowers()));
return doc;
}
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DatabaseServiceIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DatabaseServiceIndex.java
index 428d5b7e7405..7297d5f4ec13 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DatabaseServiceIndex.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DatabaseServiceIndex.java
@@ -3,19 +3,20 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
-import java.util.stream.Collectors;
import org.openmetadata.schema.entity.services.DatabaseService;
import org.openmetadata.service.Entity;
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.models.SearchSuggest;
-import org.openmetadata.service.util.JsonUtils;
public record DatabaseServiceIndex(DatabaseService databaseService) implements SearchIndex {
- private static final List<String> excludeFields = List.of("changeDescription");
- public Map<String, Object> buildESDoc() {
- Map<String, Object> doc = JsonUtils.getMap(databaseService);
- SearchIndexUtils.removeNonIndexableFields(doc, excludeFields);
+ @Override
+ public Object getEntity() {
+ return databaseService;
+ }
+
+ @Override
+ public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
List<SearchSuggest> suggest = new ArrayList<>();
suggest.add(SearchSuggest.builder().input(databaseService.getName()).weight(5).build());
suggest.add(
@@ -24,11 +25,12 @@ public Map<String, Object> buildESDoc() {
"fqnParts",
getFQNParts(
databaseService.getFullyQualifiedName(),
- suggest.stream().map(SearchSuggest::getInput).collect(Collectors.toList())));
+ suggest.stream().map(SearchSuggest::getInput).toList()));
doc.put("suggest", suggest);
doc.put("entityType", Entity.DATABASE_SERVICE);
doc.put("owner", getEntityWithDisplayName(databaseService.getOwner()));
doc.put("domain", getEntityWithDisplayName(databaseService.getDomain()));
+ doc.put("followers", SearchIndexUtils.parseFollowers(databaseService.getFollowers()));
return doc;
}
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DomainIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DomainIndex.java
index 88ce31931988..bac0c7e6c616 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DomainIndex.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DomainIndex.java
@@ -7,14 +7,16 @@
import org.openmetadata.service.Entity;
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.models.SearchSuggest;
-import org.openmetadata.service.util.JsonUtils;
public record DomainIndex(Domain domain) implements SearchIndex {
- private static final List<String> excludeFields = List.of("changeDescription");
- public Map<String, Object> buildESDoc() {
- Map<String, Object> doc = JsonUtils.getMap(domain);
- SearchIndexUtils.removeNonIndexableFields(doc, excludeFields);
+ @Override
+ public Object getEntity() {
+ return domain;
+ }
+
+ @Override
+ public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
List<SearchSuggest> suggest = new ArrayList<>();
suggest.add(SearchSuggest.builder().input(domain.getName()).weight(5).build());
suggest.add(SearchSuggest.builder().input(domain.getFullyQualifiedName()).weight(5).build());
@@ -25,6 +27,7 @@ public Map<String, Object> buildESDoc() {
suggest.stream().map(SearchSuggest::getInput).toList()));
doc.put("suggest", suggest);
doc.put("entityType", Entity.DOMAIN);
+ doc.put("followers", SearchIndexUtils.parseFollowers(domain.getFollowers()));
return doc;
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/EntityReportDataIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/EntityReportDataIndex.java
index 857323ee299d..384c4479497a 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/EntityReportDataIndex.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/EntityReportDataIndex.java
@@ -2,13 +2,16 @@
import java.util.Map;
import org.openmetadata.schema.analytics.ReportData;
-import org.openmetadata.service.util.JsonUtils;
public record EntityReportDataIndex(ReportData reportData) implements SearchIndex {
@Override
- public Map<String, Object> buildESDoc() {
- Map<String, Object> doc = JsonUtils.getMap(reportData);
+ public Object getEntity() {
+ return reportData;
+ }
+
+ @Override
+ public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
doc.put("entityType", "entityReportData");
return doc;
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/GlossaryIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/GlossaryIndex.java
index 960c05cf73ad..b773c34bffa6 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/GlossaryIndex.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/GlossaryIndex.java
@@ -8,19 +8,21 @@
import org.openmetadata.service.Entity;
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.models.SearchSuggest;
-import org.openmetadata.service.util.JsonUtils;
public class GlossaryIndex implements SearchIndex {
final Glossary glossary;
- final List<String> excludeFields = List.of("changeDescription");
public GlossaryIndex(Glossary glossary) {
this.glossary = glossary;
}
- public Map<String, Object> buildESDoc() {
- Map<String, Object> doc = JsonUtils.getMap(glossary);
- SearchIndexUtils.removeNonIndexableFields(doc, excludeFields);
+ @Override
+ public Object getEntity() {
+ return glossary;
+ }
+
+ @Override
+ public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
List<SearchSuggest> suggest = new ArrayList<>();
suggest.add(SearchSuggest.builder().input(glossary.getName()).weight(5).build());
if (glossary.getDisplayName() != null && !glossary.getDisplayName().isEmpty()) {
@@ -40,6 +42,7 @@ public Map<String, Object> buildESDoc() {
? 0
: glossary.getVotes().getUpVotes() - glossary.getVotes().getDownVotes());
doc.put("domain", getEntityWithDisplayName(glossary.getDomain()));
+ doc.put("followers", SearchIndexUtils.parseFollowers(glossary.getFollowers()));
return doc;
}
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/GlossaryTermIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/GlossaryTermIndex.java
index 244c2eeb5c89..7a1d48dc1c50 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/GlossaryTermIndex.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/GlossaryTermIndex.java
@@ -8,19 +8,21 @@
import org.openmetadata.service.Entity;
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.models.SearchSuggest;
-import org.openmetadata.service.util.JsonUtils;
public class GlossaryTermIndex implements SearchIndex {
final GlossaryTerm glossaryTerm;
- final List<String> excludeFields = List.of("changeDescription");
public GlossaryTermIndex(GlossaryTerm glossaryTerm) {
this.glossaryTerm = glossaryTerm;
}
- public Map<String, Object> buildESDoc() {
- Map<String, Object> doc = JsonUtils.getMap(glossaryTerm);
- SearchIndexUtils.removeNonIndexableFields(doc, excludeFields);
+ @Override
+ public Object getEntity() {
+ return glossaryTerm;
+ }
+
+ @Override
+ public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
List<SearchSuggest> suggest = new ArrayList<>();
suggest.add(SearchSuggest.builder().input(glossaryTerm.getName()).weight(5).build());
if (glossaryTerm.getDisplayName() != null && !glossaryTerm.getDisplayName().isEmpty()) {
@@ -40,6 +42,7 @@ public Map<String, Object> buildESDoc() {
: glossaryTerm.getVotes().getUpVotes() - glossaryTerm.getVotes().getDownVotes());
doc.put("owner", getEntityWithDisplayName(glossaryTerm.getOwner()));
doc.put("domain", getEntityWithDisplayName(glossaryTerm.getDomain()));
+ doc.put("followers", SearchIndexUtils.parseFollowers(glossaryTerm.getFollowers()));
return doc;
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/IngestionPipelineIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/IngestionPipelineIndex.java
index 2432cfe82714..c69a40962f9d 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/IngestionPipelineIndex.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/IngestionPipelineIndex.java
@@ -3,26 +3,34 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import org.openmetadata.common.utils.CommonUtil;
import org.openmetadata.schema.entity.services.ingestionPipelines.IngestionPipeline;
import org.openmetadata.service.Entity;
import org.openmetadata.service.search.ParseTags;
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.models.SearchSuggest;
-import org.openmetadata.service.util.JsonUtils;
public class IngestionPipelineIndex implements SearchIndex {
final IngestionPipeline ingestionPipeline;
- final List<String> excludeFields =
- List.of("changeDescription", "sourceConfig", "openMetadataServerConnection", "airflowConfig");
+ final Set<String> excludeFields =
+ Set.of("sourceConfig", "openMetadataServerConnection", "airflowConfig");
public IngestionPipelineIndex(IngestionPipeline ingestionPipeline) {
this.ingestionPipeline = ingestionPipeline;
}
- public Map<String, Object> buildESDoc() {
- Map<String, Object> doc = JsonUtils.getMap(ingestionPipeline);
- SearchIndexUtils.removeNonIndexableFields(doc, excludeFields);
+ @Override
+ public Object getEntity() {
+ return ingestionPipeline;
+ }
+
+ public Set<String> getExcludedFields() {
+ return excludeFields;
+ }
+
+ @Override
+ public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
List<SearchSuggest> suggest = new ArrayList<>();
List<SearchSuggest> serviceSuggest = new ArrayList<>();
suggest.add(
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/MessagingServiceIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/MessagingServiceIndex.java
index 11f26bb28341..adcc4c360f51 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/MessagingServiceIndex.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/MessagingServiceIndex.java
@@ -3,19 +3,20 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
-import java.util.stream.Collectors;
import org.openmetadata.schema.entity.services.MessagingService;
import org.openmetadata.service.Entity;
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.models.SearchSuggest;
-import org.openmetadata.service.util.JsonUtils;
public record MessagingServiceIndex(MessagingService messagingService) implements SearchIndex {
- private static final List<String> excludeFields = List.of("changeDescription");
- public Map<String, Object> buildESDoc() {
- Map<String, Object> doc = JsonUtils.getMap(messagingService);
- SearchIndexUtils.removeNonIndexableFields(doc, excludeFields);
+ @Override
+ public Object getEntity() {
+ return messagingService;
+ }
+
+ @Override
+ public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
List<SearchSuggest> suggest = new ArrayList<>();
suggest.add(SearchSuggest.builder().input(messagingService.getName()).weight(5).build());
suggest.add(
@@ -24,11 +25,12 @@ public Map<String, Object> buildESDoc() {
"fqnParts",
getFQNParts(
messagingService.getFullyQualifiedName(),
- suggest.stream().map(SearchSuggest::getInput).collect(Collectors.toList())));
+ suggest.stream().map(SearchSuggest::getInput).toList()));
doc.put("suggest", suggest);
doc.put("entityType", Entity.MESSAGING_SERVICE);
doc.put("owner", getEntityWithDisplayName(messagingService.getOwner()));
doc.put("domain", getEntityWithDisplayName(messagingService.getDomain()));
+ doc.put("followers", SearchIndexUtils.parseFollowers(messagingService.getFollowers()));
return doc;
}
}
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/MetadataServiceIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/MetadataServiceIndex.java
index 817e7c4bd865..7c3042d82831 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/MetadataServiceIndex.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/MetadataServiceIndex.java
@@ -3,19 +3,20 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
-import java.util.stream.Collectors;
import org.openmetadata.schema.entity.services.MetadataService;
import org.openmetadata.service.Entity;
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.models.SearchSuggest;
-import org.openmetadata.service.util.JsonUtils;
public record MetadataServiceIndex(MetadataService metadataService) implements SearchIndex {
- private static final List<String> excludeFields = List.of("changeDescription");
- public Map<String, Object> buildESDoc() {
- Map<String, Object> doc = JsonUtils.getMap(metadataService);
- SearchIndexUtils.removeNonIndexableFields(doc, excludeFields);
+ @Override
+ public Object getEntity() {
+ return metadataService;
+ }
+
+ @Override
+ public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
List<SearchSuggest> suggest = new ArrayList<>();
suggest.add(SearchSuggest.builder().input(metadataService.getName()).weight(5).build());
suggest.add(
@@ -24,10 +25,11 @@ public Map<String, Object> buildESDoc() {
"fqnParts",
getFQNParts(
metadataService.getFullyQualifiedName(),
- suggest.stream().map(SearchSuggest::getInput).collect(Collectors.toList())));
+ suggest.stream().map(SearchSuggest::getInput).toList()));
doc.put("suggest", suggest);
doc.put("entityType", Entity.METADATA_SERVICE);
doc.put("owner", getEntityWithDisplayName(metadataService.getOwner()));
+ doc.put("followers", SearchIndexUtils.parseFollowers(metadataService.getFollowers()));
return doc;
}
}
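
Note: each hunk in this excerpt also starts writing a followers field through SearchIndexUtils.parseFollowers(entity.getFollowers()); only the call sites are visible here. The sketch below is a plausible, null-safe shape for that helper, consistent with how it is invoked — returning follower IDs as strings is an assumption, and FollowerParsingSketch is a hypothetical wrapper class used only to keep the example compilable.

    import java.util.Collections;
    import java.util.List;
    import org.openmetadata.schema.type.EntityReference;

    // Illustrative sketch of SearchIndexUtils.parseFollowers; the real implementation may differ.
    public final class FollowerParsingSketch {
      private FollowerParsingSketch() {}

      public static List<String> parseFollowers(List<EntityReference> followersRef) {
        if (followersRef == null) {
          return Collections.emptyList();
        }
        // Flatten the follower references down to their IDs for indexing.
        return followersRef.stream().map(ref -> ref.getId().toString()).toList();
      }
    }
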
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/MlModelIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/MlModelIndex.java
index f9d890e51aff..095d57af6c3f 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/MlModelIndex.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/MlModelIndex.java
@@ -9,20 +9,22 @@
import org.openmetadata.service.search.ParseTags;
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.models.SearchSuggest;
-import org.openmetadata.service.util.JsonUtils;
public class MlModelIndex implements SearchIndex {
final MlModel mlModel;
- final List<String> excludeFields = List.of("changeDescription");
public MlModelIndex(MlModel mlModel) {
this.mlModel = mlModel;
}
- public Map<String, Object> buildESDoc() {
- Map<String, Object> doc = JsonUtils.getMap(mlModel);
+ @Override
+ public Object getEntity() {
+ return mlModel;
+ }
+
+ @Override
+ public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
List<SearchSuggest> suggest = new ArrayList<>();
- SearchIndexUtils.removeNonIndexableFields(doc, excludeFields);
suggest.add(SearchSuggest.builder().input(mlModel.getFullyQualifiedName()).weight(5).build());
suggest.add(SearchSuggest.builder().input(mlModel.getName()).weight(10).build());
diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/MlModelServiceIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/MlModelServiceIndex.java
index 71db73140d27..d618de74f18f 100644
--- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/MlModelServiceIndex.java
+++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/MlModelServiceIndex.java
@@ -3,19 +3,20 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
-import java.util.stream.Collectors;
import org.openmetadata.schema.entity.services.MlModelService;
import org.openmetadata.service.Entity;
import org.openmetadata.service.search.SearchIndexUtils;
import org.openmetadata.service.search.models.SearchSuggest;
-import org.openmetadata.service.util.JsonUtils;
public record MlModelServiceIndex(MlModelService mlModelService) implements SearchIndex {
- private static final List<String> excludeFields = List.of("changeDescription");
- public Map