Skip to content

Commit

Permalink
Fix: changed ContainsAnySubstringFromSlice to ContainsAnyStringFromSl…
Browse files Browse the repository at this point in the history
…ice for Unsupported datatypes handling (#1673)

Description:

Fixes the issue where some user-defined type name can have a substring of any unsupported type.
https://yugabyte.atlassian.net/browse/DB-12623

Verified for all the source types that string full match is fine as other information for types like length, precision, and scale for column type is not present in the `Data_Type`/ `typname` column of the catalog queries we use.
Note: just for Oracle, timestamp and interval types can have this information in the column but we don't have any of these as unsupported.

Tests:
Added automation for this case of type name in basic-non-public-live-test for all workflows.
  • Loading branch information
priyanshi-yb authored Aug 29, 2024
1 parent dd871b1 commit 3d8f270
Show file tree
Hide file tree
Showing 12 changed files with 89 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,9 @@ INSERT INTO user_table (email) VALUES
('user5@example.com'),
('user6@example.com'),
('user7@example.com'),
('user8@example.com');
('user8@example.com');

INSERT INTO test_enum values(1, 'duplicate_payment_method');
INSERT INTO test_enum values(2, 'server_failure');
INSERT INTO test_enum values(3, 'server_failure');
INSERT INTO test_enum values(4, 'duplicate_payment_method');
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,12 @@ CREATE TABLE user_table (
status VARCHAR(50) DEFAULT 'active'
);

CREATE TYPE decline_reason AS ENUM (
'duplicate_payment_method',
'server_failure'
);

CREATE TABLE test_enum (
id int PRIMARY KEY,
reason decline_reason
);
10 changes: 9 additions & 1 deletion migtests/tests/pg/basic-non-public-live-test/source_delta.sql
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,12 @@ UPDATE user_table SET email = 'updated_user4@example.com' WHERE id = 4;
UPDATE user_table SET email = 'user4@example.com' WHERE id = 6;

-- events with NULL value for unique key columns
UPDATE user_table SET status = 'inactive' where id > 0;
UPDATE user_table SET status = 'inactive' where id > 0;

--events for test_enum table
INSERT INTO test_enum values(5, 'duplicate_payment_method');
INSERT INTO test_enum values(6, 'server_failure');

UPDATE test_enum set reason = 'server_failure' where id = 4;

DELETE from test_enum where id = 2;
8 changes: 8 additions & 0 deletions migtests/tests/pg/basic-non-public-live-test/target_delta.sql
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,11 @@ UPDATE user_table SET email = 'user4@example.com' WHERE id = 4;

-- events with NULL value for unique key columns
UPDATE user_table SET status = 'active' where id > 0;

--events for test_enum table
INSERT INTO test_enum values(7, 'duplicate_payment_method');
INSERT INTO test_enum values(8, 'server_failure');

UPDATE test_enum set reason = 'server_failure' where id = 6;

DELETE from test_enum where id = 5;
10 changes: 9 additions & 1 deletion migtests/tests/pg/basic-non-public-live-test/validate
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ def main():

EXPECTED_ROW_COUNT = {
'x':3,
'user_table': 8
'user_table': 8,
'test_enum':4
}

EXPECTED_DATA_TYPES = {
Expand All @@ -32,6 +33,10 @@ EXPECTED_DATA_TYPES = {
'id': 'integer',
'email': 'character varying',
'status': 'character varying'
},
'test_enum': {
'id': 'integer',
'reason': 'USER-DEFINED'
}
}

Expand All @@ -42,6 +47,9 @@ EXPECTED_SUM_OF_COLUMN = {
},
'user_table': {
'id': '36',
},
'test_enum':{
'id':'10'
}
}

Expand Down
16 changes: 14 additions & 2 deletions migtests/tests/pg/basic-non-public-live-test/validateAfterChanges
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ def main():

EXPECTED_ROW_COUNT = {
'x':5,
'user_table': 8
'user_table': 8,
'test_enum': 5
}

EXPECTED_DATA_TYPES = {
Expand All @@ -34,6 +35,10 @@ EXPECTED_DATA_TYPES = {
'id': 'integer',
'email': 'character varying',
'status': 'character varying'
},
'test_enum': {
'id': 'integer',
'reason': 'USER-DEFINED'
}
}

Expand All @@ -44,12 +49,16 @@ EXPECTED_SUM_OF_COLUMN = {
},
'user_table': {
'id': '51',
},
'test_enum': {
'id':'19',
}
}

EXPECTED_ROW_COUNT_FF = {
'x':7,
'user_table': 8
'user_table': 8,
'test_enum': 6
}

EXPECTED_SUM_OF_COLUMN_FF = {
Expand All @@ -59,6 +68,9 @@ EXPECTED_SUM_OF_COLUMN_FF = {
},
'user_table': {
'id': '49',
},
'test_enum':{
'id':'29'
}
}

Expand Down
7 changes: 5 additions & 2 deletions yb-voyager/cmd/assessMigrationCommand.go
Original file line number Diff line number Diff line change
Expand Up @@ -923,10 +923,13 @@ func fetchColumnsWithUnsupportedDataTypes() ([]utils.TableColumnsDataTypes, []ut

// filter columns with unsupported data types using sourceUnsupportedDataTypes
for i := 0; i < len(allColumnsDataTypes); i++ {
if utils.ContainsAnySubstringFromSlice(sourceUnsupportedDataTypes, allColumnsDataTypes[i].DataType) {
//Using this ContainsAnyStringFromSlice as the catalog we use for fetching datatypes uses the data_type only
// which just contains the base type for example VARCHARs it won't include any length, precision or scale information
//of these types there are other columns available for these information so we just do string match of types with our list
if utils.ContainsAnyStringFromSlice(sourceUnsupportedDataTypes, allColumnsDataTypes[i].DataType) {
unsupportedDataTypes = append(unsupportedDataTypes, allColumnsDataTypes[i])
}
if utils.ContainsAnySubstringFromSlice(liveMigrationUnsupportedDataTypes, allColumnsDataTypes[i].DataType) {
if utils.ContainsAnyStringFromSlice(liveMigrationUnsupportedDataTypes, allColumnsDataTypes[i].DataType) {
unsupportedDataTypesForLiveMigration = append(unsupportedDataTypesForLiveMigration, allColumnsDataTypes[i])
}
}
Expand Down
5 changes: 4 additions & 1 deletion yb-voyager/src/srcdb/mysql.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,10 @@ func (ms *MySQL) GetColumnsWithSupportedTypes(tableList []sqlname.NameTuple, use
_, tname := tableName.ForCatalogQuery()
var unsupportedColumnNames []string
for i := 0; i < len(columns); i++ {
if utils.ContainsAnySubstringFromSlice(mysqlUnsupportedDataTypes, dataTypes[i]) {
//Using this ContainsAnyStringFromSlice as the catalog we use for fetching datatypes uses the data_type only
// which just contains the base type for example VARCHARs it won't include any length, precision or scale information
//of these types there are other columns available for these information so we just do string match of types with our list
if utils.ContainsAnyStringFromSlice(mysqlUnsupportedDataTypes, dataTypes[i]) {
log.Infof("Skipping unsupproted column %s.%s of type %s", tname, columns[i], dataTypes[i])
unsupportedColumnNames = append(unsupportedColumnNames, fmt.Sprintf("%s.%s of type %s", tname, columns[i], dataTypes[i]))
} else {
Expand Down
5 changes: 4 additions & 1 deletion yb-voyager/src/srcdb/oracle.go
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,10 @@ func (ora *Oracle) GetColumnsWithSupportedTypes(tableList []sqlname.NameTuple, u
var unsupportedColumnNames []string
for i := 0; i < len(columns); i++ {
isUdtWithDebezium := (dataTypesOwner[i] == sname) && useDebezium // datatype owner check is for UDT type detection as VARRAY are created using UDT
if isUdtWithDebezium || utils.ContainsAnySubstringFromSlice(OracleUnsupportedDataTypes, dataTypes[i]) {
//Using this ContainsAnyStringFromSlice as the catalog we use for fetching datatypes uses the data_type only
// which just contains the base type for example VARCHARs it won't include any length, precision or scale information
//of these types there are other columns available for these information so we just do string match of types with our list
if isUdtWithDebezium || utils.ContainsAnyStringFromSlice(OracleUnsupportedDataTypes, dataTypes[i]) {
log.Infof("Skipping unsupproted column %s.%s of type %s", tname, columns[i], dataTypes[i])
unsupportedColumnNames = append(unsupportedColumnNames, fmt.Sprintf("%s.%s of type %s", tname, columns[i], dataTypes[i]))
} else {
Expand Down
7 changes: 6 additions & 1 deletion yb-voyager/src/srcdb/postgres.go
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,12 @@ func (pg *PostgreSQL) GetColumnsWithSupportedTypes(tableList []sqlname.NameTuple
var supportedColumnNames []string
for i, column := range columns {
if useDebezium || isStreamingEnabled {
if utils.ContainsAnySubstringFromSlice(PostgresUnsupportedDataTypesForDbzm, dataTypes[i]) {
//Using this ContainsAnyStringFromSlice as the catalog we use for fetching datatypes uses the data_type only
// which just contains the base type for example VARCHARs it won't include any length, precision or scale information
//of these types there are other columns available for these information so we just do string match of types with our list
//And also for geometry or complex types like if a column is defined with public.geometry(Point,4326) then also only geometry is available
//in the typname column of those catalog tables and further details (Point,4326) is managed by Postgis extension.
if utils.ContainsAnyStringFromSlice(PostgresUnsupportedDataTypesForDbzm, dataTypes[i]) {
unsupportedColumnNames = append(unsupportedColumnNames, column)
} else {
supportedColumnNames = append(supportedColumnNames, column)
Expand Down
10 changes: 6 additions & 4 deletions yb-voyager/src/srcdb/yugabytedb.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,15 @@ import (
"time"

"github.com/google/uuid"
"github.com/jackc/pglogrepl"
"github.com/jackc/pgx/v5/pgconn"
"github.com/samber/lo"
log "github.com/sirupsen/logrus"
"golang.org/x/exp/slices"

"github.com/yugabyte/yb-voyager/yb-voyager/src/datafile"
"github.com/yugabyte/yb-voyager/yb-voyager/src/utils"
"github.com/yugabyte/yb-voyager/yb-voyager/src/utils/sqlname"

"github.com/jackc/pglogrepl"
"github.com/jackc/pgx/v5/pgconn"
)

var yugabyteUnsupportedDataTypesForDbzm = []string{"BOX", "CIRCLE", "LINE", "LSEG", "PATH", "PG_LSN", "POINT", "POLYGON", "TSQUERY", "TSVECTOR", "TXID_SNAPSHOT", "GEOMETRY", "GEOGRAPHY", "RASTER"}
Expand Down Expand Up @@ -625,7 +624,10 @@ func (yb *YugabyteDB) GetColumnsWithSupportedTypes(tableList []sqlname.NameTuple
var unsupportedColumnNames []string
for i, column := range columns {
if useDebezium || isStreamingEnabled {
if utils.ContainsAnySubstringFromSlice(yugabyteUnsupportedDataTypesForDbzm, dataTypes[i]) {
//Using this ContainsAnyStringFromSlice as the catalog we use for fetching datatypes uses the data_type only
// which just contains the base type for example VARCHARs it won't include any length, precision or scale information
//of these types there are other columns available for these information so we just do string match of types with our list
if utils.ContainsAnyStringFromSlice(yugabyteUnsupportedDataTypesForDbzm, dataTypes[i]) {
unsupportedColumnNames = append(unsupportedColumnNames, column)
} else {
supportedColumnNames = append(supportedColumnNames, column)
Expand Down
9 changes: 9 additions & 0 deletions yb-voyager/src/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,15 @@ func ContainsAnySubstringFromSlice(slice []string, s string) bool {
return false
}

func ContainsAnyStringFromSlice(slice []string, s string) bool {
for i := 0; i < len(slice); i++ {
if strings.EqualFold(s, slice[i]) {
return true
}
}
return false
}

func WaitForLineInLogFile(filePath string, message string, timeoutDuration time.Duration) error {
// Wait for log file to be created
timeout := time.After(timeoutDuration)
Expand Down

0 comments on commit 3d8f270

Please sign in to comment.