From 38bc519d644b1d6a315514c5aaf4e16c7fa261ab Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 22 May 2024 17:55:12 +0300 Subject: [PATCH 1/4] schemadiff: normalize missing column collation Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/schemadiff/column.go | 15 ++++++++++++++- go/vt/schemadiff/table_test.go | 25 +++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/go/vt/schemadiff/column.go b/go/vt/schemadiff/column.go index ae341776ccc..6a635119079 100644 --- a/go/vt/schemadiff/column.go +++ b/go/vt/schemadiff/column.go @@ -104,7 +104,13 @@ func (c *ColumnDefinitionEntity) ColumnDiff( ) (*ModifyColumnDiff, error) { if c.IsTextual() || other.IsTextual() { // We will now denormalize the columns charset & collate as needed (if empty, populate from table.) - + if c.columnDefinition.Type.Charset.Name != "" && c.columnDefinition.Type.Options.Collate == "" { + collation := env.CollationEnv().DefaultCollationForCharset(c.columnDefinition.Type.Charset.Name) + if collation == collations.Unknown { + return nil, &UnknownColumnCharsetCollationError{Column: c.columnDefinition.Name.String(), Charset: t1cc.charset} + } + c.columnDefinition.Type.Options.Collate = env.CollationEnv().LookupName(collation) + } if c.columnDefinition.Type.Charset.Name == "" && c.columnDefinition.Type.Options.Collate != "" { // Column has explicit collation but no charset. We can infer the charset from the collation. 
collationID := env.CollationEnv().LookupByName(c.columnDefinition.Type.Options.Collate) @@ -137,6 +143,13 @@ func (c *ColumnDefinitionEntity) ColumnDiff( c.columnDefinition.Type.Options.Collate = env.CollationEnv().LookupName(collation) } } + if other.columnDefinition.Type.Charset.Name != "" && other.columnDefinition.Type.Options.Collate == "" { + collation := env.CollationEnv().DefaultCollationForCharset(other.columnDefinition.Type.Charset.Name) + if collation == collations.Unknown { + return nil, &UnknownColumnCharsetCollationError{Column: other.columnDefinition.Name.String(), Charset: t1cc.charset} + } + other.columnDefinition.Type.Options.Collate = env.CollationEnv().LookupName(collation) + } if other.columnDefinition.Type.Charset.Name == "" && other.columnDefinition.Type.Options.Collate != "" { // Column has explicit collation but no charset. We can infer the charset from the collation. collationID := env.CollationEnv().LookupByName(other.columnDefinition.Type.Options.Collate) diff --git a/go/vt/schemadiff/table_test.go b/go/vt/schemadiff/table_test.go index 09997057e16..1a82d73c722 100644 --- a/go/vt/schemadiff/table_test.go +++ b/go/vt/schemadiff/table_test.go @@ -1896,6 +1896,26 @@ func TestCreateTableDiff(t *testing.T) { from: "create table t (id int primary key, v varchar(64) character set utf8mb3 collate utf8mb3_bin)", to: "create table t (id int primary key, v varchar(64) collate utf8mb3_bin)", }, + { + name: "ignore identical implicit ascii charset", + from: "create table t (id int primary key, v varchar(64) character set ascii collate ascii_general_ci)", + to: "create table t (id int primary key, v varchar(64) collate ascii_general_ci)", + }, + { + name: "ignore identical implicit collation", + from: "create table t (id int primary key, v varchar(64) character set utf8mb3 collate utf8mb3_general_ci)", + to: "create table t (id int primary key, v varchar(64) character set utf8mb3)", + }, + { + name: "ignore identical implicit collation, reverse", + 
from: "create table t (id int primary key, v varchar(64) character set utf8mb3)", + to: "create table t (id int primary key, v varchar(64) character set utf8mb3 collate utf8mb3_general_ci)", + }, + { + name: "ignore identical implicit ascii collation", + from: "create table t (id int primary key, v varchar(64) character set ascii collate ascii_general_ci)", + to: "create table t (id int primary key, v varchar(64) character set ascii)", + }, { name: "normalized unsigned attribute", from: "create table t1 (id int primary key)", @@ -2925,6 +2945,11 @@ func TestNormalize(t *testing.T) { from: "create table t (id int primary key, v varchar(255) charset utf8mb4 collate utf8mb4_german2_ci)", to: "CREATE TABLE `t` (\n\t`id` int,\n\t`v` varchar(255) COLLATE utf8mb4_german2_ci,\n\tPRIMARY KEY (`id`)\n)", }, + { + name: "ascii charset and collation", + from: "create table t (id int primary key, v varchar(255) charset ascii collate ascii_general_ci) charset utf8mb3 collate utf8_general_ci", + to: "CREATE TABLE `t` (\n\t`id` int,\n\t`v` varchar(255) CHARACTER SET ascii COLLATE ascii_general_ci,\n\tPRIMARY KEY (`id`)\n) CHARSET utf8mb3,\n COLLATE utf8mb3_general_ci", + }, { name: "correct case table options for engine", from: "create table t (id int signed primary key) engine innodb", From e1c2b979b095d5aaef909d8a41584b19c51f48c6 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 22 May 2024 21:13:31 +0300 Subject: [PATCH 2/4] defer restoring original collation Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/schemadiff/column.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/go/vt/schemadiff/column.go b/go/vt/schemadiff/column.go index 6a635119079..f3024b7a26e 100644 --- a/go/vt/schemadiff/column.go +++ b/go/vt/schemadiff/column.go @@ -104,11 +104,15 @@ func (c *ColumnDefinitionEntity) ColumnDiff( ) (*ModifyColumnDiff, error) { if c.IsTextual() || 
other.IsTextual() { // We will now denormalize the columns charset & collate as needed (if empty, populate from table.) + // This column definition if c.columnDefinition.Type.Charset.Name != "" && c.columnDefinition.Type.Options.Collate == "" { collation := env.CollationEnv().DefaultCollationForCharset(c.columnDefinition.Type.Charset.Name) if collation == collations.Unknown { return nil, &UnknownColumnCharsetCollationError{Column: c.columnDefinition.Name.String(), Charset: t1cc.charset} } + defer func() { + c.columnDefinition.Type.Options.Collate = "" + }() c.columnDefinition.Type.Options.Collate = env.CollationEnv().LookupName(collation) } if c.columnDefinition.Type.Charset.Name == "" && c.columnDefinition.Type.Options.Collate != "" { @@ -143,11 +147,15 @@ func (c *ColumnDefinitionEntity) ColumnDiff( c.columnDefinition.Type.Options.Collate = env.CollationEnv().LookupName(collation) } } + // other column definition if other.columnDefinition.Type.Charset.Name != "" && other.columnDefinition.Type.Options.Collate == "" { collation := env.CollationEnv().DefaultCollationForCharset(other.columnDefinition.Type.Charset.Name) if collation == collations.Unknown { - return nil, &UnknownColumnCharsetCollationError{Column: other.columnDefinition.Name.String(), Charset: t1cc.charset} + return nil, &UnknownColumnCharsetCollationError{Column: other.columnDefinition.Name.String(), Charset: t2cc.charset} } + defer func() { + other.columnDefinition.Type.Options.Collate = "" + }() other.columnDefinition.Type.Options.Collate = env.CollationEnv().LookupName(collation) } if other.columnDefinition.Type.Charset.Name == "" && other.columnDefinition.Type.Options.Collate != "" { From 333c7f80043521389c1fec15447490a8f5dcfbd0 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Thu, 23 May 2024 07:55:22 +0300 Subject: [PATCH 3/4] more tests: compare column with just charset vs column with just collation Signed-off-by: Shlomi Noach 
<2607934+shlomi-noach@users.noreply.github.com> --- go/vt/schemadiff/table_test.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/go/vt/schemadiff/table_test.go b/go/vt/schemadiff/table_test.go index 1a82d73c722..1168f53f3b6 100644 --- a/go/vt/schemadiff/table_test.go +++ b/go/vt/schemadiff/table_test.go @@ -1911,11 +1911,21 @@ func TestCreateTableDiff(t *testing.T) { from: "create table t (id int primary key, v varchar(64) character set utf8mb3)", to: "create table t (id int primary key, v varchar(64) character set utf8mb3 collate utf8mb3_general_ci)", }, + { + name: "implicit charset and implicit collation", + from: "create table t (id int primary key, v varchar(64) character set utf8mb3)", + to: "create table t (id int primary key, v varchar(64) collate utf8mb3_general_ci)", + }, { name: "ignore identical implicit ascii collation", from: "create table t (id int primary key, v varchar(64) character set ascii collate ascii_general_ci)", to: "create table t (id int primary key, v varchar(64) character set ascii)", }, + { + name: "implicit charset and implicit collation, ascii", + from: "create table t (id int primary key, v varchar(64) collate ascii_general_ci)", + to: "create table t (id int primary key, v varchar(64) character set ascii)", + }, { name: "normalized unsigned attribute", from: "create table t1 (id int primary key)", From 58983d39bb6f8dd228a195593a87de6c30c35ed4 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Fri, 24 May 2024 10:47:41 +0300 Subject: [PATCH 4/4] revised comments Signed-off-by: Shlomi Noach
(*ModifyColumnDiff, error) { if c.IsTextual() || other.IsTextual() { // We will now denormalize the columns charset & collate as needed (if empty, populate from table.) - // This column definition + // Normalizing _this_ column definition: if c.columnDefinition.Type.Charset.Name != "" && c.columnDefinition.Type.Options.Collate == "" { + // Charset defined without collation. Assign the default collation for that charset. collation := env.CollationEnv().DefaultCollationForCharset(c.columnDefinition.Type.Charset.Name) if collation == collations.Unknown { return nil, &UnknownColumnCharsetCollationError{Column: c.columnDefinition.Name.String(), Charset: t1cc.charset} @@ -128,6 +129,7 @@ func (c *ColumnDefinitionEntity) ColumnDiff( c.columnDefinition.Type.Charset.Name = charset } if c.columnDefinition.Type.Charset.Name == "" { + // Still nothing? Assign the table's charset/collation. defer func() { c.columnDefinition.Type.Charset.Name = "" c.columnDefinition.Type.Options.Collate = "" @@ -147,8 +149,9 @@ func (c *ColumnDefinitionEntity) ColumnDiff( c.columnDefinition.Type.Options.Collate = env.CollationEnv().LookupName(collation) } } - // other column definition + // Normalizing _the other_ column definition: if other.columnDefinition.Type.Charset.Name != "" && other.columnDefinition.Type.Options.Collate == "" { + // Charset defined without collation. Assign the default collation for that charset. collation := env.CollationEnv().DefaultCollationForCharset(other.columnDefinition.Type.Charset.Name) if collation == collations.Unknown { return nil, &UnknownColumnCharsetCollationError{Column: other.columnDefinition.Name.String(), Charset: t2cc.charset} @@ -172,6 +175,7 @@ func (c *ColumnDefinitionEntity) ColumnDiff( } if other.columnDefinition.Type.Charset.Name == "" { + // Still nothing? Assign the table's charset/collation. defer func() { other.columnDefinition.Type.Charset.Name = "" other.columnDefinition.Type.Options.Collate = ""