Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v18 backport: schemadiff: fix diffing of textual columns with implicit charsets #15873

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 73 additions & 3 deletions go/vt/schemadiff/column.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package schemadiff
import (
"strings"

"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/vt/sqlparser"
)

Expand Down Expand Up @@ -81,12 +82,81 @@ func NewColumnDefinitionEntity(c *sqlparser.ColumnDefinition) *ColumnDefinitionE
// change this table to look like the other table.
// It returns an AlterTable statement if changes are found, or nil if not.
// the other table may be of different name; its name is ignored.
func (c *ColumnDefinitionEntity) ColumnDiff(other *ColumnDefinitionEntity, _ *DiffHints) *ModifyColumnDiff {
func (c *ColumnDefinitionEntity) ColumnDiff(
other *ColumnDefinitionEntity,
_ *DiffHints,
t1cc *charsetCollate,
t2cc *charsetCollate,
) (*ModifyColumnDiff, error) {
if c.IsTextual() || other.IsTextual() {
// We will now denormalize the columns charset & collate as needed (if empty, populate from table.)

if c.columnDefinition.Type.Charset.Name == "" && c.columnDefinition.Type.Options.Collate != "" {
// Column has explicit collation but no charset. We can infer the charset from the collation.
collationID := collationEnv.LookupByName(c.columnDefinition.Type.Options.Collate)
charset := collationEnv.LookupCharsetName(collationID)
if charset == "" {
return nil, &UnknownColumnCollationCharsetError{Column: c.columnDefinition.Name.String(), Collation: c.columnDefinition.Type.Options.Collate}
}
defer func() {
c.columnDefinition.Type.Charset.Name = ""
}()
c.columnDefinition.Type.Charset.Name = charset
}
if c.columnDefinition.Type.Charset.Name == "" {
defer func() {
c.columnDefinition.Type.Charset.Name = ""
c.columnDefinition.Type.Options.Collate = ""
}()
c.columnDefinition.Type.Charset.Name = t1cc.charset
if c.columnDefinition.Type.Options.Collate == "" {
defer func() {
c.columnDefinition.Type.Options.Collate = ""
}()
c.columnDefinition.Type.Options.Collate = t1cc.collate
}
if c.columnDefinition.Type.Options.Collate = t1cc.collate; c.columnDefinition.Type.Options.Collate == "" {
collation := collationEnv.DefaultCollationForCharset(t1cc.charset)
if collation == collations.Unknown {
return nil, &UnknownColumnCharsetCollationError{Column: c.columnDefinition.Name.String(), Charset: t1cc.charset}
}
c.columnDefinition.Type.Options.Collate = collationEnv.LookupName(collation)
}
}
if other.columnDefinition.Type.Charset.Name == "" && other.columnDefinition.Type.Options.Collate != "" {
// Column has explicit collation but no charset. We can infer the charset from the collation.
collationID := collationEnv.LookupByName(other.columnDefinition.Type.Options.Collate)
charset := collationEnv.LookupCharsetName(collationID)
if charset == "" {
return nil, &UnknownColumnCollationCharsetError{Column: other.columnDefinition.Name.String(), Collation: other.columnDefinition.Type.Options.Collate}
}
defer func() {
other.columnDefinition.Type.Charset.Name = ""
}()
other.columnDefinition.Type.Charset.Name = charset
}

if other.columnDefinition.Type.Charset.Name == "" {
defer func() {
other.columnDefinition.Type.Charset.Name = ""
other.columnDefinition.Type.Options.Collate = ""
}()
other.columnDefinition.Type.Charset.Name = t2cc.charset
if other.columnDefinition.Type.Options.Collate = t2cc.collate; other.columnDefinition.Type.Options.Collate == "" {
collation := collationEnv.DefaultCollationForCharset(t2cc.charset)
if collation == collations.Unknown {
return nil, &UnknownColumnCharsetCollationError{Column: other.columnDefinition.Name.String(), Charset: t2cc.charset}
}
other.columnDefinition.Type.Options.Collate = collationEnv.LookupName(collation)
}
}
}

if sqlparser.Equals.RefOfColumnDefinition(c.columnDefinition, other.columnDefinition) {
return nil
return nil, nil
}

return NewModifyColumnDiffByDefinition(other.columnDefinition)
return NewModifyColumnDiffByDefinition(other.columnDefinition), nil
}

// IsTextual returns true when this column is of textual type, and is capable of having a character set property
Expand Down
18 changes: 18 additions & 0 deletions go/vt/schemadiff/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -402,3 +402,21 @@ type EntityNotFoundError struct {
func (e *EntityNotFoundError) Error() string {
return fmt.Sprintf("entity %s not found", sqlescape.EscapeID(e.Name))
}

type UnknownColumnCharsetCollationError struct {
Column string
Charset string
}

func (e *UnknownColumnCharsetCollationError) Error() string {
return fmt.Sprintf("unable to determine collation for column %s with charset %q", sqlescape.EscapeID(e.Column), e.Charset)
}

type UnknownColumnCollationCharsetError struct {
Column string
Collation string
}

func (e *UnknownColumnCollationCharsetError) Error() string {
return fmt.Sprintf("unable to determine charset for column %s with collation %q", sqlescape.EscapeID(e.Column), e.Collation)
}
10 changes: 10 additions & 0 deletions go/vt/schemadiff/schema_diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,16 @@ func TestSchemaDiff(t *testing.T) {
expectDeps: 0,
entityOrder: []string{"t1"},
},
{
name: "two identical tables, one with explicit charset, one without",
fromQueries: []string{
"create table t (id int primary key, foo varchar(64) character set utf8mb3 collate utf8mb3_bin)",
},
toQueries: []string{
"create table t (id int primary key, foo varchar(64) collate utf8mb3_bin)",
},
entityOrder: []string{},
},
}
hints := &DiffHints{RangeRotationStrategy: RangeRotationDistinctStatements}
for _, tc := range tt {
Expand Down
38 changes: 35 additions & 3 deletions go/vt/schemadiff/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ import (
"vitess.io/vitess/go/vt/sqlparser"
)

type charsetCollate struct {
charset string
collate string
}

type AlterTableEntityDiff struct {
from *CreateTableEntity
to *CreateTableEntity
Expand Down Expand Up @@ -388,6 +393,22 @@ func (c *CreateTableEntity) Clone() Entity {
return &CreateTableEntity{CreateTable: sqlparser.CloneRefOfCreateTable(c.CreateTable)}
}

func getTableCharsetCollate(tableOptions *sqlparser.TableOptions) *charsetCollate {
cc := &charsetCollate{
charset: collationEnv.LookupCharsetName(collations.ID(collationEnv.DefaultConnectionCharset())),
collate: collationEnv.LookupName(collations.ID(collationEnv.DefaultConnectionCharset())),
}
for _, option := range *tableOptions {
if strings.EqualFold(option.Name, "charset") {
cc.charset = option.String
}
if strings.EqualFold(option.Name, "collate") {
cc.collate = option.String
}
}
return cc
}

// Right now we assume MySQL 8.0 for the collation normalization handling.
const mysqlCollationVersion = "8.0.0"

Expand Down Expand Up @@ -804,6 +825,9 @@ func (c *CreateTableEntity) TableDiff(other *CreateTableEntity, hints *DiffHints
alterTable.Table.Qualifier = other.Table.Qualifier
}

t1cc := getTableCharsetCollate(&c.CreateTable.TableSpec.Options)
t2cc := getTableCharsetCollate(&other.CreateTable.TableSpec.Options)

diffedTableCharset := ""
var parentAlterTableEntityDiff *AlterTableEntityDiff
var partitionSpecs []*sqlparser.PartitionSpec
Expand All @@ -818,7 +842,9 @@ func (c *CreateTableEntity) TableDiff(other *CreateTableEntity, hints *DiffHints
// ordered columns for both tables:
t1Columns := c.CreateTable.TableSpec.Columns
t2Columns := other.CreateTable.TableSpec.Columns
c.diffColumns(alterTable, t1Columns, t2Columns, hints, diffedTableCharset != "")
if err := c.diffColumns(alterTable, t1Columns, t2Columns, hints, t1cc, t2cc, diffedTableCharset != ""); err != nil {
return nil, err
}
}
{
// diff keys
Expand Down Expand Up @@ -1514,8 +1540,10 @@ func (c *CreateTableEntity) diffColumns(alterTable *sqlparser.AlterTable,
t1Columns []*sqlparser.ColumnDefinition,
t2Columns []*sqlparser.ColumnDefinition,
hints *DiffHints,
t1cc *charsetCollate,
t2cc *charsetCollate,
tableCharsetChanged bool,
) {
) error {
getColumnsMap := func(cols []*sqlparser.ColumnDefinition) map[string]*columnDetails {
var prevCol *columnDetails
m := map[string]*columnDetails{}
Expand Down Expand Up @@ -1577,7 +1605,10 @@ func (c *CreateTableEntity) diffColumns(alterTable *sqlparser.AlterTable,
t2ColEntity := NewColumnDefinitionEntity(t2Col)

// check diff between before/after columns:
modifyColumnDiff := t1ColEntity.ColumnDiff(t2ColEntity, hints)
modifyColumnDiff, err := t1ColEntity.ColumnDiff(t2ColEntity, hints, t1cc, t2cc)
if err != nil {
return err
}
if modifyColumnDiff == nil {
// even if there's no apparent change, there can still be implicit changes
// it is possible that the table charset is changed. the column may be some col1 TEXT NOT NULL, possibly in both varsions 1 and 2,
Expand Down Expand Up @@ -1644,6 +1675,7 @@ func (c *CreateTableEntity) diffColumns(alterTable *sqlparser.AlterTable,
for _, c := range addColumns {
alterTable.AlterOptions = append(alterTable.AlterOptions, c)
}
return nil
}

func heuristicallyDetectColumnRenames(
Expand Down
14 changes: 14 additions & 0 deletions go/vt/schemadiff/table_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1136,6 +1136,20 @@ func TestCreateTableDiff(t *testing.T) {
diff: "alter table t modify column t1 varchar(128) not null, modify column t2 varchar(128) not null, modify column t3 tinytext, charset utf8mb4",
cdiff: "ALTER TABLE `t` MODIFY COLUMN `t1` varchar(128) NOT NULL, MODIFY COLUMN `t2` varchar(128) NOT NULL, MODIFY COLUMN `t3` tinytext, CHARSET utf8mb4",
},
{
name: "ignore identical implicit charset",
from: "create table t (id int primary key, v varchar(64) character set utf8mb3 collate utf8mb3_bin)",
to: "create table t (id int primary key, v varchar(64) collate utf8mb3_bin)",
},
{
name: "ignore identical implicit charset",
from: "create table t (id int primary key, v varchar(64) character set utf8mb3 collate utf8mb3_bin)",
to: "create table t (id int primary key, v varchar(64) collate utf8mb3_bin)",
},
{
from: "create table t (id int primary key, v varchar(64) character set utf8 collate utf8_bin)",
to: "create table t (id int primary key, v varchar(64) character set utf8mb3 collate utf8mb3_bin)",
},
{
name: "normalized unsigned attribute",
from: "create table t1 (id int primary key)",
Expand Down
Loading