From c46dc5b6a4329a10589ca928392218d96031ac8d Mon Sep 17 00:00:00 2001 From: Dirkjan Bussink Date: Wed, 8 May 2024 12:21:33 +0200 Subject: [PATCH] Merge pull request from GHSA-649x-hxfx-57j2 * collations: Fix OOM and handle padding for multibyte This fixes the OOM issue where a simple query can trigger a denial of service attack. It also ensures we return the right result for these queries by doing the correct padding. Signed-off-by: Dirkjan Bussink * Address review comments Signed-off-by: Dirkjan Bussink --------- Signed-off-by: Dirkjan Bussink --- go/mysql/collations/charset/convert.go | 2 +- go/mysql/collations/charset/helpers.go | 2 +- go/mysql/collations/charset/unicode/utf16.go | 6 +-- go/mysql/collations/charset/unicode/utf32.go | 2 +- go/vt/vtgate/evalengine/compiler_asm.go | 3 +- go/vt/vtgate/evalengine/compiler_test.go | 24 ++++++++++++ go/vt/vtgate/evalengine/expr_collate.go | 39 ++++++++++++++++++-- go/vt/vtgate/evalengine/translate.go | 2 +- 8 files changed, 68 insertions(+), 12 deletions(-) diff --git a/go/mysql/collations/charset/convert.go b/go/mysql/collations/charset/convert.go index 3904329654c..526bb31c5d9 100644 --- a/go/mysql/collations/charset/convert.go +++ b/go/mysql/collations/charset/convert.go @@ -70,7 +70,7 @@ func convertSlow(dst []byte, dstCharset Charset, src []byte, srcCharset Charset) for len(src) > 0 { cp, width := srcCharset.DecodeRune(src) - if cp == utf8.RuneError && width < 3 { + if cp == utf8.RuneError { failed++ cp = '?' 
} diff --git a/go/mysql/collations/charset/helpers.go b/go/mysql/collations/charset/helpers.go index 851ce4bebf9..b66a6c77b87 100644 --- a/go/mysql/collations/charset/helpers.go +++ b/go/mysql/collations/charset/helpers.go @@ -41,7 +41,7 @@ func Validate(charset Charset, input []byte) bool { } for len(input) > 0 { r, size := charset.DecodeRune(input) - if r == RuneError && size < 2 { + if r == RuneError { return false } input = input[size:] diff --git a/go/mysql/collations/charset/unicode/utf16.go b/go/mysql/collations/charset/unicode/utf16.go index eb055db7382..924c12be7b7 100644 --- a/go/mysql/collations/charset/unicode/utf16.go +++ b/go/mysql/collations/charset/unicode/utf16.go @@ -67,7 +67,7 @@ func (Charset_utf16be) EncodeRune(dst []byte, r rune) int { func (Charset_utf16be) DecodeRune(b []byte) (rune, int) { if len(b) < 2 { - return utf8.RuneError, 0 + return utf8.RuneError, len(b) } r1 := uint16(b[1]) | uint16(b[0])<<8 @@ -129,7 +129,7 @@ func (Charset_utf16le) EncodeRune(dst []byte, r rune) int { func (Charset_utf16le) DecodeRune(b []byte) (rune, int) { if len(b) < 2 { - return utf8.RuneError, 0 + return utf8.RuneError, len(b) } r1 := uint16(b[0]) | uint16(b[1])<<8 @@ -185,7 +185,7 @@ func (Charset_ucs2) EncodeRune(dst []byte, r rune) int { func (Charset_ucs2) DecodeRune(p []byte) (rune, int) { if len(p) < 2 { - return utf8.RuneError, 0 + return utf8.RuneError, len(p) } return rune(p[0])<<8 | rune(p[1]), 2 } diff --git a/go/mysql/collations/charset/unicode/utf32.go b/go/mysql/collations/charset/unicode/utf32.go index 97095bb7f98..6053d7d10f8 100644 --- a/go/mysql/collations/charset/unicode/utf32.go +++ b/go/mysql/collations/charset/unicode/utf32.go @@ -49,7 +49,7 @@ func (Charset_utf32) EncodeRune(dst []byte, r rune) int { func (Charset_utf32) DecodeRune(p []byte) (rune, int) { if len(p) < 4 { - return utf8.RuneError, 0 + return utf8.RuneError, len(p) } return (rune(p[0]) << 24) | (rune(p[1]) << 16) | (rune(p[2]) << 8) | rune(p[3]), 4 } diff --git 
a/go/vt/vtgate/evalengine/compiler_asm.go b/go/vt/vtgate/evalengine/compiler_asm.go index c62c31f6782..78e535bfcf6 100644 --- a/go/vt/vtgate/evalengine/compiler_asm.go +++ b/go/vt/vtgate/evalengine/compiler_asm.go @@ -4253,7 +4253,8 @@ func (asm *assembler) Fn_UUID_TO_BIN1() { func (asm *assembler) Introduce(offset int, t sqltypes.Type, col collations.TypedCollation) { asm.emit(func(env *ExpressionEnv) int { - arg := evalToBinary(env.vm.stack[env.vm.sp-offset]) + var arg *evalBytes + arg, env.vm.err = introducerCast(env.vm.stack[env.vm.sp-offset], col.Collation) arg.tt = int16(t) arg.col = col env.vm.stack[env.vm.sp-offset] = arg diff --git a/go/vt/vtgate/evalengine/compiler_test.go b/go/vt/vtgate/evalengine/compiler_test.go index b38b5f14557..5cf335c14e2 100644 --- a/go/vt/vtgate/evalengine/compiler_test.go +++ b/go/vt/vtgate/evalengine/compiler_test.go @@ -492,6 +492,30 @@ func TestCompilerSingle(t *testing.T) { expression: `week('2024-12-31', 5)`, result: `INT64(53)`, }, + { + expression: `convert(0xFF using utf16)`, + result: `VARCHAR("ÿ")`, + }, + { + expression: `_utf16 0xFF`, + result: `VARCHAR("ÿ")`, + }, + { + expression: `convert(0xFF using utf32)`, + result: `NULL`, + }, + { + expression: `cast(_utf32 0xFF as binary)`, + result: `VARBINARY("\x00\x00\x00\xff")`, + }, + { + expression: `cast(_utf32 0x00FF as binary)`, + result: `VARBINARY("\x00\x00\x00\xff")`, + }, + { + expression: `cast(_utf32 0x0000FF as binary)`, + result: `VARBINARY("\x00\x00\x00\xff")`, + }, } for _, tc := range testCases { diff --git a/go/vt/vtgate/evalengine/expr_collate.go b/go/vt/vtgate/evalengine/expr_collate.go index c03a61b2178..2d1f04d1243 100644 --- a/go/vt/vtgate/evalengine/expr_collate.go +++ b/go/vt/vtgate/evalengine/expr_collate.go @@ -18,6 +18,7 @@ package evalengine import ( "vitess.io/vitess/go/mysql/collations" + "vitess.io/vitess/go/mysql/collations/charset" "vitess.io/vitess/go/sqltypes" querypb "vitess.io/vitess/go/vt/proto/query" vtrpcpb 
"vitess.io/vitess/go/vt/proto/vtrpc" @@ -210,15 +211,56 @@ func (ca *collationAggregation) result() collations.TypedCollation { var _ Expr = (*IntroducerExpr)(nil)
+// introducerCast applies a character set introducer (such as `_utf16 0xFF`)
+// to e and returns the result as text tagged with collation col.
+//
+// The binary collation is delegated to evalToBinary. For any other collation
+// the raw bytes of e are used as-is. If e is an *evalBytes and the target
+// charset has a minimum character width above one byte (the UTF-16 variants,
+// UCS-2 and UTF-32), the bytes are first left-padded with zero bytes to a
+// whole number of characters, so `_utf32 0xFF` yields 0x000000FF.
+//
+// The returned error is always nil at present.
+func introducerCast(e eval, col collations.ID) (*evalBytes, error) {
+	if col == collations.CollationBinaryID {
+		return evalToBinary(e), nil
+	}
+
+	var raw []byte
+	if b, ok := e.(*evalBytes); ok {
+		raw = b.bytes
+		// We only need to pad here for encodings that have a minimum
+		// character byte width larger than 1, which is all UTF-16
+		// variations and UTF-32.
+		switch col.Get().Charset().(type) {
+		case charset.Charset_utf16, charset.Charset_utf16le, charset.Charset_ucs2:
+			if len(raw)%2 != 0 {
+				raw = append([]byte{0}, raw...)
+			}
+		case charset.Charset_utf32:
+			if mod := len(raw) % 4; mod != 0 {
+				raw = append(make([]byte, 4-mod), raw...)
+			}
+		}
+	} else {
+		// The type assertion failed, so b is a nil *evalBytes here; take the
+		// bytes from e itself rather than calling through b.
+		raw = e.ToRawBytes()
+	}
+	typedcol := collations.TypedCollation{
+		Collation:    col,
+		Coercibility: collations.CoerceCoercible,
+		Repertoire:   collations.RepertoireASCII,
+	}
+	return newEvalText(raw, typedcol), nil
+}
+
 func (expr *IntroducerExpr) eval(env *ExpressionEnv) (eval, error) { e, err := expr.Inner.eval(env) if err != nil { return nil, err } - if expr.TypedCollation.Collation == collations.CollationBinaryID { - return evalToBinary(e), nil - } - return evalToVarchar(e, expr.TypedCollation.Collation, false) + return introducerCast(e, expr.TypedCollation.Collation) } func (expr *IntroducerExpr) typeof(env *ExpressionEnv, fields []*querypb.Field) (sqltypes.Type, typeFlag) { diff --git a/go/vt/vtgate/evalengine/translate.go b/go/vt/vtgate/evalengine/translate.go index 6b14d8d350c..8dedeb8bfb4 100644 --- a/go/vt/vtgate/evalengine/translate.go +++ b/go/vt/vtgate/evalengine/translate.go @@ -361,7 +361,7 @@ func (ast *astCompiler) translateIntroducerExpr(introduced *sqlparser.Introducer case collations.CollationBinaryID: lit.inner = evalToBinary(lit.inner) default: - lit.inner, err =
evalToVarchar(lit.inner, collation, false) + lit.inner, err = introducerCast(lit.inner, collation) if err != nil { return nil, err }