diff --git a/go/mysql/collations/charset/convert.go b/go/mysql/collations/charset/convert.go
index 1c0ced27e4e..6054ae33559 100644
--- a/go/mysql/collations/charset/convert.go
+++ b/go/mysql/collations/charset/convert.go
@@ -19,6 +19,8 @@ package charset
 import (
 	"fmt"
 	"unicode/utf8"
+
+	"vitess.io/vitess/go/hack"
 )
 
 func failedConversionError(from, to Charset, input []byte) error {
@@ -126,6 +128,78 @@ func Convert(dst []byte, dstCharset Charset, src []byte, srcCharset Charset) ([]
 	}
 }
 
+func Expand(dst []rune, src []byte, srcCharset Charset) []rune {
+	switch srcCharset := srcCharset.(type) {
+	case Charset_utf8mb3, Charset_utf8mb4:
+		if dst == nil {
+			return []rune(string(src))
+		}
+		// Append to the caller-provided buffer instead of discarding it,
+		// matching the behavior of the other branches.
+		for _, cp := range string(src) {
+			dst = append(dst, cp)
+		}
+		return dst
+	case Charset_binary:
+		if dst == nil {
+			dst = make([]rune, 0, len(src))
+		}
+		for _, c := range src {
+			dst = append(dst, rune(c))
+		}
+		return dst
+	default:
+		if dst == nil {
+			dst = make([]rune, 0, len(src))
+		}
+		for len(src) > 0 {
+			cp, width := srcCharset.DecodeRune(src)
+			src = src[width:]
+			dst = append(dst, cp)
+		}
+		return dst
+	}
+}
+
+func Collapse(dst []byte, src []rune, dstCharset Charset) []byte {
+	switch dstCharset := dstCharset.(type) {
+	case Charset_utf8mb3, Charset_utf8mb4:
+		if dst == nil {
+			return hack.StringBytes(string(src))
+		}
+		return append(dst, hack.StringBytes(string(src))...)
+	case Charset_binary:
+		if dst == nil {
+			dst = make([]byte, 0, len(src))
+		}
+		for _, b := range src {
+			dst = append(dst, byte(b))
+		}
+		return dst
+	default:
+		nDst := 0
+		if dst == nil {
+			dst = make([]byte, len(src)*dstCharset.MaxWidth())
+		} else {
+			dst = dst[:cap(dst)]
+		}
+		for _, c := range src {
+			if len(dst)-nDst < 4 {
+				newDst := make([]byte, len(dst)*2)
+				copy(newDst, dst[:nDst])
+				dst = newDst
+			}
+			w := dstCharset.EncodeRune(dst[nDst:], c)
+			if w < 0 {
+				if w = dstCharset.EncodeRune(dst[nDst:], '?'); w < 0 {
+					break
+				}
+			}
+			nDst += w
+		}
+		return dst[:nDst]
+	}
+}
+
 func ConvertFromUTF8(dst []byte, dstCharset Charset, src []byte) ([]byte, error) {
 	return Convert(dst, dstCharset, src, Charset_utf8mb4{})
 }
diff --git a/go/mysql/collations/env.go b/go/mysql/collations/env.go
index 52a255b6f41..0c063e140d5 100644
--- a/go/mysql/collations/env.go
+++ b/go/mysql/collations/env.go
@@ -194,10 +194,11 @@ func makeEnv(version collver) *Environment {
 // A few interesting character set values.
// See http://dev.mysql.com/doc/internals/en/character-set.html#packet-Protocol::CharacterSet const ( - CollationUtf8ID = 33 - CollationUtf8mb4ID = 255 - CollationBinaryID = 63 - CollationUtf8mb4BinID = 46 + CollationUtf8ID = 33 + CollationUtf8mb4ID = 255 + CollationBinaryID = 63 + CollationUtf8mb4BinID = 46 + CollationLatin1Swedish = 8 ) // Binary is the default Binary collation diff --git a/go/mysql/constants.go b/go/mysql/constants.go index b2c9b4d49a5..bedc9871426 100644 --- a/go/mysql/constants.go +++ b/go/mysql/constants.go @@ -565,6 +565,31 @@ const ( ERJSONValueTooBig = ErrorCode(3150) ERJSONDocumentTooDeep = ErrorCode(3157) + ERRegexpStringNotTerminated = ErrorCode(3684) + ERRegexpBufferOverflow = ErrorCode(3684) + ERRegexpIllegalArgument = ErrorCode(3685) + ERRegexpIndexOutOfBounds = ErrorCode(3686) + ERRegexpInternal = ErrorCode(3687) + ERRegexpRuleSyntax = ErrorCode(3688) + ERRegexpBadEscapeSequence = ErrorCode(3689) + ERRegexpUnimplemented = ErrorCode(3690) + ERRegexpMismatchParen = ErrorCode(3691) + ERRegexpBadInterval = ErrorCode(3692) + ERRRegexpMaxLtMin = ErrorCode(3693) + ERRegexpInvalidBackRef = ErrorCode(3694) + ERRegexpLookBehindLimit = ErrorCode(3695) + ERRegexpMissingCloseBracket = ErrorCode(3696) + ERRegexpInvalidRange = ErrorCode(3697) + ERRegexpStackOverflow = ErrorCode(3698) + ERRegexpTimeOut = ErrorCode(3699) + ERRegexpPatternTooBig = ErrorCode(3700) + ERRegexpInvalidCaptureGroup = ErrorCode(3887) + ERRegexpInvalidFlag = ErrorCode(3900) + + ERCharacterSetMismatch = ErrorCode(3995) + + ERWrongParametersToNativeFct = ErrorCode(1583) + // max execution time exceeded ERQueryTimeout = ErrorCode(3024) diff --git a/go/mysql/icuregex/compiler.go b/go/mysql/icuregex/compiler.go new file mode 100644 index 00000000000..eba297d0f21 --- /dev/null +++ b/go/mysql/icuregex/compiler.go @@ -0,0 +1,3647 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package icuregex + +import ( + "math" + "strings" + "unicode/utf8" + + "golang.org/x/exp/slices" + + "vitess.io/vitess/go/mysql/icuregex/internal/pattern" + "vitess.io/vitess/go/mysql/icuregex/internal/ucase" + "vitess.io/vitess/go/mysql/icuregex/internal/uchar" + "vitess.io/vitess/go/mysql/icuregex/internal/unames" + "vitess.io/vitess/go/mysql/icuregex/internal/uprops" + "vitess.io/vitess/go/mysql/icuregex/internal/uset" + "vitess.io/vitess/go/mysql/icuregex/internal/utf16" +) + +const BreakIteration = false +const stackSize = 100 + +type reChar struct { + char rune + quoted bool +} + +const ( + parenPlain = -1 + parenCapturing = -2 + parenAtomic = -3 + parenLookahead = -4 + parenNegLookahead = -5 + parenFlags = -6 + parenLookBehind = -7 + parenLookBehindN = -8 +) + +type setOperation uint32 + +const ( + setStart setOperation = 0<<16 | 1 + setEnd setOperation = 1<<16 | 2 + setNegation setOperation = 2<<16 | 3 + setCaseClose setOperation = 2<<16 | 9 + setDifference2 setOperation = 3<<16 | 4 // '--' set difference operator + setIntersection2 setOperation = 3<<16 | 5 // '&&' set intersection operator + setUnion setOperation = 4<<16 | 6 // implicit union of adjacent items + setDifference1 setOperation = 4<<16 | 7 // '-', single dash difference op, for compatibility with old UnicodeSet. + setIntersection1 setOperation = 4<<16 | 8 // '&', single amp intersection op, for compatibility with old UnicodeSet. +) + +type compiler struct { + err error + out *Pattern + p []rune + + scanIndex int + quoteMode bool + inBackslashQuote bool + eolComments bool + + lineNum int + charNum int + lastChar rune + peekChar rune + + c reChar + stack [stackSize]uint16 + stackPtr int + + modeFlags RegexpFlag + newModeFlags RegexpFlag + setModeFlag bool + + literalChars []rune + + parenStack []int + matchOpenParen int + matchCloseParen int + + intervalLow int + intervalUpper int + + setStack []*uset.UnicodeSet + setOpStack []setOperation + + lastSetLiteral rune + captureName *strings.Builder +} + +func newCompiler(pat *Pattern) *compiler { + return &compiler{ + out: pat, + scanIndex: 0, + eolComments: true, + lineNum: 1, + charNum: 0, + lastChar: -1, + peekChar: -1, + modeFlags: RegexpFlag(uint32(pat.flags) | 0x80000000), + matchOpenParen: -1, + matchCloseParen: -1, + lastSetLiteral: -1, + } +} + +func (c *compiler) nextCharLL() (ch rune) { + if c.peekChar != -1 { + ch, c.peekChar = c.peekChar, -1 + return + } + if len(c.p) == 0 { + return -1 + } + + ch = c.p[0] + c.p = c.p[1:] + if ch == utf8.RuneError { + return -1 + } + + if ch == chCR || ch == chNEL || ch == chLS || (ch == chLF && c.lastChar != chCR) { + c.lineNum++ + c.charNum = 0 + } else { + if ch != chLF { + c.charNum++ + } + } + c.lastChar = ch + return +} + +func (c *compiler) peekCharLL() rune { + if c.peekChar == -1 { + c.peekChar = c.nextCharLL() + } + return c.peekChar +} + +func (c *compiler) nextChar(ch *reChar) { + c.scanIndex++ + ch.char = c.nextCharLL() + ch.quoted = false + + if c.quoteMode { + ch.quoted = true + if (ch.char == chBackSlash && c.peekCharLL() == chE && ((c.modeFlags & Literal) == 0)) || + ch.char == -1 { + c.quoteMode = false // Exit quote mode, + c.nextCharLL() // discard the E + c.nextChar(ch) + return + } + } else if c.inBackslashQuote { + // The current character immediately follows a '\' + // Don't check for any further escapes, just return it as-is. + // Don't set c.fQuoted, because that would prevent the state machine from + // dispatching on the character. 
+ c.inBackslashQuote = false + } else { + // We are not in a \Q quoted region \E of the source. + // + if (c.modeFlags & Comments) != 0 { + // + // We are in free-spacing and comments mode. + // Scan through any white space and comments, until we + // reach a significant character or the end of inut. + for { + if ch.char == -1 { + break // End of Input + } + if ch.char == chPound && c.eolComments { + // Start of a comment. Consume the rest of it, until EOF or a new line + for { + ch.char = c.nextCharLL() + if ch.char == -1 || // EOF + ch.char == chCR || + ch.char == chLF || + ch.char == chNEL || + ch.char == chLS { + break + } + } + } + // TODO: check what Java & Perl do with non-ASCII white spaces. Ticket 6061. + if !pattern.IsWhitespace(ch.char) { + break + } + ch.char = c.nextCharLL() + } + } + + // + // check for backslash escaped characters. + // + if ch.char == chBackSlash { + beforeEscape := c.p + if staticSetUnescape.ContainsRune(c.peekCharLL()) { + // + // A '\' sequence that is handled by ICU's standard unescapeAt function. + // Includes \uxxxx, \n, \r, many others. + // Return the single equivalent character. + // + c.nextCharLL() // get & discard the peeked char. + ch.quoted = true + + ch.char, c.p = pattern.UnescapeAtRunes(beforeEscape) + if ch.char < 0 { + c.error(BadEscapeSequence) + } + c.charNum += len(beforeEscape) - len(c.p) + } else if c.peekCharLL() == chDigit0 { + // Octal Escape, using Java Regexp Conventions + // which are \0 followed by 1-3 octal digits. + // Different from ICU Unescape handling of Octal, which does not + // require the leading 0. + // Java also has the convention of only consuming 2 octal digits if + // the three digit number would be > 0xff + // + ch.char = 0 + c.nextCharLL() // Consume the initial 0. + for index := 0; index < 3; index++ { + ch2 := c.peekCharLL() + if ch2 < chDigit0 || ch2 > chDigit7 { + if index == 0 { + // \0 is not followed by any octal digits. + c.error(BadEscapeSequence) + } + break + } + ch.char <<= 3 + ch.char += ch2 & 7 + if ch.char <= 255 { + c.nextCharLL() + } else { + // The last digit made the number too big. Forget we saw it. + ch.char >>= 3 + } + } + ch.quoted = true + } else if c.peekCharLL() == chQ { + // "\Q" enter quote mode, which will continue until "\E" + c.quoteMode = true + c.nextCharLL() // discard the 'Q'. + c.nextChar(ch) // recurse to get the real next char. + return + } else { + // We are in a '\' escape that will be handled by the state table scanner. + // Just return the backslash, but remember that the following char is to + // be taken literally. + c.inBackslashQuote = true + } + } + } + + // re-enable # to end-of-line comments, in case they were disabled. + // They are disabled by the parser upon seeing '(?', but this lasts for + // the fetching of the next character only. + c.eolComments = true +} + +const ( + chCR = 0x0d // New lines, for terminating comments. + chLF = 0x0a // Line Feed + chPound = 0x23 // '#', introduces a comment. 
+	chDigit0    = 0x30 // '0'
+	chDigit7    = 0x37 // '7'
+	chColon     = 0x3A // ':'
+	chE         = 0x45 // 'E'
+	chQ         = 0x51 // 'Q'
+	chN         = 0x4E // 'N'
+	chP         = 0x50 // 'P'
+	chBackSlash = 0x5c // '\'  introduces a char escape
+	chLBracket  = 0x5b // '['
+	chRBracket  = 0x5d // ']'
+	chUp        = 0x5e // '^'
+	chLowerP    = 0x70
+	chLBrace    = 0x7b // '{'
+	chRBrace    = 0x7d // '}'
+	chNEL       = 0x85 // NEL newline variant
+	chLS        = 0x2028 // Unicode Line Separator
+	chAmp       = 0x26 // '&'
+	chDash      = 0x2d // '-'
+)
+
+func (c *compiler) compile(pat []rune) error {
+	if c.err != nil {
+		return c.err
+	}
+	if c.out.pattern != "" {
+		panic("cannot reuse pattern")
+	}
+
+	c.out.pattern = string(pat)
+	c.p = pat
+
+	var state uint16 = 1
+	var table []regexTableEl
+
+	// UREGEX_LITERAL forces the entire pattern to be treated as a literal string.
+	if c.modeFlags&Literal != 0 {
+		c.quoteMode = true
+	}
+
+	c.nextChar(&c.c)
+
+	// Main loop for the regex pattern parsing state machine.
+	// Runs once per state transition.
+	// Each time through optionally performs, depending on the state table,
+	//   - an advance to the next pattern char
+	//   - an action to be performed.
+	//   - pushing or popping a state to/from the local state return stack.
+	// file regexcst.txt is the source for the state table.  The logic behind
+	// recognizing the pattern syntax is there, not here.
+	for {
+		if c.err != nil {
+			break
+		}
+
+		if state == 0 {
+			panic("bad state?")
+		}
+
+		table = parseStateTable[state:]
+		for len(table) > 0 {
+			if table[0].charClass < 127 && !c.c.quoted && rune(table[0].charClass) == c.c.char {
+				break
+			}
+			if table[0].charClass == 255 {
+				break
+			}
+			if table[0].charClass == 254 && c.c.quoted {
+				break
+			}
+			if table[0].charClass == 253 && c.c.char == -1 {
+				break
+			}
+			if table[0].charClass >= 128 && table[0].charClass < 240 && !c.c.quoted && c.c.char != -1 {
+				if staticRuleSet[table[0].charClass-128].ContainsRune(c.c.char) {
+					break
+				}
+			}
+
+			table = table[1:]
+		}
+
+		if !c.doParseActions(table[0].action) {
+			break
+		}
+
+		if table[0].pushState != 0 {
+			c.stackPtr++
+			if c.stackPtr >= stackSize {
+				c.error(InternalError)
+				c.stackPtr--
+			}
+			c.stack[c.stackPtr] = uint16(table[0].pushState)
+		}
+
+		if table[0].nextChar {
+			c.nextChar(&c.c)
+		}
+
+		if table[0].nextState != 255 {
+			state = uint16(table[0].nextState)
+		} else {
+			state = c.stack[c.stackPtr]
+			c.stackPtr--
+			if c.stackPtr < 0 {
+				c.stackPtr++
+				c.error(MismatchedParen)
+			}
+		}
+	}
+
+	if c.err != nil {
+		return c.err
+	}
+
+	c.allocateStackData(restackframeHdrCount)
+	c.stripNOPs()
+
+	c.out.minMatchLen = c.minMatchLength(3, len(c.out.compiledPat)-1)
+
+	c.matchStartType()
+	return c.err
+}
+
+func (c *compiler) doParseActions(action patternParseAction) bool {
+	switch action {
+	case doPatStart:
+		// Start of pattern compiles to:
+		//0   SAVE   2        Fall back to position of FAIL
+		//1   jmp    3
+		//2   FAIL            Stop if we ever reach here.
+		//3   NOP             Dummy, so start of pattern looks the same as
+		//                      the start of an ( grouping.
+		//4   NOP             Reserved, will be replaced by a save if there are
+		//                      OR | operators at the top level
+		c.appendOp(urxStateSave, 2)
+		c.appendOp(urxJmp, 3)
+		c.appendOp(urxFail, 0)
+
+		// Standard open nonCapture paren action emits the two NOPs and
+		// sets up the paren stack frame.
+		c.doParseActions(doOpenNonCaptureParen)
+
+	case doPatFinish:
+		// We've scanned to the end of the pattern
+		// The end of pattern compiles to:
+		//        URX_END
+		// which will stop the runtime match engine.
+ // Encountering end of pattern also behaves like a close paren, + // and forces fixups of the State Save at the beginning of the compiled pattern + // and of any OR operations at the top level. + // + c.handleCloseParen() + if len(c.parenStack) > 0 { + // Missing close paren in pattern. + c.error(MismatchedParen) + } + + // add the END operation to the compiled pattern. + c.appendOp(urxEnd, 0) + + // Terminate the pattern compilation state machine. + return false + + case doOrOperator: + // Scanning a '|', as in (A|B) + // Generate code for any pending literals preceding the '|' + c.fixLiterals(false) + + // Insert a SAVE operation at the start of the pattern section preceding + // this OR at this level. This SAVE will branch the match forward + // to the right hand side of the OR in the event that the left hand + // side fails to match and backtracks. Locate the position for the + // save from the location on the top of the parentheses stack. + var savePosition int + savePosition, c.parenStack = stackPop(c.parenStack) + op := c.out.compiledPat[savePosition] + + if op.typ() != urxNop { + panic("expected a NOP placeholder") + } + + op = c.buildOp(urxStateSave, len(c.out.compiledPat)+1) + c.out.compiledPat[savePosition] = op + + // Append an JMP operation into the compiled pattern. The operand for + // the JMP will eventually be the location following the ')' for the + // group. This will be patched in later, when the ')' is encountered. + c.appendOp(urxJmp, 0) + + // Push the position of the newly added JMP op onto the parentheses stack. + // This registers if for fixup when this block's close paren is encountered. + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-1) + + // Append a NOP to the compiled pattern. This is the slot reserved + // for a SAVE in the event that there is yet another '|' following + // this one. + c.appendOp(urxNop, 0) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-1) + + case doBeginNamedCapture: + // Scanning (? + // Compile to a + // - NOP, which later may be replaced if the parenthesized group + // has a quantifier, followed by + // - STO_SP save state stack position, so it can be restored at the ")" + // - NOP, which may later be replaced by a save-state if there + // is an '|' alternation within the parens. + c.fixLiterals(false) + c.appendOp(urxNop, 0) + varLoc := c.allocateData(1) // Reserve a data location for saving the state stack ptr. + c.appendOp(urxStoSp, varLoc) + c.appendOp(urxNop, 0) + + // On the Parentheses stack, start a new frame and add the postions + // of the two NOPs. Depending on what follows in the pattern, the + // NOPs may be changed to SAVE_STATE or JMP ops, with a target + // address of the end of the parenthesized group. + c.parenStack = append(c.parenStack, int(c.modeFlags)) + c.parenStack = append(c.parenStack, parenAtomic) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-3) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-1) + + case doOpenLookAhead: + // Positive Look-ahead (?= stuff ) + // + // Note: Addition of transparent input regions, with the need to + // restore the original regions when failing out of a lookahead + // block, complicated this sequence. Some combined opcodes + // might make sense - or might not, lookahead aren't that common. + // + // Caution: min match length optimization knows about this + // sequence; don't change without making updates there too. + // + // Compiles to + // 1 LA_START dataLoc Saves SP, Input Pos, Active input region. + // 2. 
STATE_SAVE 4 on failure of lookahead, goto 4 + // 3 JMP 6 continue ... + // + // 4. LA_END Look Ahead failed. Restore regions. + // 5. BACKTRACK and back track again. + // + // 6. NOP reserved for use by quantifiers on the block. + // Look-ahead can't have quantifiers, but paren stack + // compile time conventions require the slot anyhow. + // 7. NOP may be replaced if there is are '|' ops in the block. + // 8. code for parenthesized stuff. + // 9. LA_END + // + // Four data slots are reserved, for saving state on entry to the look-around + // 0: stack pointer on entry. + // 1: input position on entry. + // 2: fActiveStart, the active bounds start on entry. + // 3: fActiveLimit, the active bounds limit on entry. + c.fixLiterals(false) + dataLoc := c.allocateData(4) + c.appendOp(urxLaStart, dataLoc) + c.appendOp(urxStateSave, len(c.out.compiledPat)+2) + c.appendOp(urxJmp, len(c.out.compiledPat)+3) + c.appendOp(urxLaEnd, dataLoc) + c.appendOp(urxBacktrack, 0) + c.appendOp(urxNop, 0) + c.appendOp(urxNop, 0) + + // On the Parentheses stack, start a new frame and add the postions + // of the NOPs. + c.parenStack = append(c.parenStack, int(c.modeFlags)) + c.parenStack = append(c.parenStack, parenLookahead) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-2) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-1) + + case doOpenLookAheadNeg: + // Negated Lookahead. (?! stuff ) + // Compiles to + // 1. LA_START dataloc + // 2. SAVE_STATE 7 // Fail within look-ahead block restores to this state, + // // which continues with the match. + // 3. NOP // Std. Open Paren sequence, for possible '|' + // 4. code for parenthesized stuff. + // 5. LA_END // Cut back stack, remove saved state from step 2. + // 6. BACKTRACK // code in block succeeded, so neg. lookahead fails. + // 7. END_LA // Restore match region, in case look-ahead was using + // an alternate (transparent) region. + // Four data slots are reserved, for saving state on entry to the look-around + // 0: stack pointer on entry. + // 1: input position on entry. + // 2: fActiveStart, the active bounds start on entry. + // 3: fActiveLimit, the active bounds limit on entry. + c.fixLiterals(false) + dataLoc := c.allocateData(4) + c.appendOp(urxLaStart, dataLoc) + c.appendOp(urxStateSave, 0) // dest address will be patched later. + c.appendOp(urxNop, 0) + + // On the Parentheses stack, start a new frame and add the postions + // of the StateSave and NOP. + c.parenStack = append(c.parenStack, int(c.modeFlags)) + c.parenStack = append(c.parenStack, parenNegLookahead) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-2) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-1) + + // Instructions #5 - #7 will be added when the ')' is encountered. + + case doOpenLookBehind: + // Compile a (?<= look-behind open paren. + // + // Compiles to + // 0 URX_LB_START dataLoc + // 1 URX_LB_CONT dataLoc + // 2 MinMatchLen + // 3 MaxMatchLen + // 4 URX_NOP Standard '(' boilerplate. + // 5 URX_NOP Reserved slot for use with '|' ops within (block). + // 6 + // 7 URX_LB_END dataLoc # Check match len, restore input len + // 8 URX_LA_END dataLoc # Restore stack, input pos + // + // Allocate a block of matcher data, to contain (when running a match) + // 0: Stack ptr on entry + // 1: Input Index on entry + // 2: fActiveStart, the active bounds start on entry. + // 3: fActiveLimit, the active bounds limit on entry. + // 4: Start index of match current match attempt. 
+ // The first four items must match the layout of data for LA_START / LA_END + + // Generate match code for any pending literals. + c.fixLiterals(false) + + // Allocate data space + dataLoc := c.allocateData(5) + + // Emit URX_LB_START + c.appendOp(urxLbStart, dataLoc) + + // Emit URX_LB_CONT + c.appendOp(urxLbCont, dataLoc) + c.appendOp(urxReservedOp, 0) // MinMatchLength. To be filled later. + c.appendOp(urxReservedOp, 0) // MaxMatchLength. To be filled later. + + // Emit the NOPs + c.appendOp(urxNop, 0) + c.appendOp(urxNop, 0) + + // On the Parentheses stack, start a new frame and add the postions + // of the URX_LB_CONT and the NOP. + c.parenStack = append(c.parenStack, int(c.modeFlags)) + c.parenStack = append(c.parenStack, parenLookBehind) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-2) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-1) + + // The final two instructions will be added when the ')' is encountered. + + case doOpenLookBehindNeg: + // Compile a (? + // 8 URX_LBN_END dataLoc # Check match len, cause a FAIL + // 9 ... + // + // Allocate a block of matcher data, to contain (when running a match) + // 0: Stack ptr on entry + // 1: Input Index on entry + // 2: fActiveStart, the active bounds start on entry. + // 3: fActiveLimit, the active bounds limit on entry. + // 4: Start index of match current match attempt. + // The first four items must match the layout of data for LA_START / LA_END + + // Generate match code for any pending literals. + c.fixLiterals(false) + + // Allocate data space + dataLoc := c.allocateData(5) + + // Emit URX_LB_START + c.appendOp(urxLbStart, dataLoc) + + // Emit URX_LBN_CONT + c.appendOp(urxLbnCount, dataLoc) + c.appendOp(urxReservedOp, 0) // MinMatchLength. To be filled later. + c.appendOp(urxReservedOp, 0) // MaxMatchLength. To be filled later. + c.appendOp(urxReservedOp, 0) // Continue Loc. To be filled later. + + // Emit the NOPs + c.appendOp(urxNop, 0) + c.appendOp(urxNop, 0) + + // On the Parentheses stack, start a new frame and add the postions + // of the URX_LB_CONT and the NOP. + c.parenStack = append(c.parenStack, int(c.modeFlags)) + c.parenStack = append(c.parenStack, parenLookBehindN) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-2) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-1) + + // The final two instructions will be added when the ')' is encountered. + + case doConditionalExpr, doPerlInline: + // Conditionals such as (?(1)a:b) + // Perl inline-condtionals. (?{perl code}a|b) We're not perl, no way to do them. + c.error(Unimplemented) + + case doCloseParen: + c.handleCloseParen() + if len(c.parenStack) == 0 { + // Extra close paren, or missing open paren. + c.error(MismatchedParen) + } + + case doNOP: + + case doBadOpenParenType, doRuleError: + c.error(RuleSyntax) + + case doMismatchedParenErr: + c.error(MismatchedParen) + + case doPlus: + // Normal '+' compiles to + // 1. stuff to be repeated (already built) + // 2. jmp-sav 1 + // 3. ... + // + // Or, if the item to be repeated can match a zero length string, + // 1. STO_INP_LOC data-loc + // 2. body of stuff to be repeated + // 3. JMP_SAV_X 2 + // 4. ... + + // + // Or, if the item to be repeated is simple + // 1. Item to be repeated. + // 2. LOOP_SR_I set number (assuming repeated item is a set ref) + // 3. LOOP_C stack location + topLoc := c.blockTopLoc(false) // location of item #1 + + // Check for simple constructs, which may get special optimized code. 
+ if topLoc == len(c.out.compiledPat)-1 { + repeatedOp := c.out.compiledPat[topLoc] + + if repeatedOp.typ() == urxSetref { + // Emit optimized code for [char set]+ + c.appendOp(urxLoopSrI, repeatedOp.value()) + frameLoc := c.allocateStackData(1) + c.appendOp(urxLoopC, frameLoc) + break + } + + if repeatedOp.typ() == urxDotany || repeatedOp.typ() == urxDotanyAll || repeatedOp.typ() == urxDotanyUnix { + // Emit Optimized code for .+ operations. + loopOpI := c.buildOp(urxLoopDotI, 0) + if repeatedOp.typ() == urxDotanyAll { + // URX_LOOP_DOT_I operand is a flag indicating ". matches any" mode. + loopOpI |= 1 + } + if c.modeFlags&UnixLines != 0 { + loopOpI |= 2 + } + c.appendIns(loopOpI) + frameLoc := c.allocateStackData(1) + c.appendOp(urxLoopC, frameLoc) + break + } + } + + // General case. + + // Check for minimum match length of zero, which requires + // extra loop-breaking code. + if c.minMatchLength(topLoc, len(c.out.compiledPat)-1) == 0 { + // Zero length match is possible. + // Emit the code sequence that can handle it. + c.insertOp(topLoc) + frameLoc := c.allocateStackData(1) + op := c.buildOp(urxStoInpLoc, frameLoc) + c.out.compiledPat[topLoc] = op + + c.appendOp(urxJmpSavX, topLoc+1) + } else { + // Simpler code when the repeated body must match something non-empty + c.appendOp(urxJmpSav, topLoc) + } + + case doNGPlus: + // Non-greedy '+?' compiles to + // 1. stuff to be repeated (already built) + // 2. state-save 1 + // 3. ... + topLoc := c.blockTopLoc(false) + c.appendOp(urxStateSave, topLoc) + + case doOpt: + // Normal (greedy) ? quantifier. + // Compiles to + // 1. state save 3 + // 2. body of optional block + // 3. ... + // Insert the state save into the compiled pattern, and we're done. + saveStateLoc := c.blockTopLoc(true) + saveStateOp := c.buildOp(urxStateSave, len(c.out.compiledPat)) + c.out.compiledPat[saveStateLoc] = saveStateOp + + case doNGOpt: + // Non-greedy ?? quantifier + // compiles to + // 1. jmp 4 + // 2. body of optional block + // 3 jmp 5 + // 4. state save 2 + // 5 ... + // This code is less than ideal, with two jmps instead of one, because we can only + // insert one instruction at the top of the block being iterated. + jmp1Loc := c.blockTopLoc(true) + jmp2Loc := len(c.out.compiledPat) + + jmp1Op := c.buildOp(urxJmp, jmp2Loc+1) + c.out.compiledPat[jmp1Loc] = jmp1Op + + c.appendOp(urxJmp, jmp2Loc+2) + c.appendOp(urxStateSave, jmp1Loc+1) + + case doStar: + // Normal (greedy) * quantifier. + // Compiles to + // 1. STATE_SAVE 4 + // 2. body of stuff being iterated over + // 3. JMP_SAV 2 + // 4. ... + // + // Or, if the body is a simple [Set], + // 1. LOOP_SR_I set number + // 2. LOOP_C stack location + // ... + // + // Or if this is a .* + // 1. LOOP_DOT_I (. matches all mode flag) + // 2. LOOP_C stack location + // + // Or, if the body can match a zero-length string, to inhibit infinite loops, + // 1. STATE_SAVE 5 + // 2. STO_INP_LOC data-loc + // 3. body of stuff + // 4. JMP_SAV_X 2 + // 5. ... + // location of item #1, the STATE_SAVE + topLoc := c.blockTopLoc(false) + + // Check for simple *, where the construct being repeated + // compiled to single opcode, and might be optimizable. 
+ if topLoc == len(c.out.compiledPat)-1 { + repeatedOp := c.out.compiledPat[topLoc] + + if repeatedOp.typ() == urxSetref { + // Emit optimized code for a [char set]* + loopOpI := c.buildOp(urxLoopSrI, repeatedOp.value()) + c.out.compiledPat[topLoc] = loopOpI + dataLoc := c.allocateStackData(1) + c.appendOp(urxLoopC, dataLoc) + break + } + + if repeatedOp.typ() == urxDotany || repeatedOp.typ() == urxDotanyAll || repeatedOp.typ() == urxDotanyUnix { + // Emit Optimized code for .* operations. + loopOpI := c.buildOp(urxLoopDotI, 0) + if repeatedOp.typ() == urxDotanyAll { + // URX_LOOP_DOT_I operand is a flag indicating . matches any mode. + loopOpI |= 1 + } + if (c.modeFlags & UnixLines) != 0 { + loopOpI |= 2 + } + c.out.compiledPat[topLoc] = loopOpI + dataLoc := c.allocateStackData(1) + c.appendOp(urxLoopC, dataLoc) + break + } + } + + // Emit general case code for this * + // The optimizations did not apply. + + saveStateLoc := c.blockTopLoc(true) + jmpOp := c.buildOp(urxJmpSav, saveStateLoc+1) + + // Check for minimum match length of zero, which requires + // extra loop-breaking code. + if c.minMatchLength(saveStateLoc, len(c.out.compiledPat)-1) == 0 { + c.insertOp(saveStateLoc) + dataLoc := c.allocateStackData(1) + + op := c.buildOp(urxStoInpLoc, dataLoc) + c.out.compiledPat[saveStateLoc+1] = op + jmpOp = c.buildOp(urxJmpSavX, saveStateLoc+2) + } + + // Locate the position in the compiled pattern where the match will continue + // after completing the *. (4 or 5 in the comment above) + continueLoc := len(c.out.compiledPat) + 1 + + // Put together the save state op and store it into the compiled code. + saveStateOp := c.buildOp(urxStateSave, continueLoc) + c.out.compiledPat[saveStateLoc] = saveStateOp + + // Append the URX_JMP_SAV or URX_JMPX operation to the compiled pattern. + c.appendIns(jmpOp) + + case doNGStar: + // Non-greedy *? quantifier + // compiles to + // 1. JMP 3 + // 2. body of stuff being iterated over + // 3. STATE_SAVE 2 + // 4 ... + jmpLoc := c.blockTopLoc(true) // loc 1. + saveLoc := len(c.out.compiledPat) // loc 3. + jmpOp := c.buildOp(urxJmp, saveLoc) + c.out.compiledPat[jmpLoc] = jmpOp + c.appendOp(urxStateSave, jmpLoc+1) + + case doIntervalInit: + // The '{' opening an interval quantifier was just scanned. + // Init the counter varaiables that will accumulate the values as the digits + // are scanned. + c.intervalLow = 0 + c.intervalUpper = -1 + + case doIntevalLowerDigit: + // Scanned a digit from the lower value of an {lower,upper} interval + digitValue := uCharDigitValue(c.c.char) + val := int64(c.intervalLow)*10 + digitValue + if val > math.MaxInt32 { + c.error(NumberTooBig) + } else { + c.intervalLow = int(val) + } + + case doIntervalUpperDigit: + // Scanned a digit from the upper value of an {lower,upper} interval + if c.intervalUpper < 0 { + c.intervalUpper = 0 + } + digitValue := uCharDigitValue(c.c.char) + val := int64(c.intervalUpper)*10 + digitValue + if val > math.MaxInt32 { + c.error(NumberTooBig) + } else { + c.intervalUpper = int(val) + } + + case doIntervalSame: + // Scanned a single value interval like {27}. Upper = Lower. + c.intervalUpper = c.intervalLow + + case doInterval: + // Finished scanning a normal {lower,upper} interval. Generate the code for it. + if !c.compileInlineInterval() { + c.compileInterval(urxCtrInit, utxCtrLoop) + } + + case doPossessiveInterval: + // Finished scanning a Possessive {lower,upper}+ interval. Generate the code for it. + + // Remember the loc for the top of the block being looped over. 
+ // (Can not reserve a slot in the compiled pattern at this time, because + // compileInterval needs to reserve also, and blockTopLoc can only reserve + // once per block.) + topLoc := c.blockTopLoc(false) + + // Produce normal looping code. + c.compileInterval(urxCtrInit, utxCtrLoop) + + // Surround the just-emitted normal looping code with a STO_SP ... LD_SP + // just as if the loop was inclosed in atomic parentheses. + + // First the STO_SP before the start of the loop + c.insertOp(topLoc) + + varLoc := c.allocateData(1) // Reserve a data location for saving the + op := c.buildOp(urxStoSp, varLoc) + c.out.compiledPat[topLoc] = op + + var loopOp instruction + loopOp, c.out.compiledPat = stackPop(c.out.compiledPat) + if loopOp.typ() != utxCtrLoop || loopOp.value() != topLoc { + panic("bad instruction at the end of compiled pattern") + } + + loopOp++ // point LoopOp after the just-inserted STO_SP + c.appendIns(loopOp) + + // Then the LD_SP after the end of the loop + c.appendOp(urxLdSp, varLoc) + + case doNGInterval: + // Finished scanning a non-greedy {lower,upper}? interval. Generate the code for it. + c.compileInterval(urxCtrInitNg, urxCtrLoopNg) + + case doIntervalError: + c.error(BadInterval) + + case doLiteralChar: + // We've just scanned a "normal" character from the pattern, + c.literalChar(c.c.char) + + case doEscapedLiteralChar: + // We've just scanned an backslashed escaped character with no + // special meaning. It represents itself. + if (c.modeFlags&ErrorOnUnknownEscapes) != 0 && ((c.c.char >= 0x41 && c.c.char <= 0x5A) || /* in [A-Z] */ (c.c.char >= 0x61 && c.c.char <= 0x7a)) { // in [a-z] + c.error(BadEscapeSequence) + } + c.literalChar(c.c.char) + + case doDotAny: + // scanned a ".", match any single character. + c.fixLiterals(false) + if (c.modeFlags & DotAll) != 0 { + c.appendOp(urxDotanyAll, 0) + } else if (c.modeFlags & UnixLines) != 0 { + c.appendOp(urxDotanyUnix, 0) + } else { + c.appendOp(urxDotany, 0) + } + + case doCaret: + c.fixLiterals(false) + if (c.modeFlags&Multiline) == 0 && (c.modeFlags&UnixLines) == 0 { + c.appendOp(urxCaret, 0) + } else if (c.modeFlags&Multiline) != 0 && (c.modeFlags&UnixLines) == 0 { + c.appendOp(urxCaretM, 0) + } else if (c.modeFlags&Multiline) == 0 && (c.modeFlags&UnixLines) != 0 { + c.appendOp(urxCaret, 0) // Only testing true start of input. 
+ } else if (c.modeFlags&Multiline) != 0 && (c.modeFlags&UnixLines) != 0 { + c.appendOp(urxCaretMUnix, 0) + } + + case doDollar: + c.fixLiterals(false) + if (c.modeFlags&Multiline) == 0 && (c.modeFlags&UnixLines) == 0 { + c.appendOp(urxDollar, 0) + } else if (c.modeFlags&Multiline) != 0 && (c.modeFlags&UnixLines) == 0 { + c.appendOp(urxDollarM, 0) + } else if (c.modeFlags&Multiline) == 0 && (c.modeFlags&UnixLines) != 0 { + c.appendOp(urxDollarD, 0) + } else if (c.modeFlags&Multiline) != 0 && (c.modeFlags&UnixLines) != 0 { + c.appendOp(urxDollarMd, 0) + } + + case doBackslashA: + c.fixLiterals(false) + c.appendOp(urxCaret, 0) + + case doBackslashB: + if !BreakIteration { + if (c.modeFlags & UWord) != 0 { + c.error(Unimplemented) + } + } + c.fixLiterals(false) + if c.modeFlags&UWord != 0 { + c.appendOp(urxBackslashBu, 1) + } else { + c.appendOp(urxBackslashB, 1) + } + + case doBackslashb: + if !BreakIteration { + if (c.modeFlags & UWord) != 0 { + c.error(Unimplemented) + } + } + c.fixLiterals(false) + if c.modeFlags&UWord != 0 { + c.appendOp(urxBackslashBu, 0) + } else { + c.appendOp(urxBackslashB, 0) + } + + case doBackslashD: + c.fixLiterals(false) + c.appendOp(urxBackslashD, 1) + + case doBackslashd: + c.fixLiterals(false) + c.appendOp(urxBackslashD, 0) + + case doBackslashG: + c.fixLiterals(false) + c.appendOp(urxBackslashG, 0) + + case doBackslashH: + c.fixLiterals(false) + c.appendOp(urxBackslashH, 1) + + case doBackslashh: + c.fixLiterals(false) + c.appendOp(urxBackslashH, 0) + + case doBackslashR: + c.fixLiterals(false) + c.appendOp(urxBackslashR, 0) + + case doBackslashS: + c.fixLiterals(false) + c.appendOp(urxStatSetrefN, urxIsspaceSet) + + case doBackslashs: + c.fixLiterals(false) + c.appendOp(urxStaticSetref, urxIsspaceSet) + + case doBackslashV: + c.fixLiterals(false) + c.appendOp(urxBackslashV, 1) + + case doBackslashv: + c.fixLiterals(false) + c.appendOp(urxBackslashV, 0) + + case doBackslashW: + c.fixLiterals(false) + c.appendOp(urxStatSetrefN, urxIswordSet) + + case doBackslashw: + c.fixLiterals(false) + c.appendOp(urxStaticSetref, urxIswordSet) + + case doBackslashX: + if !BreakIteration { + // Grapheme Cluster Boundary requires ICU break iteration. + c.error(Unimplemented) + } + c.fixLiterals(false) + c.appendOp(urxBackslashX, 0) + + case doBackslashZ: + c.fixLiterals(false) + c.appendOp(urxDollar, 0) + + case doBackslashz: + c.fixLiterals(false) + c.appendOp(urxBackslashZ, 0) + + case doEscapeError: + c.error(BadEscapeSequence) + + case doExit: + c.fixLiterals(false) + return false + + case doProperty: + c.fixLiterals(false) + theSet := c.scanProp() + c.compileSet(theSet) + + case doNamedChar: + ch := c.scanNamedChar() + c.literalChar(ch) + + case doBackRef: + // BackReference. Somewhat unusual in that the front-end can not completely parse + // the regular expression, because the number of digits to be consumed + // depends on the number of capture groups that have been defined. So + // we have to do it here instead. + numCaptureGroups := len(c.out.groupMap) + groupNum := int64(0) + ch := c.c.char + + for { + // Loop once per digit, for max allowed number of digits in a back reference. + digit := uCharDigitValue(ch) + groupNum = groupNum*10 + digit + if groupNum >= int64(numCaptureGroups) { + break + } + ch = c.peekCharLL() + if !staticRuleSet[ruleSetDigitChar-128].ContainsRune(ch) { + break + } + c.nextCharLL() + } + + // Scan of the back reference in the source regexp is complete. Now generate + // the compiled code for it. 
+ // Because capture groups can be forward-referenced by back-references, + // we fill the operand with the capture group number. At the end + // of compilation, it will be changed to the variable's location. + if groupNum == 0 { + panic("\\0 begins an octal escape sequence, and shouldn't enter this code path at all") + } + c.fixLiterals(false) + if (c.modeFlags & CaseInsensitive) != 0 { + c.appendOp(urxBackrefI, int(groupNum)) + } else { + c.appendOp(urxBackref, int(groupNum)) + } + + case doBeginNamedBackRef: + if c.captureName != nil { + panic("should not replace capture name") + } + c.captureName = &strings.Builder{} + + case doContinueNamedBackRef: + c.captureName.WriteRune(c.c.char) + + case doCompleteNamedBackRef: + { + groupNumber := c.out.namedCaptureMap[c.captureName.String()] + if groupNumber == 0 { + // Group name has not been defined. + // Could be a forward reference. If we choose to support them at some + // future time, extra mechanism will be required at this point. + c.error(InvalidCaptureGroupName) + } else { + // Given the number, handle identically to a \n numbered back reference. + // See comments above, under doBackRef + c.fixLiterals(false) + if (c.modeFlags & CaseInsensitive) != 0 { + c.appendOp(urxBackrefI, groupNumber) + } else { + c.appendOp(urxBackref, groupNumber) + } + } + c.captureName = nil + } + + case doPossessivePlus: + // Possessive ++ quantifier. + // Compiles to + // 1. STO_SP + // 2. body of stuff being iterated over + // 3. STATE_SAVE 5 + // 4. JMP 2 + // 5. LD_SP + // 6. ... + // + // Note: TODO: This is pretty inefficient. A mass of saved state is built up + // then unconditionally discarded. Perhaps introduce a new opcode. Ticket 6056 + // + // Emit the STO_SP + topLoc := c.blockTopLoc(true) + stoLoc := c.allocateData(1) // Reserve the data location for storing save stack ptr. + op := c.buildOp(urxStoSp, stoLoc) + c.out.compiledPat[topLoc] = op + + // Emit the STATE_SAVE + c.appendOp(urxStateSave, len(c.out.compiledPat)+2) + + // Emit the JMP + c.appendOp(urxJmp, topLoc+1) + + // Emit the LD_SP + c.appendOp(urxLdSp, stoLoc) + + case doPossessiveStar: + // Possessive *+ quantifier. + // Compiles to + // 1. STO_SP loc + // 2. STATE_SAVE 5 + // 3. body of stuff being iterated over + // 4. JMP 2 + // 5. LD_SP loc + // 6 ... + // TODO: do something to cut back the state stack each time through the loop. + // Reserve two slots at the top of the block. + topLoc := c.blockTopLoc(true) + c.insertOp(topLoc) + + // emit STO_SP loc + stoLoc := c.allocateData(1) // Reserve the data location for storing save stack ptr. + op := c.buildOp(urxStoSp, stoLoc) + c.out.compiledPat[topLoc] = op + + // Emit the SAVE_STATE 5 + L7 := len(c.out.compiledPat) + 1 + op = c.buildOp(urxStateSave, L7) + c.out.compiledPat[topLoc+1] = op + + // Append the JMP operation. + c.appendOp(urxJmp, topLoc+1) + + // Emit the LD_SP loc + c.appendOp(urxLdSp, stoLoc) + + case doPossessiveOpt: + // Possessive ?+ quantifier. + // Compiles to + // 1. STO_SP loc + // 2. SAVE_STATE 5 + // 3. body of optional block + // 4. LD_SP loc + // 5. ... + // + // Reserve two slots at the top of the block. + topLoc := c.blockTopLoc(true) + c.insertOp(topLoc) + + // Emit the STO_SP + stoLoc := c.allocateData(1) // Reserve the data location for storing save stack ptr. 
+ op := c.buildOp(urxStoSp, stoLoc) + c.out.compiledPat[topLoc] = op + + // Emit the SAVE_STATE + continueLoc := len(c.out.compiledPat) + 1 + op = c.buildOp(urxStateSave, continueLoc) + c.out.compiledPat[topLoc+1] = op + + // Emit the LD_SP + c.appendOp(urxLdSp, stoLoc) + + case doBeginMatchMode: + c.newModeFlags = c.modeFlags + c.setModeFlag = true + case doMatchMode: // (?i) and similar + var bit RegexpFlag + switch c.c.char { + case 0x69: /* 'i' */ + bit = CaseInsensitive + case 0x64: /* 'd' */ + bit = UnixLines + case 0x6d: /* 'm' */ + bit = Multiline + case 0x73: /* 's' */ + bit = DotAll + case 0x75: /* 'u' */ + bit = 0 /* Unicode casing */ + case 0x77: /* 'w' */ + bit = UWord + case 0x78: /* 'x' */ + bit = Comments + case 0x2d: /* '-' */ + c.setModeFlag = false + default: + // Should never happen. Other chars are filtered out by the scanner. + panic("unreachable") + } + if c.setModeFlag { + c.newModeFlags |= bit + } else { + c.newModeFlags &= ^bit + } + + case doSetMatchMode: + // Emit code to match any pending literals, using the not-yet changed match mode. + c.fixLiterals(false) + + // We've got a (?i) or similar. The match mode is being changed, but + // the change is not scoped to a parenthesized block. + if c.newModeFlags >= 0 { + panic("cNewModeFlags not properly initialized") + } + c.modeFlags = c.newModeFlags + + case doMatchModeParen: + // We've got a (?i: or similar. Begin a parenthesized block, save old + // mode flags so they can be restored at the close of the block. + // + // Compile to a + // - NOP, which later may be replaced by a save-state if the + // parenthesized group gets a * quantifier, followed by + // - NOP, which may later be replaced by a save-state if there + // is an '|' alternation within the parens. + c.fixLiterals(false) + c.appendOp(urxNop, 0) + c.appendOp(urxNop, 0) + + // On the Parentheses stack, start a new frame and add the postions + // of the two NOPs (a normal non-capturing () frame, except for the + // saving of the orignal mode flags.) + c.parenStack = append(c.parenStack, int(c.modeFlags)) + c.parenStack = append(c.parenStack, parenFlags) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-2) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-1) + + // Set the current mode flags to the new values. + if c.newModeFlags >= 0 { + panic("cNewModeFlags not properly initialized") + } + c.modeFlags = c.newModeFlags + + case doBadModeFlag: + c.error(InvalidFlag) + + case doSuppressComments: + // We have just scanned a '(?'. We now need to prevent the character scanner from + // treating a '#' as a to-the-end-of-line comment. + // (This Perl compatibility just gets uglier and uglier to do...) 
+ c.eolComments = false + + case doSetAddAmp: + set := c.setStack[len(c.setStack)-1] + set.AddRune(chAmp) + + case doSetAddDash: + set := c.setStack[len(c.setStack)-1] + set.AddRune(chDash) + + case doSetBackslashs: + set := c.setStack[len(c.setStack)-1] + set.AddAll(staticPropertySets[urxIsspaceSet]) + + case doSetBackslashS: + sset := uset.New() + sset.AddAll(staticPropertySets[urxIsspaceSet]) // TODO: add latin1 spaces + sset.Complement() + + set := c.setStack[len(c.setStack)-1] + set.AddAll(sset) + + case doSetBackslashd: + set := c.setStack[len(c.setStack)-1] + c.err = uprops.AddCategory(set, uchar.GcNdMask) + + case doSetBackslashD: + digits := uset.New() + c.err = uprops.ApplyIntPropertyValue(digits, uprops.UCharGeneralCategoryMask, int32(uchar.GcNdMask)) + digits.Complement() + set := c.setStack[len(c.setStack)-1] + set.AddAll(digits) + + case doSetBackslashh: + h := uset.New() + c.err = uprops.ApplyIntPropertyValue(h, uprops.UCharGeneralCategoryMask, int32(uchar.GcZsMask)) + h.AddRune(9) // Tab + + set := c.setStack[len(c.setStack)-1] + set.AddAll(h) + + case doSetBackslashH: + h := uset.New() + c.err = uprops.ApplyIntPropertyValue(h, uprops.UCharGeneralCategoryMask, int32(uchar.GcZsMask)) + h.AddRune(9) // Tab + h.Complement() + + set := c.setStack[len(c.setStack)-1] + set.AddAll(h) + + case doSetBackslashv: + set := c.setStack[len(c.setStack)-1] + set.AddRuneRange(0x0a, 0x0d) // add range + set.AddRune(0x85) + set.AddRuneRange(0x2028, 0x2029) + + case doSetBackslashV: + v := uset.New() + v.AddRuneRange(0x0a, 0x0d) // add range + v.AddRune(0x85) + v.AddRuneRange(0x2028, 0x2029) + v.Complement() + + set := c.setStack[len(c.setStack)-1] + set.AddAll(v) + + case doSetBackslashw: + set := c.setStack[len(c.setStack)-1] + set.AddAll(staticPropertySets[urxIswordSet]) + + case doSetBackslashW: + sset := uset.New() + sset.AddAll(staticPropertySets[urxIswordSet]) + sset.Complement() + + set := c.setStack[len(c.setStack)-1] + set.AddAll(sset) + + case doSetBegin: + c.fixLiterals(false) + c.setStack = append(c.setStack, uset.New()) + c.setOpStack = append(c.setOpStack, setStart) + if (c.modeFlags & CaseInsensitive) != 0 { + c.setOpStack = append(c.setOpStack, setCaseClose) + } + + case doSetBeginDifference1: + // We have scanned something like [[abc]-[ + // Set up a new UnicodeSet for the set beginning with the just-scanned '[' + // Push a Difference operator, which will cause the new set to be subtracted from what + // went before once it is created. + c.setPushOp(setDifference1) + c.setOpStack = append(c.setOpStack, setStart) + if (c.modeFlags & CaseInsensitive) != 0 { + c.setOpStack = append(c.setOpStack, setCaseClose) + } + + case doSetBeginIntersection1: + // We have scanned something like [[abc]&[ + // Need both the '&' operator and the open '[' operator. + c.setPushOp(setIntersection1) + c.setOpStack = append(c.setOpStack, setStart) + if (c.modeFlags & CaseInsensitive) != 0 { + c.setOpStack = append(c.setOpStack, setCaseClose) + } + + case doSetBeginUnion: + // We have scanned something like [[abc][ + // Need to handle the union operation explicitly [[abc] | [ + c.setPushOp(setUnion) + c.setOpStack = append(c.setOpStack, setStart) + if (c.modeFlags & CaseInsensitive) != 0 { + c.setOpStack = append(c.setOpStack, setCaseClose) + } + + case doSetDifference2: + // We have scanned something like [abc-- + // Consider this to unambiguously be a set difference operator. + c.setPushOp(setDifference2) + + case doSetEnd: + // Have encountered the ']' that closes a set. 
+ // Force the evaluation of any pending operations within this set, + // leave the completed set on the top of the set stack. + c.setEval(setEnd) + var start setOperation + start, c.setOpStack = stackPop(c.setOpStack) + if start != setStart { + panic("bad set operation in stack") + } + + case doSetFinish: + // Finished a complete set expression, including all nested sets. + // The close bracket has already triggered clearing out pending set operators, + // the operator stack should be empty and the operand stack should have just + // one entry, the result set. + if len(c.setOpStack) > 0 { + panic("expected setOpStack to be empty") + } + var set *uset.UnicodeSet + set, c.setStack = stackPop(c.setStack) + c.compileSet(set) + + case doSetIntersection2: + // Have scanned something like [abc&& + c.setPushOp(setIntersection2) + + case doSetLiteral: + // Union the just-scanned literal character into the set being built. + // This operation is the highest precedence set operation, so we can always do + // it immediately, without waiting to see what follows. It is necessary to perform + // any pending '-' or '&' operation first, because these have the same precedence + // as union-ing in a literal' + c.setEval(setUnion) + set := c.setStack[len(c.setStack)-1] + set.AddRune(c.c.char) + c.lastSetLiteral = c.c.char + + case doSetLiteralEscaped: + // A back-slash escaped literal character was encountered. + // Processing is the same as with setLiteral, above, with the addition of + // the optional check for errors on escaped ASCII letters. + if (c.modeFlags&ErrorOnUnknownEscapes) != 0 && + ((c.c.char >= 0x41 && c.c.char <= 0x5A) || // in [A-Z] + (c.c.char >= 0x61 && c.c.char <= 0x7a)) { // in [a-z] + c.error(BadEscapeSequence) + } + c.setEval(setUnion) + set := c.setStack[len(c.setStack)-1] + set.AddRune(c.c.char) + c.lastSetLiteral = c.c.char + + case doSetNamedChar: + // Scanning a \N{UNICODE CHARACTER NAME} + // Aside from the source of the character, the processing is identical to doSetLiteral, + // above. + ch := c.scanNamedChar() + c.setEval(setUnion) + set := c.setStack[len(c.setStack)-1] + set.AddRune(ch) + c.lastSetLiteral = ch + + case doSetNamedRange: + // We have scanned literal-\N{CHAR NAME}. Add the range to the set. + // The left character is already in the set, and is saved in fLastSetLiteral. + // The right side needs to be picked up, the scan is at the 'N'. + // Lower Limit > Upper limit being an error matches both Java + // and ICU UnicodeSet behavior. + ch := c.scanNamedChar() + if c.err == nil && (c.lastSetLiteral == -1 || c.lastSetLiteral > ch) { + c.error(InvalidRange) + } + set := c.setStack[len(c.setStack)-1] + set.AddRuneRange(c.lastSetLiteral, ch) + c.lastSetLiteral = ch + + case doSetNegate: + // Scanned a '^' at the start of a set. + // Push the negation operator onto the set op stack. + // A twist for case-insensitive matching: + // the case closure operation must happen _before_ negation. + // But the case closure operation will already be on the stack if it's required. + // This requires checking for case closure, and swapping the stack order + // if it is present. 
+ tosOp := c.setOpStack[len(c.setOpStack)-1] + if tosOp == setCaseClose { + _, c.setOpStack = stackPop(c.setOpStack) + c.setOpStack = append(c.setOpStack, setNegation) + c.setOpStack = append(c.setOpStack, setCaseClose) + } else { + c.setOpStack = append(c.setOpStack, setNegation) + } + + case doSetNoCloseError: + c.error(MissingCloseBracket) + + case doSetOpError: + c.error(RuleSyntax) // -- or && at the end of a set. Illegal. + + case doSetPosixProp: + if set := c.scanPosixProp(); set != nil { + c.setStack[len(c.setStack)-1].AddAll(set) + } + + case doSetProp: + // Scanned a \p \P within [brackets]. + if set := c.scanProp(); set != nil { + c.setStack[len(c.setStack)-1].AddAll(set) + } + + case doSetRange: + // We have scanned literal-literal. Add the range to the set. + // The left character is already in the set, and is saved in fLastSetLiteral. + // The right side is the current character. + // Lower Limit > Upper limit being an error matches both Java + // and ICU UnicodeSet behavior. + + if c.lastSetLiteral == -1 || c.lastSetLiteral > c.c.char { + c.error(InvalidRange) + } + c.setStack[len(c.setStack)-1].AddRuneRange(c.lastSetLiteral, c.c.char) + + default: + panic("unexpected OP in parser") + } + + return c.err == nil +} + +func uCharDigitValue(char rune) int64 { + if char >= '0' && char <= '9' { + return int64(char - '0') + } + return -1 +} + +func stackPop[T any](stack []T) (T, []T) { + var out T + if len(stack) > 0 { + out = stack[len(stack)-1] + stack = stack[:len(stack)-1] + } + return out, stack +} + +func (c *compiler) error(e CompileErrorCode) { + c.err = &CompileError{ + Code: e, + Line: c.lineNum, + Offset: c.charNum, + Context: c.out.pattern, + } +} + +func (c *compiler) stripNOPs() { + if c.err != nil { + return + } + + end := len(c.out.compiledPat) + deltas := make([]int, 0, end) + + // Make a first pass over the code, computing the amount that things + // will be offset at each location in the original code. + var loc, d int + for loc = 0; loc < end; loc++ { + deltas = append(deltas, d) + op := c.out.compiledPat[loc] + if op.typ() == urxNop { + d++ + } + } + + // Make a second pass over the code, removing the NOPs by moving following + // code up, and patching operands that refer to code locations that + // are being moved. The array of offsets from the first step is used + // to compute the new operand values. + var src, dst int + for src = 0; src < end; src++ { + op := c.out.compiledPat[src] + opType := op.typ() + + switch opType { + case urxNop: + // skip + + case urxStateSave, + urxJmp, + utxCtrLoop, + urxCtrLoopNg, + urxRelocOprnd, + urxJmpx, + urxJmpSav, + urxJmpSavX: + // These are instructions with operands that refer to code locations. 
+ operandAddress := op.value() + fixedOperandAddress := operandAddress - deltas[operandAddress] + op = c.buildOp(opType, fixedOperandAddress) + c.out.compiledPat[dst] = op + dst++ + + case urxBackref, urxBackrefI: + where := op.value() + if where > len(c.out.groupMap) { + c.error(InvalidBackRef) + break + } + + where = int(c.out.groupMap[where-1]) + op = c.buildOp(opType, where) + c.out.compiledPat[dst] = op + dst++ + c.out.needsAltInput = true + + case urxReservedOp, + urxReservedOpN, + urxBacktrack, + urxEnd, + urxOnechar, + urxString, + urxStringLen, + urxStartCapture, + urxEndCapture, + urxStaticSetref, + urxStatSetrefN, + urxSetref, + urxDotany, + urxFail, + urxBackslashB, + urxBackslashBu, + urxBackslashG, + urxBackslashX, + urxBackslashZ, + urxDotanyAll, + urxBackslashD, + urxCaret, + urxDollar, + urxCtrInit, + urxCtrInitNg, + urxDotanyUnix, + urxStoSp, + urxLdSp, + urxStoInpLoc, + urxLaStart, + urxLaEnd, + urcOnecharI, + urxStringI, + urxDollarM, + urxCaretM, + urxCaretMUnix, + urxLbStart, + urxLbCont, + urxLbEnd, + urxLbnCount, + urxLbnEnd, + urxLoopSrI, + urxLoopDotI, + urxLoopC, + urxDollarD, + urxDollarMd, + urxBackslashH, + urxBackslashR, + urxBackslashV: + // These instructions are unaltered by the relocation. + c.out.compiledPat[dst] = op + dst++ + + default: + // Some op is unaccounted for. + panic("unreachable") + } + } + + c.out.compiledPat = c.out.compiledPat[:dst] +} + +func (c *compiler) matchStartType() { + var loc int // Location in the pattern of the current op being processed. + var currentLen int32 // Minimum length of a match to this point (loc) in the pattern + var numInitialStrings int // Number of strings encountered that could match at start. + var atStart = true // True if no part of the pattern yet encountered + // could have advanced the position in a match. + // (Maximum match length so far == 0) + + // forwardedLength is a vector holding minimum-match-length values that + // are propagated forward in the pattern by JMP or STATE_SAVE operations. + // It must be one longer than the pattern being checked because some ops + // will jmp to a end-of-block+1 location from within a block, and we must + // count those when checking the block. + end := len(c.out.compiledPat) + forwardedLength := make([]int32, end+1) + + for loc = 3; loc < end; loc++ { + forwardedLength[loc] = math.MaxInt32 + } + + for loc = 3; loc < end; loc++ { + op := c.out.compiledPat[loc] + opType := op.typ() + + // The loop is advancing linearly through the pattern. + // If the op we are now at was the destination of a branch in the pattern, + // and that path has a shorter minimum length than the current accumulated value, + // replace the current accumulated value. + if forwardedLength[loc] < currentLen { + currentLen = forwardedLength[loc] + } + + switch opType { + // Ops that don't change the total length matched + case urxReservedOp, + urxEnd, + urxFail, + urxStringLen, + urxNop, + urxStartCapture, + urxEndCapture, + urxBackslashB, + urxBackslashBu, + urxBackslashG, + urxBackslashZ, + urxDollar, + urxDollarM, + urxDollarD, + urxDollarMd, + urxRelocOprnd, + urxStoInpLoc, + urxBackref, // BackRef. Must assume that it might be a zero length match + urxBackrefI, + urxStoSp, // Setup for atomic or possessive blocks. Doesn't change what can match. 
+ urxLdSp: + // skip + + case urxCaret: + if atStart { + c.out.startType = startStart + } + + case urxCaretM, urxCaretMUnix: + if atStart { + c.out.startType = startLine + } + + case urxOnechar: + if currentLen == 0 { + // This character could appear at the start of a match. + // Add it to the set of possible starting characters. + c.out.initialChars.AddRune(op.value32()) + numInitialStrings += 2 + } + currentLen = safeIncrement(currentLen, 1) + atStart = false + + case urxSetref: + if currentLen == 0 { + sn := op.value() + set := c.out.sets[sn] + c.out.initialChars.AddAll(set) + numInitialStrings += 2 + } + currentLen = safeIncrement(currentLen, 1) + atStart = false + + case urxLoopSrI: + // [Set]*, like a SETREF, above, in what it can match, + // but may not match at all, so currentLen is not incremented. + if currentLen == 0 { + sn := op.value() + set := c.out.sets[sn] + c.out.initialChars.AddAll(set) + numInitialStrings += 2 + } + atStart = false + + case urxLoopDotI: + if currentLen == 0 { + // .* at the start of a pattern. + // Any character can begin the match. + c.out.initialChars.Clear() + c.out.initialChars.Complement() + numInitialStrings += 2 + } + atStart = false + + case urxStaticSetref: + if currentLen == 0 { + sn := op.value() + c.out.initialChars.AddAll(staticPropertySets[sn]) + numInitialStrings += 2 + } + currentLen = safeIncrement(currentLen, 1) + atStart = false + + case urxStatSetrefN: + if currentLen == 0 { + sn := op.value() + sc := uset.New() + sc.AddAll(staticPropertySets[sn]) + sc.Complement() + + c.out.initialChars.AddAll(sc) + numInitialStrings += 2 + } + currentLen = safeIncrement(currentLen, 1) + atStart = false + + case urxBackslashD: + // Digit Char + if currentLen == 0 { + s := uset.New() + c.err = uprops.ApplyIntPropertyValue(s, uprops.UCharGeneralCategoryMask, int32(uchar.GcNdMask)) + if op.value() != 0 { + s.Complement() + } + c.out.initialChars.AddAll(s) + numInitialStrings += 2 + } + currentLen = safeIncrement(currentLen, 1) + atStart = false + + case urxBackslashH: + // Horiz white space + if currentLen == 0 { + s := uset.New() + c.err = uprops.ApplyIntPropertyValue(s, uprops.UCharGeneralCategoryMask, int32(uchar.GcZsMask)) + s.AddRune(9) // Tab + if op.value() != 0 { + s.Complement() + } + c.out.initialChars.AddAll(s) + numInitialStrings += 2 + } + currentLen = safeIncrement(currentLen, 1) + atStart = false + + case urxBackslashR, // Any line ending sequence + urxBackslashV: // Any line ending code point, with optional negation + if currentLen == 0 { + s := uset.New() + s.AddRuneRange(0x0a, 0x0d) // add range + s.AddRune(0x85) + s.AddRuneRange(0x2028, 0x2029) + if op.value() != 0 { + // Complement option applies to URX_BACKSLASH_V only. + s.Complement() + } + c.out.initialChars.AddAll(s) + numInitialStrings += 2 + } + currentLen = safeIncrement(currentLen, 1) + atStart = false + + case urcOnecharI: + // Case Insensitive Single Character. + if currentLen == 0 { + ch := op.value32() + if uprops.HasBinaryProperty(ch, uprops.UCharCaseSensitive) { + starters := uset.New() + starters.AddRuneRange(ch, ch) + starters.CloseOver(uset.CaseInsensitive) + // findCaseInsensitiveStarters(c, &starters); + // For ONECHAR_I, no need to worry about text chars that expand on folding into + // strings. The expanded folding can't match the pattern. + c.out.initialChars.AddAll(starters) + } else { + // Char has no case variants. Just add it as-is to the + // set of possible starting chars. 
+ c.out.initialChars.AddRune(ch) + } + numInitialStrings += 2 + } + currentLen = safeIncrement(currentLen, 1) + atStart = false + + case urxBackslashX, // Grahpeme Cluster. Minimum is 1, max unbounded. + urxDotanyAll, // . matches one or two. + urxDotany, + urxDotanyUnix: + if currentLen == 0 { + // These constructs are all bad news when they appear at the start + // of a match. Any character can begin the match. + c.out.initialChars.Clear() + c.out.initialChars.Complement() + numInitialStrings += 2 + } + currentLen = safeIncrement(currentLen, 1) + atStart = false + + case urxJmpx: + loc++ // Except for extra operand on URX_JMPX, same as URX_JMP. + fallthrough + + case urxJmp: + jmpDest := op.value() + if jmpDest < loc { + // Loop of some kind. Can safely ignore, the worst that will happen + // is that we understate the true minimum length + currentLen = forwardedLength[loc+1] + } else { + // Forward jump. Propagate the current min length to the target loc of the jump. + if forwardedLength[jmpDest] > currentLen { + forwardedLength[jmpDest] = currentLen + } + } + atStart = false + + case urxJmpSav, + urxJmpSavX: + // Combo of state save to the next loc, + jmp backwards. + // Net effect on min. length computation is nothing. + atStart = false + + case urxBacktrack: + // Fails are kind of like a branch, except that the min length was + // propagated already, by the state save. + currentLen = forwardedLength[loc+1] + atStart = false + + case urxStateSave: + // State Save, for forward jumps, propagate the current minimum. + // of the state save. + jmpDest := op.value() + if jmpDest > loc { + if currentLen < forwardedLength[jmpDest] { + forwardedLength[jmpDest] = (currentLen) + } + } + atStart = false + + case urxString: + loc++ + stringLenOp := c.out.compiledPat[loc] + stringLen := stringLenOp.value() + if currentLen == 0 { + // Add the starting character of this string to the set of possible starting + // characters for this pattern. + stringStartIdx := op.value() + ch := c.out.literalText[stringStartIdx] + c.out.initialChars.AddRune(ch) + + // Remember this string. After the entire pattern has been checked, + // if nothing else is identified that can start a match, we'll use it. + numInitialStrings++ + c.out.initialStringIdx = stringStartIdx + c.out.initialStringLen = stringLen + } + + currentLen = safeIncrement(currentLen, stringLen) + atStart = false + + case urxStringI: + // Case-insensitive string. Unlike exact-match strings, we won't + // attempt a string search for possible match positions. But we + // do update the set of possible starting characters. + loc++ + stringLenOp := c.out.compiledPat[loc] + stringLen := stringLenOp.value() + if currentLen == 0 { + // Add the starting character of this string to the set of possible starting + // characters for this pattern. + stringStartIdx := op.value() + ch := c.out.literalText[stringStartIdx] + s := uset.New() + c.findCaseInsensitiveStarters(ch, s) + c.out.initialChars.AddAll(s) + numInitialStrings += 2 // Matching on an initial string not possible. + } + currentLen = safeIncrement(currentLen, stringLen) + atStart = false + + case urxCtrInit, + urxCtrInitNg: + // Loop Init Ops. These don't change the min length, but they are 4 word ops + // so location must be updated accordingly. + // Loop Init Ops. + // If the min loop count == 0 + // move loc forwards to the end of the loop, skipping over the body. + // If the min count is > 0, + // continue normal processing of the body of the loop. 
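+			// Word layout written by compileInterval: loc holds CTR_INIT / CTR_INIT_NG,
+			// loc+1 a RELOC_OPRND with the loop-end location, loc+2 the minimum count,
+			// and loc+3 the maximum count (-1 when unbounded).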
+ loopEndLoc := c.out.compiledPat[loc+1].value() + minLoopCount := int(c.out.compiledPat[loc+2]) + if minLoopCount == 0 { + // Min Loop Count of 0, treat like a forward branch and + // move the current minimum length up to the target + // (end of loop) location. + if forwardedLength[loopEndLoc] > currentLen { + forwardedLength[loopEndLoc] = currentLen + } + } + loc += 3 // Skips over operands of CTR_INIT + atStart = false + + case utxCtrLoop, + urxCtrLoopNg: + // Loop ops. + // The jump is conditional, backwards only. + atStart = false + + case urxLoopC: + // More loop ops. These state-save to themselves. + // don't change the minimum match + atStart = false + + case urxLaStart, + urxLbStart: + // Look-around. Scan forward until the matching look-ahead end, + // without processing the look-around block. This is overly pessimistic. + + // Keep track of the nesting depth of look-around blocks. Boilerplate code for + // lookahead contains two LA_END instructions, so count goes up by two + // for each LA_START. + var depth int + if opType == urxLaStart { + depth = 2 + } else { + depth = 1 + } + for { + loc++ + op = c.out.compiledPat[loc] + if op.typ() == urxLaStart { + depth += 2 + } + if op.typ() == urxLbStart { + depth++ + } + if op.typ() == urxLaEnd || op.typ() == urxLbnEnd { + depth-- + if depth == 0 { + break + } + } + if op.typ() == urxStateSave { + // Need this because neg lookahead blocks will FAIL to outside + // of the block. + jmpDest := op.value() + if jmpDest > loc { + if currentLen < forwardedLength[jmpDest] { + forwardedLength[jmpDest] = (currentLen) + } + } + } + } + + case urxLaEnd, + urxLbCont, + urxLbEnd, + urxLbnCount, + urxLbnEnd: + panic("should be consumed in URX_LA_START") + + default: + panic("unreachable") + } + } + + // Sort out what we should check for when looking for candidate match start positions. + // In order of preference, + // 1. Start of input text buffer. + // 2. A literal string. + // 3. Start of line in multi-line mode. + // 4. A single literal character. + // 5. A character from a set of characters. + // + if c.out.startType == startStart { + // Match only at the start of an input text string. + // start type is already set. We're done. + } else if numInitialStrings == 1 && c.out.minMatchLen > 0 { + // Match beginning only with a literal string. + ch := c.out.literalText[c.out.initialStringIdx] + c.out.startType = startString + c.out.initialChar = ch + } else if c.out.startType == startLine { + // Match at start of line in Multi-Line mode. + // Nothing to do here; everything is already set. + } else if c.out.minMatchLen == 0 { + // Zero length match possible. We could start anywhere. + c.out.startType = startNoInfo + } else if c.out.initialChars.Len() == 1 { + // All matches begin with the same char. + c.out.startType = startChar + c.out.initialChar = c.out.initialChars.RuneAt(0) + } else if !c.out.initialChars.ContainsRuneRange(0, 0x10ffff) && c.out.minMatchLen > 0 { + // Matches start with a set of character smaller than the set of all chars. 
+ c.out.startType = startSet + } else { + // Matches can start with anything + c.out.startType = startNoInfo + } +} + +func (c *compiler) appendOp(typ opcode, arg int) { + c.appendIns(c.buildOp(typ, arg)) +} + +func (c *compiler) appendIns(ins instruction) { + if c.err != nil { + return + } + c.out.compiledPat = append(c.out.compiledPat, ins) +} + +func (c *compiler) buildOp(typ opcode, val int) instruction { + if c.err != nil { + return 0 + } + if val > 0x00ffffff { + panic("bad argument to buildOp") + } + if val < 0 { + if !(typ == urxReservedOpN || typ == urxReservedOp) { + panic("bad value to buildOp") + } + typ = urxReservedOpN + } + return instruction(int32(typ)<<24 | int32(val)) +} + +func (c *compiler) handleCloseParen() { + if len(c.parenStack) == 0 { + c.error(MismatchedParen) + return + } + + c.fixLiterals(false) + + var patIdx int + var patOp instruction + + for { + patIdx, c.parenStack = stackPop(c.parenStack) + if patIdx < 0 { + break + } + + patOp = c.out.compiledPat[patIdx] + if patOp.value() != 0 { + panic("branch target for JMP should not be set") + } + patOp |= instruction(len(c.out.compiledPat)) + c.out.compiledPat[patIdx] = patOp + c.matchOpenParen = patIdx + } + + var modeFlags int + modeFlags, c.parenStack = stackPop(c.parenStack) + if modeFlags >= 0 { + panic("modeFlags in paren stack was not negated") + } + + c.modeFlags = RegexpFlag(modeFlags) + + switch patIdx { + case parenPlain, parenFlags: + // No additional fixups required. + // (Grouping-only parentheses) + case parenCapturing: + // Capturing Parentheses. + // Insert a End Capture op into the pattern. + // The frame offset of the variables for this cg is obtained from the + // start capture op and put it into the end-capture op. + + captureOp := c.out.compiledPat[c.matchOpenParen+1] + if captureOp.typ() != urxStartCapture { + panic("bad type in capture op (expected URX_START_CAPTURE)") + } + frameVarLocation := captureOp.value() + c.appendOp(urxEndCapture, frameVarLocation) + + case parenAtomic: + // Atomic Parenthesis. + // Insert a LD_SP operation to restore the state stack to the position + // it was when the atomic parens were entered. + stoOp := c.out.compiledPat[c.matchOpenParen+1] + if stoOp.typ() != urxStoSp { + panic("bad type in capture op (expected URX_STO_SP)") + } + stoLoc := stoOp.value() + c.appendOp(urxLdSp, stoLoc) + + case parenLookahead: + startOp := c.out.compiledPat[c.matchOpenParen-5] + if startOp.typ() != urxLaStart { + panic("bad type in capture op (expected URX_LA_START)") + } + dataLoc := startOp.value() + c.appendOp(urxLaEnd, dataLoc) + + case parenNegLookahead: + startOp := c.out.compiledPat[c.matchOpenParen-1] + if startOp.typ() != urxLaStart { + panic("bad type in capture op (expected URX_LA_START)") + } + dataLoc := startOp.value() + c.appendOp(urxLaEnd, dataLoc) + c.appendOp(urxBacktrack, 0) + c.appendOp(urxLaEnd, dataLoc) + + // Patch the URX_SAVE near the top of the block. + // The destination of the SAVE is the final LA_END that was just added. 
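+		// Sketch of the block produced for (?!X):
+		//   LA_START
+		//   [matchOpenParen] STATE_SAVE -> final LA_END (patched just below)
+		//   ...              compiled code for X ...
+		//   LA_END
+		//   BACKTRACK        reached only when X matched, so the lookahead fails
+		//   LA_END           the SAVE destination; matching continues here when X fails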
+ saveOp := c.out.compiledPat[c.matchOpenParen] + if saveOp.typ() != urxStateSave { + panic("bad type in capture op (expected URX_STATE_SAVE)") + } + saveOp = c.buildOp(urxStateSave, len(c.out.compiledPat)-1) + c.out.compiledPat[c.matchOpenParen] = saveOp + + case parenLookBehind: + startOp := c.out.compiledPat[c.matchOpenParen-4] + if startOp.typ() != urxLbStart { + panic("bad type in capture op (expected URX_LB_START)") + } + dataLoc := startOp.value() + c.appendOp(urxLbEnd, dataLoc) + c.appendOp(urxLaEnd, dataLoc) + + // Determine the min and max bounds for the length of the + // string that the pattern can match. + // An unbounded upper limit is an error. + patEnd := len(c.out.compiledPat) - 1 + minML := c.minMatchLength(c.matchOpenParen, patEnd) + maxML := c.maxMatchLength(c.matchOpenParen, patEnd) + + if maxML == math.MaxInt32 { + c.error(LookBehindLimit) + break + } + if minML == math.MaxInt32 { + // This condition happens when no match is possible, such as with a + // [set] expression containing no elements. + // In principle, the generated code to evaluate the expression could be deleted, + // but it's probably not worth the complication. + minML = 0 + } + + c.out.compiledPat[c.matchOpenParen-2] = instruction(minML) + c.out.compiledPat[c.matchOpenParen-1] = instruction(maxML) + + case parenLookBehindN: + startOp := c.out.compiledPat[c.matchOpenParen-5] + if startOp.typ() != urxLbStart { + panic("bad type in capture op (expected URX_LB_START)") + } + dataLoc := startOp.value() + c.appendOp(urxLbnEnd, dataLoc) + + // Determine the min and max bounds for the length of the + // string that the pattern can match. + // An unbounded upper limit is an error. + patEnd := len(c.out.compiledPat) - 1 + minML := c.minMatchLength(c.matchOpenParen, patEnd) + maxML := c.maxMatchLength(c.matchOpenParen, patEnd) + + if instruction(maxML).typ() != 0 { + c.error(LookBehindLimit) + break + } + if maxML == math.MaxInt32 { + c.error(LookBehindLimit) + break + } + if minML == math.MaxInt32 { + // This condition happens when no match is possible, such as with a + // [set] expression containing no elements. + // In principle, the generated code to evaluate the expression could be deleted, + // but it's probably not worth the complication. + minML = 0 + } + + c.out.compiledPat[c.matchOpenParen-3] = instruction(minML) + c.out.compiledPat[c.matchOpenParen-2] = instruction(maxML) + + op := c.buildOp(urxRelocOprnd, len(c.out.compiledPat)) + c.out.compiledPat[c.matchOpenParen-1] = op + + default: + panic("unexpected opcode in parenStack") + } + + c.matchCloseParen = len(c.out.compiledPat) +} + +func (c *compiler) fixLiterals(split bool) { + if len(c.literalChars) == 0 { + return + } + + lastCodePoint := c.literalChars[len(c.literalChars)-1] + + // Split: We need to ensure that the last item in the compiled pattern + // refers only to the last literal scanned in the pattern, so that + // quantifiers (*, +, etc.) affect only it, and not a longer string. + // Split before case folding for case insensitive matches. 
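+	// For example, while compiling "abc+" the pending literals are "abc" when the
+	// '+' is seen; the split path emits "ab" as a string op and 'c' as its own op,
+	// so the quantifier applies to 'c' only.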
+ if split { + c.literalChars = c.literalChars[:len(c.literalChars)-1] + c.fixLiterals(false) + + c.literalChar(lastCodePoint) + c.fixLiterals(false) + return + } + + if c.modeFlags&CaseInsensitive != 0 { + c.literalChars = ucase.FoldRunes(c.literalChars) + lastCodePoint = c.literalChars[len(c.literalChars)-1] + } + + if len(c.literalChars) == 1 { + if c.modeFlags&CaseInsensitive != 0 && uprops.HasBinaryProperty(lastCodePoint, uprops.UCharCaseSensitive) { + c.appendOp(urcOnecharI, int(lastCodePoint)) + } else { + c.appendOp(urxOnechar, int(lastCodePoint)) + } + } else { + if len(c.literalChars) > 0x00ffffff || len(c.out.literalText) > 0x00ffffff { + c.error(PatternTooBig) + } + if c.modeFlags&CaseInsensitive != 0 { + c.appendOp(urxStringI, len(c.out.literalText)) + } else { + c.appendOp(urxString, len(c.out.literalText)) + } + c.appendOp(urxStringLen, len(c.literalChars)) + c.out.literalText = append(c.out.literalText, c.literalChars...) + } + + c.literalChars = c.literalChars[:0] +} + +func (c *compiler) literalChar(point rune) { + c.literalChars = append(c.literalChars, point) +} + +func (c *compiler) allocateData(size int) int { + if c.err != nil { + return 0 + } + if size <= 0 || size > 0x100 || c.out.dataSize < 0 { + c.error(InternalError) + return 0 + } + + dataIndex := c.out.dataSize + c.out.dataSize += size + if c.out.dataSize >= 0x00fffff0 { + c.error(InternalError) + } + return dataIndex +} + +func (c *compiler) allocateStackData(size int) int { + if c.err != nil { + return 0 + } + if size <= 0 || size > 0x100 || c.out.frameSize < 0 { + c.error(InternalError) + return 0 + } + dataIndex := c.out.frameSize + c.out.frameSize += size + if c.out.frameSize >= 0x00fffff0 { + c.error(InternalError) + } + return dataIndex +} + +func (c *compiler) insertOp(where int) { + if where < 0 || where >= len(c.out.compiledPat) { + panic("insertOp: out of bounds") + } + + nop := c.buildOp(urxNop, 0) + c.out.compiledPat = slices.Insert(c.out.compiledPat, where, nop) + + // Walk through the pattern, looking for any ops with targets that + // were moved down by the insert. Fix them. + for loc, op := range c.out.compiledPat { + switch op.typ() { + case urxJmp, urxJmpx, urxStateSave, utxCtrLoop, urxCtrLoopNg, urxJmpSav, urxJmpSavX, urxRelocOprnd: + if op.value() > where { + op = c.buildOp(op.typ(), op.value()+1) + c.out.compiledPat[loc] = op + } + } + } + + // Now fix up the parentheses stack. All positive values in it are locations in + // the compiled pattern. (Negative values are frame boundaries, and don't need fixing.) + for loc, x := range c.parenStack { + if x > where { + c.parenStack[loc] = x + 1 + } + } + + if c.matchCloseParen > where { + c.matchCloseParen++ + } + if c.matchOpenParen > where { + c.matchOpenParen++ + } +} + +func (c *compiler) blockTopLoc(reserve bool) int { + var loc int + c.fixLiterals(true) + + if len(c.out.compiledPat) == c.matchCloseParen { + // The item just processed is a parenthesized block. + loc = c.matchOpenParen + } else { + // Item just compiled is a single thing, a ".", or a single char, a string or a set reference. + // No slot for STATE_SAVE was pre-reserved in the compiled code. + // We need to make space now. + loc = len(c.out.compiledPat) - 1 + op := c.out.compiledPat[loc] + if op.typ() == urxStringLen { + // Strings take two opcode, we want the position of the first one. + // We can have a string at this point if a single character case-folded to two. 
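+			// Step back from the URX_STRING_LEN operand to its preceding URX_STRING / URX_STRING_I op.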
+ loc-- + } + if reserve { + nop := c.buildOp(urxNop, 0) + c.out.compiledPat = slices.Insert(c.out.compiledPat, loc, nop) + } + } + return loc +} + +func (c *compiler) compileInlineInterval() bool { + if c.intervalUpper > 10 || c.intervalUpper < c.intervalLow { + return false + } + + topOfBlock := c.blockTopLoc(false) + if c.intervalUpper == 0 { + // Pathological case. Attempt no matches, as if the block doesn't exist. + // Discard the generated code for the block. + // If the block included parens, discard the info pertaining to them as well. + c.out.compiledPat = c.out.compiledPat[:topOfBlock] + if c.matchOpenParen >= topOfBlock { + c.matchOpenParen = -1 + } + if c.matchCloseParen >= topOfBlock { + c.matchCloseParen = -1 + } + return true + } + + if topOfBlock != len(c.out.compiledPat)-1 && c.intervalUpper != 1 { + // The thing being repeated is not a single op, but some + // more complex block. Do it as a loop, not inlines. + // Note that things "repeated" a max of once are handled as inline, because + // the one copy of the code already generated is just fine. + return false + } + + // Pick up the opcode that is to be repeated + // + op := c.out.compiledPat[topOfBlock] + + // Compute the pattern location where the inline sequence + // will end, and set up the state save op that will be needed. + // + endOfSequenceLoc := len(c.out.compiledPat) - 1 + c.intervalUpper + (c.intervalUpper - c.intervalLow) + + saveOp := c.buildOp(urxStateSave, endOfSequenceLoc) + if c.intervalLow == 0 { + c.insertOp(topOfBlock) + c.out.compiledPat[topOfBlock] = saveOp + } + + // Loop, emitting the op for the thing being repeated each time. + // Loop starts at 1 because one instance of the op already exists in the pattern, + // it was put there when it was originally encountered. + for i := 1; i < c.intervalUpper; i++ { + if i >= c.intervalLow { + c.appendIns(saveOp) + } + c.appendIns(op) + } + return true +} + +func (c *compiler) compileInterval(init opcode, loop opcode) { + // The CTR_INIT op at the top of the block with the {n,m} quantifier takes + // four slots in the compiled code. Reserve them. + topOfBlock := c.blockTopLoc(true) + c.insertOp(topOfBlock) + c.insertOp(topOfBlock) + c.insertOp(topOfBlock) + + // The operands for the CTR_INIT opcode include the index in the matcher data + // of the counter. Allocate it now. There are two data items + // counterLoc --> Loop counter + // +1 --> Input index (for breaking non-progressing loops) + // (Only present if unbounded upper limit on loop) + var dataSize int + if c.intervalUpper < 0 { + dataSize = 2 + } else { + dataSize = 1 + } + counterLoc := c.allocateStackData(dataSize) + + op := c.buildOp(init, counterLoc) + c.out.compiledPat[topOfBlock] = op + + // The second operand of CTR_INIT is the location following the end of the loop. + // Must put in as a URX_RELOC_OPRND so that the value will be adjusted if the + // compilation of something later on causes the code to grow and the target + // position to move. + loopEnd := len(c.out.compiledPat) + op = c.buildOp(urxRelocOprnd, loopEnd) + c.out.compiledPat[topOfBlock+1] = op + + // Followed by the min and max counts. + c.out.compiledPat[topOfBlock+2] = instruction(c.intervalLow) + c.out.compiledPat[topOfBlock+3] = instruction(c.intervalUpper) + + // Append the CTR_LOOP op. The operand is the location of the CTR_INIT op. + // Goes at end of the block being looped over, so just append to the code so far. 
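+	// Rough shape of the compiled code for a quantifier such as x{2,5}:
+	//   [topOfBlock]   CTR_INIT (or CTR_INIT_NG)  operand: counterLoc
+	//   [topOfBlock+1] RELOC_OPRND -> loop end
+	//   [topOfBlock+2] 2   (minimum count)
+	//   [topOfBlock+3] 5   (maximum count, -1 for unbounded)
+	//   ...            already-compiled code for x ...
+	//   CTR_LOOP       operand: topOfBlock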
+ c.appendOp(loop, topOfBlock) + + if (c.intervalLow&0xff000000) != 0 || (c.intervalUpper > 0 && (c.intervalUpper&0xff000000) != 0) { + c.error(NumberTooBig) + } + + if c.intervalLow > c.intervalUpper && c.intervalUpper != -1 { + c.error(MaxLtMin) + } +} + +func (c *compiler) scanNamedChar() rune { + c.nextChar(&c.c) + if c.c.char != chLBrace { + c.error(PropertySyntax) + return 0 + } + + var charName []rune + for { + c.nextChar(&c.c) + if c.c.char == chRBrace { + break + } + if c.c.char == -1 { + c.error(PropertySyntax) + return 0 + } + charName = append(charName, c.c.char) + } + + if !isInvariantUString(charName) { + // All Unicode character names have only invariant characters. + // The API to get a character, given a name, accepts only char *, forcing us to convert, + // which requires this error check + c.error(PropertySyntax) + return 0 + } + + theChar := unames.CharForName(unames.UnicodeCharName, string(charName)) + if c.err != nil { + c.error(PropertySyntax) + } + + c.nextChar(&c.c) // Continue overall regex pattern processing with char after the '}' + return theChar +} + +func isInvariantUString(name []rune) bool { + for _, c := range name { + /* + * no assertions here because these functions are legitimately called + * for strings with variant characters + */ + if !ucharIsInvariant(c) { + return false /* found a variant char */ + } + } + return true +} + +var invariantChars = [...]uint32{ + 0xfffffbff, /* 00..1f but not 0a */ + 0xffffffe5, /* 20..3f but not 21 23 24 */ + 0x87fffffe, /* 40..5f but not 40 5b..5e */ + 0x87fffffe, /* 60..7f but not 60 7b..7e */ +} + +func ucharIsInvariant(c rune) bool { + return c <= 0x7f && (invariantChars[(c)>>5]&(uint32(1)<<(c&0x1f))) != 0 +} + +func (c *compiler) setPushOp(op setOperation) { + c.setEval(op) + c.setOpStack = append(c.setOpStack, op) + c.setStack = append(c.setStack, uset.New()) +} + +func (c *compiler) setEval(nextOp setOperation) { + var rightOperand *uset.UnicodeSet + var leftOperand *uset.UnicodeSet + + for { + pendingSetOp := c.setOpStack[len(c.setOpStack)-1] + if (pendingSetOp & 0xffff0000) < (nextOp & 0xffff0000) { + break + } + + c.setOpStack = c.setOpStack[:len(c.setOpStack)-1] + rightOperand = c.setStack[len(c.setStack)-1] + + switch pendingSetOp { + case setNegation: + rightOperand.Complement() + + case setCaseClose: + rightOperand.CloseOver(uset.CaseInsensitive) + + case setDifference1, setDifference2: + c.setStack = c.setStack[:len(c.setStack)-1] + leftOperand = c.setStack[len(c.setStack)-1] + leftOperand.RemoveAll(rightOperand) + + case setIntersection1, setIntersection2: + c.setStack = c.setStack[:len(c.setStack)-1] + leftOperand = c.setStack[len(c.setStack)-1] + leftOperand.RetainAll(rightOperand) + + case setUnion: + c.setStack = c.setStack[:len(c.setStack)-1] + leftOperand = c.setStack[len(c.setStack)-1] + leftOperand.AddAll(rightOperand) + + default: + panic("unreachable") + } + } +} + +func safeIncrement(val int32, delta int) int32 { + if delta <= math.MaxInt32 && math.MaxInt32-val > int32(delta) { + return val + int32(delta) + } + return math.MaxInt32 +} + +func (c *compiler) minMatchLength(start, end int) int32 { + if c.err != nil { + return 0 + } + + var loc int + var currentLen int32 + + // forwardedLength is a vector holding minimum-match-length values that + // are propagated forward in the pattern by JMP or STATE_SAVE operations. 
+ // It must be one longer than the pattern being checked because some ops + // will jmp to a end-of-block+1 location from within a block, and we must + // count those when checking the block. + forwardedLength := make([]int32, end+2) + for i := range forwardedLength { + forwardedLength[i] = math.MaxInt32 + } + + for loc = start; loc <= end; loc++ { + op := c.out.compiledPat[loc] + opType := op.typ() + + // The loop is advancing linearly through the pattern. + // If the op we are now at was the destination of a branch in the pattern, + // and that path has a shorter minimum length than the current accumulated value, + // replace the current accumulated value. + // no-match-possible cases. + if forwardedLength[loc] < currentLen { + currentLen = forwardedLength[loc] + } + + switch opType { + // Ops that don't change the total length matched + case urxReservedOp, + urxEnd, + urxStringLen, + urxNop, + urxStartCapture, + urxEndCapture, + urxBackslashB, + urxBackslashBu, + urxBackslashG, + urxBackslashZ, + urxCaret, + urxDollar, + urxDollarM, + urxDollarD, + urxDollarMd, + urxRelocOprnd, + urxStoInpLoc, + urxCaretM, + urxCaretMUnix, + urxBackref, // BackRef. Must assume that it might be a zero length match + urxBackrefI, + urxStoSp, // Setup for atomic or possessive blocks. Doesn't change what can match. + urxLdSp, + urxJmpSav, + urxJmpSavX: + // no-op + + // Ops that match a minimum of one character (one or two 16 bit code units.) + // + case urxOnechar, + urxStaticSetref, + urxStatSetrefN, + urxSetref, + urxBackslashD, + urxBackslashH, + urxBackslashR, + urxBackslashV, + urcOnecharI, + urxBackslashX, // Grahpeme Cluster. Minimum is 1, max unbounded. + urxDotanyAll, // . matches one or two. + urxDotany, + urxDotanyUnix: + currentLen = safeIncrement(currentLen, 1) + + case urxJmpx: + loc++ // URX_JMPX has an extra operand, ignored here, otherwise processed identically to URX_JMP. + fallthrough + + case urxJmp: + jmpDest := op.value() + if jmpDest < loc { + // Loop of some kind. Can safely ignore, the worst that will happen + // is that we understate the true minimum length + currentLen = forwardedLength[loc+1] + } else { + // Forward jump. Propagate the current min length to the target loc of the jump. + if forwardedLength[jmpDest] > currentLen { + forwardedLength[jmpDest] = currentLen + } + } + + case urxBacktrack: + // Back-tracks are kind of like a branch, except that the min length was + // propagated already, by the state save. + currentLen = forwardedLength[loc+1] + + case urxStateSave: + // State Save, for forward jumps, propagate the current minimum. + // of the state save. + jmpDest := op.value() + if jmpDest > loc { + if currentLen < forwardedLength[jmpDest] { + forwardedLength[jmpDest] = currentLen + } + } + + case urxString: + loc++ + stringLenOp := c.out.compiledPat[loc] + currentLen = safeIncrement(currentLen, stringLenOp.value()) + + case urxStringI: + loc++ + // TODO: with full case folding, matching input text may be shorter than + // the string we have here. More smarts could put some bounds on it. + // Assume a min length of one for now. A min length of zero causes + // optimization failures for a pattern like "string"+ + // currentLen += URX_VAL(stringLenOp); + currentLen = safeIncrement(currentLen, 1) + + case urxCtrInit, urxCtrInitNg: + // Loop Init Ops. + // If the min loop count == 0 + // move loc forwards to the end of the loop, skipping over the body. + // If the min count is > 0, + // continue normal processing of the body of the loop. 
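+			// (loc+1 holds the loop-end location, loc+2 the minimum count; see compileInterval.)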
+ loopEndOp := c.out.compiledPat[loc+1] + loopEndLoc := loopEndOp.value() + minLoopCount := c.out.compiledPat[loc+2] + if minLoopCount == 0 { + loc = loopEndLoc + } else { + loc += 3 // Skips over operands of CTR_INIT + } + + case utxCtrLoop, urxCtrLoopNg: + // Loop ops. The jump is conditional, backwards only. + + case urxLoopSrI, urxLoopDotI, urxLoopC: + // More loop ops. These state-save to themselves. don't change the minimum match - could match nothing at all. + + case urxLaStart, urxLbStart: + // Look-around. Scan forward until the matching look-ahead end, + // without processing the look-around block. This is overly pessimistic for look-ahead, + // it assumes that the look-ahead match might be zero-length. + // TODO: Positive lookahead could recursively do the block, then continue + // with the longer of the block or the value coming in. Ticket 6060 + var depth int32 + if opType == urxLaStart { + depth = 2 + } else { + depth = 1 + } + + for { + loc++ + op = c.out.compiledPat[loc] + if op.typ() == urxLaStart { + // The boilerplate for look-ahead includes two LA_END insturctions, + // Depth will be decremented by each one when it is seen. + depth += 2 + } + if op.typ() == urxLbStart { + depth++ + } + if op.typ() == urxLaEnd { + depth-- + if depth == 0 { + break + } + } + if op.typ() == urxLbnEnd { + depth-- + if depth == 0 { + break + } + } + if op.typ() == urxStateSave { + // Need this because neg lookahead blocks will FAIL to outside of the block. + jmpDest := op.value() + if jmpDest > loc { + if currentLen < forwardedLength[jmpDest] { + forwardedLength[jmpDest] = currentLen + } + } + } + } + + case urxLaEnd, urxLbCont, urxLbEnd, urxLbnCount, urxLbnEnd: + // Only come here if the matching URX_LA_START or URX_LB_START was not in the + // range being sized, which happens when measuring size of look-behind blocks. + + default: + panic("unreachable") + } + } + + // We have finished walking through the ops. Check whether some forward jump + // propagated a shorter length to location end+1. + if forwardedLength[end+1] < currentLen { + currentLen = forwardedLength[end+1] + } + + return currentLen +} + +func (c *compiler) maxMatchLength(start, end int) int32 { + if c.err != nil { + return 0 + } + var loc int + var currentLen int32 + + forwardedLength := make([]int32, end+1) + + for loc = start; loc <= end; loc++ { + op := c.out.compiledPat[loc] + opType := op.typ() + + // The loop is advancing linearly through the pattern. + // If the op we are now at was the destination of a branch in the pattern, + // and that path has a longer maximum length than the current accumulated value, + // replace the current accumulated value. + if forwardedLength[loc] > currentLen { + currentLen = forwardedLength[loc] + } + + switch opType { + // Ops that don't change the total length matched + case urxReservedOp, + urxEnd, + urxStringLen, + urxNop, + urxStartCapture, + urxEndCapture, + urxBackslashB, + urxBackslashBu, + urxBackslashG, + urxBackslashZ, + urxCaret, + urxDollar, + urxDollarM, + urxDollarD, + urxDollarMd, + urxRelocOprnd, + urxStoInpLoc, + urxCaretM, + urxCaretMUnix, + urxStoSp, // Setup for atomic or possessive blocks. Doesn't change what can match. + urxLdSp, + urxLbEnd, + urxLbCont, + urxLbnCount, + urxLbnEnd: + // no-op + + // Ops that increase that cause an unbounded increase in the length + // of a matched string, or that increase it a hard to characterize way. + // Call the max length unbounded, and stop further checking. + case urxBackref, // BackRef. 
Must assume that it might be a zero length match + urxBackrefI, + urxBackslashX: // Grahpeme Cluster. Minimum is 1, max unbounded. + currentLen = math.MaxInt32 + + // Ops that match a max of one character (possibly two 16 bit code units.) + // + case urxStaticSetref, + urxStatSetrefN, + urxSetref, + urxBackslashD, + urxBackslashH, + urxBackslashR, + urxBackslashV, + urcOnecharI, + urxDotanyAll, + urxDotany, + urxDotanyUnix: + currentLen = safeIncrement(currentLen, 2) + + // Single literal character. Increase current max length by one or two, + // depending on whether the char is in the supplementary range. + case urxOnechar: + currentLen = safeIncrement(currentLen, 1) + if op.value() > 0x10000 { + currentLen = safeIncrement(currentLen, 1) + } + + // Jumps. + // + case urxJmp, urxJmpx, urxJmpSav, urxJmpSavX: + jmpDest := op.value() + if jmpDest < loc { + // Loop of some kind. Max match length is unbounded. + currentLen = math.MaxInt32 + } else { + // Forward jump. Propagate the current min length to the target loc of the jump. + if forwardedLength[jmpDest] < currentLen { + forwardedLength[jmpDest] = currentLen + } + currentLen = 0 + } + + case urxBacktrack: + // back-tracks are kind of like a branch, except that the max length was + // propagated already, by the state save. + currentLen = forwardedLength[loc+1] + + case urxStateSave: + // State Save, for forward jumps, propagate the current minimum. + // of the state save. + // For backwards jumps, they create a loop, maximum + // match length is unbounded. + jmpDest := op.value() + if jmpDest > loc { + if currentLen > forwardedLength[jmpDest] { + forwardedLength[jmpDest] = currentLen + } + } else { + currentLen = math.MaxInt32 + } + + case urxString: + loc++ + stringLenOp := c.out.compiledPat[loc] + currentLen = safeIncrement(currentLen, stringLenOp.value()) + + case urxStringI: + // TODO: This code assumes that any user string that matches will be no longer + // than our compiled string, with case insensitive matching. + // Our compiled string has been case-folded already. + // + // Any matching user string will have no more code points than our + // compiled (folded) string. Folding may add code points, but + // not remove them. + // + // There is a potential problem if a supplemental code point + // case-folds to a BMP code point. In this case our compiled string + // could be shorter (in code units) than a matching user string. + // + // At this time (Unicode 6.1) there are no such characters, and this case + // is not being handled. A test, intltest regex/Bug9283, will fail if + // any problematic characters are added to Unicode. + // + // If this happens, we can make a set of the BMP chars that the + // troublesome supplementals fold to, scan our string, and bump the + // currentLen one extra for each that is found. + // + loc++ + stringLenOp := c.out.compiledPat[loc] + currentLen = safeIncrement(currentLen, stringLenOp.value()) + + case urxCtrInit, urxCtrInitNg: + // For Loops, recursively call this function on the pattern for the loop body, + // then multiply the result by the maximum loop count. + loopEndLoc := c.out.compiledPat[loc+1].value() + if loopEndLoc == loc+4 { + // Loop has an empty body. No affect on max match length. + // Continue processing with code after the loop end. + loc = loopEndLoc + break + } + + maxLoopCount := int(c.out.compiledPat[loc+3]) + if maxLoopCount == -1 { + // Unbounded Loop. No upper bound on match length. 
+ currentLen = math.MaxInt32 + break + } + + blockLen := c.maxMatchLength(loc+4, loopEndLoc-1) // Recursive call. + updatedLen := int(currentLen) + int(blockLen)*maxLoopCount + if updatedLen >= math.MaxInt32 { + currentLen = math.MaxInt32 + break + } + currentLen = int32(updatedLen) + loc = loopEndLoc + + case utxCtrLoop, urxCtrLoopNg: + panic("should not encounter this opcode") + + case urxLoopSrI, urxLoopDotI, urxLoopC: + // For anything to do with loops, make the match length unbounded. + currentLen = math.MaxInt32 + + case urxLaStart, urxLaEnd: + // Look-ahead. Just ignore, treat the look-ahead block as if + // it were normal pattern. Gives a too-long match length, + // but good enough for now. + + case urxLbStart: + // Look-behind. Scan forward until the matching look-around end, + // without processing the look-behind block. + dataLoc := op.value() + for loc = loc + 1; loc <= end; loc++ { + op = c.out.compiledPat[loc] + if (op.typ() == urxLaEnd || op.typ() == urxLbnEnd) && (op.value() == dataLoc) { + break + } + } + + default: + panic("unreachable") + } + + if currentLen == math.MaxInt32 { + // The maximum length is unbounded. + // Stop further processing of the pattern. + break + } + } + + return currentLen +} + +// Machine Generated below. +// It may need updating with new versions of Unicode. +// Intltest test RegexTest::TestCaseInsensitiveStarters will fail if an update is needed. +// The update tool is here: +// svn+ssh://source.icu-project.org/repos/icu/tools/trunk/unicode/c/genregexcasing + +// Machine Generated Data. Do not hand edit. +var reCaseFixCodePoints = [...]rune{ + 0x61, 0x66, 0x68, 0x69, 0x6a, 0x73, 0x74, 0x77, 0x79, 0x2bc, + 0x3ac, 0x3ae, 0x3b1, 0x3b7, 0x3b9, 0x3c1, 0x3c5, 0x3c9, 0x3ce, 0x565, + 0x574, 0x57e, 0x1f00, 0x1f01, 0x1f02, 0x1f03, 0x1f04, 0x1f05, 0x1f06, 0x1f07, + 0x1f20, 0x1f21, 0x1f22, 0x1f23, 0x1f24, 0x1f25, 0x1f26, 0x1f27, 0x1f60, 0x1f61, + 0x1f62, 0x1f63, 0x1f64, 0x1f65, 0x1f66, 0x1f67, 0x1f70, 0x1f74, 0x1f7c, 0x110000} + +var reCaseFixStringOffsets = [...]int16{ + 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, + 0x17, 0x1b, 0x20, 0x21, 0x2a, 0x2e, 0x2f, 0x30, 0x34, 0x35, 0x37, 0x39, 0x3b, + 0x3d, 0x3f, 0x41, 0x43, 0x45, 0x47, 0x49, 0x4b, 0x4d, 0x4f, 0x51, 0x53, 0x55, + 0x57, 0x59, 0x5b, 0x5d, 0x5f, 0x61, 0x63, 0x65, 0x66, 0x67, 0} + +var reCaseFixCounts = [...]int16{ + 0x1, 0x5, 0x1, 0x1, 0x1, 0x4, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x4, 0x4, 0x5, 0x1, 0x9, + 0x4, 0x1, 0x1, 0x4, 0x1, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, + 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x1, 0x1, 0x1, 0} + +var reCaseFixData = [...]uint16{ + 0x1e9a, 0xfb00, 0xfb01, 0xfb02, 0xfb03, 0xfb04, 0x1e96, 0x130, 0x1f0, 0xdf, 0x1e9e, 0xfb05, + 0xfb06, 0x1e97, 0x1e98, 0x1e99, 0x149, 0x1fb4, 0x1fc4, 0x1fb3, 0x1fb6, 0x1fb7, 0x1fbc, 0x1fc3, + 0x1fc6, 0x1fc7, 0x1fcc, 0x390, 0x1fd2, 0x1fd3, 0x1fd6, 0x1fd7, 0x1fe4, 0x3b0, 0x1f50, 0x1f52, + 0x1f54, 0x1f56, 0x1fe2, 0x1fe3, 0x1fe6, 0x1fe7, 0x1ff3, 0x1ff6, 0x1ff7, 0x1ffc, 0x1ff4, 0x587, + 0xfb13, 0xfb14, 0xfb15, 0xfb17, 0xfb16, 0x1f80, 0x1f88, 0x1f81, 0x1f89, 0x1f82, 0x1f8a, 0x1f83, + 0x1f8b, 0x1f84, 0x1f8c, 0x1f85, 0x1f8d, 0x1f86, 0x1f8e, 0x1f87, 0x1f8f, 0x1f90, 0x1f98, 0x1f91, + 0x1f99, 0x1f92, 0x1f9a, 0x1f93, 0x1f9b, 0x1f94, 0x1f9c, 0x1f95, 0x1f9d, 0x1f96, 0x1f9e, 0x1f97, + 0x1f9f, 0x1fa0, 0x1fa8, 0x1fa1, 0x1fa9, 0x1fa2, 0x1faa, 0x1fa3, 0x1fab, 0x1fa4, 0x1fac, 0x1fa5, + 0x1fad, 0x1fa6, 0x1fae, 0x1fa7, 0x1faf, 0x1fb2, 0x1fc2, 0x1ff2, 0} + +func (c *compiler) 
findCaseInsensitiveStarters(ch rune, starterChars *uset.UnicodeSet) { + if uprops.HasBinaryProperty(ch, uprops.UCharCaseSensitive) { + caseFoldedC := ucase.Fold(ch) + starterChars.Clear() + starterChars.AddRune(caseFoldedC) + + var i int + for i = 0; reCaseFixCodePoints[i] < ch; i++ { + // Simple linear search through the sorted list of interesting code points. + } + + if reCaseFixCodePoints[i] == ch { + data := reCaseFixData[reCaseFixStringOffsets[i]:] + numCharsToAdd := reCaseFixCounts[i] + for j := int16(0); j < numCharsToAdd; j++ { + var cpToAdd rune + cpToAdd, data = utf16.NextUnsafe(data) + starterChars.AddRune(cpToAdd) + } + } + + starterChars.CloseOver(uset.CaseInsensitive) + } else { + // Not a cased character. Just return it alone. + starterChars.Clear() + starterChars.AddRune(ch) + } +} + +func (c *compiler) scanProp() *uset.UnicodeSet { + if c.err != nil { + return nil + } + negated := c.c.char == chP + + c.nextChar(&c.c) + if c.c.char != chLBrace { + c.error(PropertySyntax) + return nil + } + + var propertyName strings.Builder + for { + c.nextChar(&c.c) + if c.c.char == chRBrace { + break + } + if c.c.char == -1 { + c.error(PropertySyntax) + return nil + } + propertyName.WriteRune(c.c.char) + } + + ss := c.createSetForProperty(propertyName.String(), negated) + c.nextChar(&c.c) + return ss +} + +func (c *compiler) createSetForProperty(propName string, negated bool) *uset.UnicodeSet { + if c.err != nil { + return nil + } + + var set *uset.UnicodeSet + + var usetFlags uset.USet + if c.modeFlags&CaseInsensitive != 0 { + usetFlags |= uset.CaseInsensitive + } + + var err error + set, err = uprops.NewUnicodeSetFomPattern("\\p{"+propName+"}", usetFlags) + if err == nil { + goto done + } + + // + // The incoming property wasn't directly recognized by ICU. + + // Check [:word:] and [:all:]. These are not recognized as a properties by ICU UnicodeSet. + // Java accepts 'word' with mixed case. + // Java accepts 'all' only in all lower case. + if strings.EqualFold(propName, "word") { + set = staticPropertySets[urxIswordSet].Clone() + goto done + } + if propName == "all" { + set = uset.New() + set.AddRuneRange(0, 0x10ffff) + goto done + } + + // Do Java InBlock expressions + // + if strings.HasPrefix(propName, "In") && len(propName) >= 3 { + set = uset.New() + if uprops.ApplyPropertyAlias(set, "Block", propName[2:]) != nil { + c.error(PropertySyntax) + } + goto done + } + + // Check for the Java form "IsBooleanPropertyValue", which we will recast + // as "BooleanPropertyValue". The property value can be either a + // a General Category or a Script Name. + if strings.HasPrefix(propName, "Is") && len(propName) >= 3 { + mPropName := propName[2:] + if strings.IndexByte(mPropName, '=') >= 0 { + c.error(PropertySyntax) + goto done + } + + if strings.EqualFold(mPropName, "assigned") { + mPropName = "unassigned" + negated = !negated + } else if strings.EqualFold(mPropName, "TitleCase") { + mPropName = "Titlecase_Letter" + } + + set, err = uprops.NewUnicodeSetFomPattern("\\p{"+mPropName+"}", 0) + if err != nil { + c.error(PropertySyntax) + } else if !set.IsEmpty() && (usetFlags&uset.CaseInsensitive) != 0 { + set.CloseOver(uset.CaseInsensitive) + } + goto done + } + + if strings.HasPrefix(propName, "java") { + set = uset.New() + + // + // Try the various Java specific properties. 
+ // These all begin with "java" + // + if propName == "javaDefined" { + c.err = uprops.AddCategory(set, uchar.GcCnMask) + set.Complement() + } else if propName == "javaDigit" { + c.err = uprops.AddCategory(set, uchar.GcNdMask) + } else if propName == "javaIdentifierIgnorable" { + c.err = addIdentifierIgnorable(set) + } else if propName == "javaISOControl" { + set.AddRuneRange(0, 0x1F) + set.AddRuneRange(0x7F, 0x9F) + } else if propName == "javaJavaIdentifierPart" { + c.err = uprops.AddCategory(set, uchar.GcLMask) + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcScMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcPcMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcNdMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcNlMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcMcMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcMnMask) + } + if c.err == nil { + c.err = addIdentifierIgnorable(set) + } + } else if propName == "javaJavaIdentifierStart" { + c.err = uprops.AddCategory(set, uchar.GcLMask) + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcNlMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcScMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcPcMask) + } + } else if propName == "javaLetter" { + c.err = uprops.AddCategory(set, uchar.GcLMask) + } else if propName == "javaLetterOrDigit" { + c.err = uprops.AddCategory(set, uchar.GcLMask) + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcNdMask) + } + } else if propName == "javaLowerCase" { + c.err = uprops.AddCategory(set, uchar.GcLlMask) + } else if propName == "javaMirrored" { + c.err = uprops.ApplyIntPropertyValue(set, uprops.UCharBidiMirrored, 1) + } else if propName == "javaSpaceChar" { + c.err = uprops.AddCategory(set, uchar.GcZMask) + } else if propName == "javaSupplementaryCodePoint" { + set.AddRuneRange(0x10000, uset.MaxValue) + } else if propName == "javaTitleCase" { + c.err = uprops.AddCategory(set, uchar.GcLtMask) + } else if propName == "javaUnicodeIdentifierStart" { + c.err = uprops.AddCategory(set, uchar.GcLMask) + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcNlMask) + } + } else if propName == "javaUnicodeIdentifierPart" { + c.err = uprops.AddCategory(set, uchar.GcLMask) + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcPcMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcNdMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcNlMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcMcMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcMnMask) + } + if c.err == nil { + c.err = addIdentifierIgnorable(set) + } + } else if propName == "javaUpperCase" { + c.err = uprops.AddCategory(set, uchar.GcLuMask) + } else if propName == "javaValidCodePoint" { + set.AddRuneRange(0, uset.MaxValue) + } else if propName == "javaWhitespace" { + c.err = uprops.AddCategory(set, uchar.GcZMask) + excl := uset.New() + excl.AddRune(0x0a) + excl.AddRune(0x2007) + excl.AddRune(0x202f) + set.RemoveAll(excl) + set.AddRuneRange(9, 0x0d) + set.AddRuneRange(0x1c, 0x1f) + } else { + c.error(PropertySyntax) + } + + if c.err == nil && !set.IsEmpty() && (usetFlags&uset.CaseInsensitive) != 0 { + set.CloseOver(uset.CaseInsensitive) + } + goto done + } + + // Unrecognized property. ICU didn't like it as it was, and none of the Java compatibility + // extensions matched it. 
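+	// (e.g. \p{NoSuchProperty} or [:bogus:] ends up here.)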
+ c.error(PropertySyntax) + +done: + if c.err != nil { + return nil + } + if negated { + set.Complement() + } + return set +} + +func addIdentifierIgnorable(set *uset.UnicodeSet) error { + set.AddRuneRange(0, 8) + set.AddRuneRange(0x0e, 0x1b) + set.AddRuneRange(0x7f, 0x9f) + + return uprops.AddCategory(set, uchar.GcCfMask) +} + +func (c *compiler) scanPosixProp() *uset.UnicodeSet { + var set *uset.UnicodeSet + + if !(c.c.char == chColon) { + panic("assertion failed: c.lastChar == ':'") + } + + savedScanIndex := c.scanIndex + savedScanPattern := c.p + savedQuoteMode := c.quoteMode + savedInBackslashQuote := c.inBackslashQuote + savedEOLComments := c.eolComments + savedLineNum := c.lineNum + savedCharNum := c.charNum + savedLastChar := c.lastChar + savedPeekChar := c.peekChar + savedC := c.c + + // Scan for a closing ]. A little tricky because there are some perverse + // edge cases possible. "[:abc\Qdef:] \E]" is a valid non-property expression, + // ending on the second closing ]. + var propName []rune + negated := false + + // Check for and consume the '^' in a negated POSIX property, e.g. [:^Letter:] + c.nextChar(&c.c) + if c.c.char == chUp { + negated = true + c.nextChar(&c.c) + } + + // Scan for the closing ":]", collecting the property name along the way. + sawPropSetTerminator := false + for { + propName = append(propName, c.c.char) + c.nextChar(&c.c) + if c.c.quoted || c.c.char == -1 { + // Escaped characters or end of input - either says this isn't a [:Property:] + break + } + if c.c.char == chColon { + c.nextChar(&c.c) + if c.c.char == chRBracket { + sawPropSetTerminator = true + break + } + } + } + + if sawPropSetTerminator { + set = c.createSetForProperty(string(propName), negated) + } else { + // No closing ']' - not a [:Property:] + // Restore the original scan position. + // The main scanner will retry the input as a normal set expression, + // not a [:Property:] expression. + c.scanIndex = savedScanIndex + c.p = savedScanPattern + c.quoteMode = savedQuoteMode + c.inBackslashQuote = savedInBackslashQuote + c.eolComments = savedEOLComments + c.lineNum = savedLineNum + c.charNum = savedCharNum + c.lastChar = savedLastChar + c.peekChar = savedPeekChar + c.c = savedC + } + + return set +} + +func (c *compiler) compileSet(set *uset.UnicodeSet) { + if set == nil { + return + } + // Remove any strings from the set. + // There shoudn't be any, but just in case. + // (Case Closure can add them; if we had a simple case closure available that + // ignored strings, that would be better.) + setSize := set.Len() + + switch setSize { + case 0: + // Set of no elements. Always fails to match. + c.appendOp(urxBacktrack, 0) + + case 1: + // The set contains only a single code point. Put it into + // the compiled pattern as a single char operation rather + // than a set, and discard the set itself. + c.literalChar(set.RuneAt(0)) + + default: + // The set contains two or more chars. (the normal case) + // Put it into the compiled pattern as a set. + // theSet->freeze(); + setNumber := len(c.out.sets) + c.out.sets = append(c.out.sets, set) + c.appendOp(urxSetref, setNumber) + } +} diff --git a/go/mysql/icuregex/compiler_table.go b/go/mysql/icuregex/compiler_table.go new file mode 100644 index 00000000000..e8cfe0d5e55 --- /dev/null +++ b/go/mysql/icuregex/compiler_table.go @@ -0,0 +1,357 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. 
+ +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package icuregex + +type patternParseAction uint8 + +const ( + doSetBackslashD patternParseAction = iota + doBackslashh + doBackslashH + doSetLiteralEscaped + doOpenLookAheadNeg + doCompleteNamedBackRef + doPatStart + doBackslashS + doBackslashD + doNGStar + doNOP + doBackslashX + doSetLiteral + doContinueNamedCapture + doBackslashG + doBackslashR + doSetBegin + doSetBackslashv + doPossessivePlus + doPerlInline + doBackslashZ + doSetAddAmp + doSetBeginDifference1 + doIntervalError + doSetNegate + doIntervalInit + doSetIntersection2 + doPossessiveInterval + doRuleError + doBackslashW + doContinueNamedBackRef + doOpenNonCaptureParen + doExit + doSetNamedChar + doSetBackslashV + doConditionalExpr + doEscapeError + doBadOpenParenType + doPossessiveStar + doSetAddDash + doEscapedLiteralChar + doSetBackslashw + doIntervalUpperDigit + doBackslashv + doSetBackslashS + doSetNoCloseError + doSetProp + doBackslashB + doSetEnd + doSetRange + doMatchModeParen + doPlus + doBackslashV + doSetMatchMode + doBackslashz + doSetNamedRange + doOpenLookBehindNeg + doInterval + doBadNamedCapture + doBeginMatchMode + doBackslashd + doPatFinish + doNamedChar + doNGPlus + doSetDifference2 + doSetBackslashH + doCloseParen + doDotAny + doOpenCaptureParen + doEnterQuoteMode + doOpenAtomicParen + doBadModeFlag + doSetBackslashd + doSetFinish + doProperty + doBeginNamedBackRef + doBackRef + doOpt + doDollar + doBeginNamedCapture + doNGInterval + doSetOpError + doSetPosixProp + doSetBeginIntersection1 + doBackslashb + doSetBeginUnion + doIntevalLowerDigit + doSetBackslashh + doStar + doMatchMode + doBackslashA + doOpenLookBehind + doPossessiveOpt + doOrOperator + doBackslashw + doBackslashs + doLiteralChar + doSuppressComments + doCaret + doIntervalSame + doNGOpt + doOpenLookAhead + doSetBackslashW + doMismatchedParenErr + doSetBackslashs + rbbiLastAction +) + +// ------------------------------------------------------------------------------- +// +// RegexTableEl represents the structure of a row in the transition table +// for the pattern parser state machine. +// +// ------------------------------------------------------------------------------- +type regexTableEl struct { + action patternParseAction + charClass uint8 + nextState uint8 + pushState uint8 + nextChar bool +} + +var parseStateTable = []regexTableEl{ + {doNOP, 0, 0, 0, true}, + {doPatStart, 255, 2, 0, false}, // 1 start + {doLiteralChar, 254, 14, 0, true}, // 2 term + {doLiteralChar, 130, 14, 0, true}, // 3 + {doSetBegin, 91 /* [ */, 123, 205, true}, // 4 + {doNOP, 40 /* ( */, 27, 0, true}, // 5 + {doDotAny, 46 /* . 
*/, 14, 0, true}, // 6 + {doCaret, 94 /* ^ */, 14, 0, true}, // 7 + {doDollar, 36 /* $ */, 14, 0, true}, // 8 + {doNOP, 92 /* \ */, 89, 0, true}, // 9 + {doOrOperator, 124 /* | */, 2, 0, true}, // 10 + {doCloseParen, 41 /* ) */, 255, 0, true}, // 11 + {doPatFinish, 253, 2, 0, false}, // 12 + {doRuleError, 255, 206, 0, false}, // 13 + {doNOP, 42 /* * */, 68, 0, true}, // 14 expr-quant + {doNOP, 43 /* + */, 71, 0, true}, // 15 + {doNOP, 63 /* ? */, 74, 0, true}, // 16 + {doIntervalInit, 123 /* { */, 77, 0, true}, // 17 + {doNOP, 40 /* ( */, 23, 0, true}, // 18 + {doNOP, 255, 20, 0, false}, // 19 + {doOrOperator, 124 /* | */, 2, 0, true}, // 20 expr-cont + {doCloseParen, 41 /* ) */, 255, 0, true}, // 21 + {doNOP, 255, 2, 0, false}, // 22 + {doSuppressComments, 63 /* ? */, 25, 0, true}, // 23 open-paren-quant + {doNOP, 255, 27, 0, false}, // 24 + {doNOP, 35 /* # */, 50, 14, true}, // 25 open-paren-quant2 + {doNOP, 255, 29, 0, false}, // 26 + {doSuppressComments, 63 /* ? */, 29, 0, true}, // 27 open-paren + {doOpenCaptureParen, 255, 2, 14, false}, // 28 + {doOpenNonCaptureParen, 58 /* : */, 2, 14, true}, // 29 open-paren-extended + {doOpenAtomicParen, 62 /* > */, 2, 14, true}, // 30 + {doOpenLookAhead, 61 /* = */, 2, 20, true}, // 31 + {doOpenLookAheadNeg, 33 /* ! */, 2, 20, true}, // 32 + {doNOP, 60 /* < */, 46, 0, true}, // 33 + {doNOP, 35 /* # */, 50, 2, true}, // 34 + {doBeginMatchMode, 105 /* i */, 53, 0, false}, // 35 + {doBeginMatchMode, 100 /* d */, 53, 0, false}, // 36 + {doBeginMatchMode, 109 /* m */, 53, 0, false}, // 37 + {doBeginMatchMode, 115 /* s */, 53, 0, false}, // 38 + {doBeginMatchMode, 117 /* u */, 53, 0, false}, // 39 + {doBeginMatchMode, 119 /* w */, 53, 0, false}, // 40 + {doBeginMatchMode, 120 /* x */, 53, 0, false}, // 41 + {doBeginMatchMode, 45 /* - */, 53, 0, false}, // 42 + {doConditionalExpr, 40 /* ( */, 206, 0, true}, // 43 + {doPerlInline, 123 /* { */, 206, 0, true}, // 44 + {doBadOpenParenType, 255, 206, 0, false}, // 45 + {doOpenLookBehind, 61 /* = */, 2, 20, true}, // 46 open-paren-lookbehind + {doOpenLookBehindNeg, 33 /* ! */, 2, 20, true}, // 47 + {doBeginNamedCapture, 129, 64, 0, false}, // 48 + {doBadOpenParenType, 255, 206, 0, false}, // 49 + {doNOP, 41 /* ) */, 255, 0, true}, // 50 paren-comment + {doMismatchedParenErr, 253, 206, 0, false}, // 51 + {doNOP, 255, 50, 0, true}, // 52 + {doMatchMode, 105 /* i */, 53, 0, true}, // 53 paren-flag + {doMatchMode, 100 /* d */, 53, 0, true}, // 54 + {doMatchMode, 109 /* m */, 53, 0, true}, // 55 + {doMatchMode, 115 /* s */, 53, 0, true}, // 56 + {doMatchMode, 117 /* u */, 53, 0, true}, // 57 + {doMatchMode, 119 /* w */, 53, 0, true}, // 58 + {doMatchMode, 120 /* x */, 53, 0, true}, // 59 + {doMatchMode, 45 /* - */, 53, 0, true}, // 60 + {doSetMatchMode, 41 /* ) */, 2, 0, true}, // 61 + {doMatchModeParen, 58 /* : */, 2, 14, true}, // 62 + {doBadModeFlag, 255, 206, 0, false}, // 63 + {doContinueNamedCapture, 129, 64, 0, true}, // 64 named-capture + {doContinueNamedCapture, 128, 64, 0, true}, // 65 + {doOpenCaptureParen, 62 /* > */, 2, 14, true}, // 66 + {doBadNamedCapture, 255, 206, 0, false}, // 67 + {doNGStar, 63 /* ? */, 20, 0, true}, // 68 quant-star + {doPossessiveStar, 43 /* + */, 20, 0, true}, // 69 + {doStar, 255, 20, 0, false}, // 70 + {doNGPlus, 63 /* ? */, 20, 0, true}, // 71 quant-plus + {doPossessivePlus, 43 /* + */, 20, 0, true}, // 72 + {doPlus, 255, 20, 0, false}, // 73 + {doNGOpt, 63 /* ? 
*/, 20, 0, true}, // 74 quant-opt + {doPossessiveOpt, 43 /* + */, 20, 0, true}, // 75 + {doOpt, 255, 20, 0, false}, // 76 + {doNOP, 128, 79, 0, false}, // 77 interval-open + {doIntervalError, 255, 206, 0, false}, // 78 + {doIntevalLowerDigit, 128, 79, 0, true}, // 79 interval-lower + {doNOP, 44 /* , */, 83, 0, true}, // 80 + {doIntervalSame, 125 /* } */, 86, 0, true}, // 81 + {doIntervalError, 255, 206, 0, false}, // 82 + {doIntervalUpperDigit, 128, 83, 0, true}, // 83 interval-upper + {doNOP, 125 /* } */, 86, 0, true}, // 84 + {doIntervalError, 255, 206, 0, false}, // 85 + {doNGInterval, 63 /* ? */, 20, 0, true}, // 86 interval-type + {doPossessiveInterval, 43 /* + */, 20, 0, true}, // 87 + {doInterval, 255, 20, 0, false}, // 88 + {doBackslashA, 65 /* A */, 2, 0, true}, // 89 backslash + {doBackslashB, 66 /* B */, 2, 0, true}, // 90 + {doBackslashb, 98 /* b */, 2, 0, true}, // 91 + {doBackslashd, 100 /* d */, 14, 0, true}, // 92 + {doBackslashD, 68 /* D */, 14, 0, true}, // 93 + {doBackslashG, 71 /* G */, 2, 0, true}, // 94 + {doBackslashh, 104 /* h */, 14, 0, true}, // 95 + {doBackslashH, 72 /* H */, 14, 0, true}, // 96 + {doNOP, 107 /* k */, 115, 0, true}, // 97 + {doNamedChar, 78 /* N */, 14, 0, false}, // 98 + {doProperty, 112 /* p */, 14, 0, false}, // 99 + {doProperty, 80 /* P */, 14, 0, false}, // 100 + {doBackslashR, 82 /* R */, 14, 0, true}, // 101 + {doEnterQuoteMode, 81 /* Q */, 2, 0, true}, // 102 + {doBackslashS, 83 /* S */, 14, 0, true}, // 103 + {doBackslashs, 115 /* s */, 14, 0, true}, // 104 + {doBackslashv, 118 /* v */, 14, 0, true}, // 105 + {doBackslashV, 86 /* V */, 14, 0, true}, // 106 + {doBackslashW, 87 /* W */, 14, 0, true}, // 107 + {doBackslashw, 119 /* w */, 14, 0, true}, // 108 + {doBackslashX, 88 /* X */, 14, 0, true}, // 109 + {doBackslashZ, 90 /* Z */, 2, 0, true}, // 110 + {doBackslashz, 122 /* z */, 2, 0, true}, // 111 + {doBackRef, 128, 14, 0, true}, // 112 + {doEscapeError, 253, 206, 0, false}, // 113 + {doEscapedLiteralChar, 255, 14, 0, true}, // 114 + {doBeginNamedBackRef, 60 /* < */, 117, 0, true}, // 115 named-backref + {doBadNamedCapture, 255, 206, 0, false}, // 116 + {doContinueNamedBackRef, 129, 119, 0, true}, // 117 named-backref-2 + {doBadNamedCapture, 255, 206, 0, false}, // 118 + {doContinueNamedBackRef, 129, 119, 0, true}, // 119 named-backref-3 + {doContinueNamedBackRef, 128, 119, 0, true}, // 120 + {doCompleteNamedBackRef, 62 /* > */, 14, 0, true}, // 121 + {doBadNamedCapture, 255, 206, 0, false}, // 122 + {doSetNegate, 94 /* ^ */, 126, 0, true}, // 123 set-open + {doSetPosixProp, 58 /* : */, 128, 0, false}, // 124 + {doNOP, 255, 126, 0, false}, // 125 + {doSetLiteral, 93 /* ] */, 141, 0, true}, // 126 set-open2 + {doNOP, 255, 131, 0, false}, // 127 + {doSetEnd, 93 /* ] */, 255, 0, true}, // 128 set-posix + {doNOP, 58 /* : */, 131, 0, false}, // 129 + {doRuleError, 255, 206, 0, false}, // 130 + {doSetEnd, 93 /* ] */, 255, 0, true}, // 131 set-start + {doSetBeginUnion, 91 /* [ */, 123, 148, true}, // 132 + {doNOP, 92 /* \ */, 191, 0, true}, // 133 + {doNOP, 45 /* - */, 137, 0, true}, // 134 + {doNOP, 38 /* & */, 139, 0, true}, // 135 + {doSetLiteral, 255, 141, 0, true}, // 136 + {doRuleError, 45 /* - */, 206, 0, false}, // 137 set-start-dash + {doSetAddDash, 255, 141, 0, false}, // 138 + {doRuleError, 38 /* & */, 206, 0, false}, // 139 set-start-amp + {doSetAddAmp, 255, 141, 0, false}, // 140 + {doSetEnd, 93 /* ] */, 255, 0, true}, // 141 set-after-lit + {doSetBeginUnion, 91 /* [ */, 123, 148, true}, // 142 + {doNOP, 45 /* - */, 178, 0, 
true}, // 143 + {doNOP, 38 /* & */, 169, 0, true}, // 144 + {doNOP, 92 /* \ */, 191, 0, true}, // 145 + {doSetNoCloseError, 253, 206, 0, false}, // 146 + {doSetLiteral, 255, 141, 0, true}, // 147 + {doSetEnd, 93 /* ] */, 255, 0, true}, // 148 set-after-set + {doSetBeginUnion, 91 /* [ */, 123, 148, true}, // 149 + {doNOP, 45 /* - */, 171, 0, true}, // 150 + {doNOP, 38 /* & */, 166, 0, true}, // 151 + {doNOP, 92 /* \ */, 191, 0, true}, // 152 + {doSetNoCloseError, 253, 206, 0, false}, // 153 + {doSetLiteral, 255, 141, 0, true}, // 154 + {doSetEnd, 93 /* ] */, 255, 0, true}, // 155 set-after-range + {doSetBeginUnion, 91 /* [ */, 123, 148, true}, // 156 + {doNOP, 45 /* - */, 174, 0, true}, // 157 + {doNOP, 38 /* & */, 176, 0, true}, // 158 + {doNOP, 92 /* \ */, 191, 0, true}, // 159 + {doSetNoCloseError, 253, 206, 0, false}, // 160 + {doSetLiteral, 255, 141, 0, true}, // 161 + {doSetBeginUnion, 91 /* [ */, 123, 148, true}, // 162 set-after-op + {doSetOpError, 93 /* ] */, 206, 0, false}, // 163 + {doNOP, 92 /* \ */, 191, 0, true}, // 164 + {doSetLiteral, 255, 141, 0, true}, // 165 + {doSetBeginIntersection1, 91 /* [ */, 123, 148, true}, // 166 set-set-amp + {doSetIntersection2, 38 /* & */, 162, 0, true}, // 167 + {doSetAddAmp, 255, 141, 0, false}, // 168 + {doSetIntersection2, 38 /* & */, 162, 0, true}, // 169 set-lit-amp + {doSetAddAmp, 255, 141, 0, false}, // 170 + {doSetBeginDifference1, 91 /* [ */, 123, 148, true}, // 171 set-set-dash + {doSetDifference2, 45 /* - */, 162, 0, true}, // 172 + {doSetAddDash, 255, 141, 0, false}, // 173 + {doSetDifference2, 45 /* - */, 162, 0, true}, // 174 set-range-dash + {doSetAddDash, 255, 141, 0, false}, // 175 + {doSetIntersection2, 38 /* & */, 162, 0, true}, // 176 set-range-amp + {doSetAddAmp, 255, 141, 0, false}, // 177 + {doSetDifference2, 45 /* - */, 162, 0, true}, // 178 set-lit-dash + {doSetAddDash, 91 /* [ */, 141, 0, false}, // 179 + {doSetAddDash, 93 /* ] */, 141, 0, false}, // 180 + {doNOP, 92 /* \ */, 183, 0, true}, // 181 + {doSetRange, 255, 155, 0, true}, // 182 + {doSetOpError, 115 /* s */, 206, 0, false}, // 183 set-lit-dash-escape + {doSetOpError, 83 /* S */, 206, 0, false}, // 184 + {doSetOpError, 119 /* w */, 206, 0, false}, // 185 + {doSetOpError, 87 /* W */, 206, 0, false}, // 186 + {doSetOpError, 100 /* d */, 206, 0, false}, // 187 + {doSetOpError, 68 /* D */, 206, 0, false}, // 188 + {doSetNamedRange, 78 /* N */, 155, 0, false}, // 189 + {doSetRange, 255, 155, 0, true}, // 190 + {doSetProp, 112 /* p */, 148, 0, false}, // 191 set-escape + {doSetProp, 80 /* P */, 148, 0, false}, // 192 + {doSetNamedChar, 78 /* N */, 141, 0, false}, // 193 + {doSetBackslashs, 115 /* s */, 155, 0, true}, // 194 + {doSetBackslashS, 83 /* S */, 155, 0, true}, // 195 + {doSetBackslashw, 119 /* w */, 155, 0, true}, // 196 + {doSetBackslashW, 87 /* W */, 155, 0, true}, // 197 + {doSetBackslashd, 100 /* d */, 155, 0, true}, // 198 + {doSetBackslashD, 68 /* D */, 155, 0, true}, // 199 + {doSetBackslashh, 104 /* h */, 155, 0, true}, // 200 + {doSetBackslashH, 72 /* H */, 155, 0, true}, // 201 + {doSetBackslashv, 118 /* v */, 155, 0, true}, // 202 + {doSetBackslashV, 86 /* V */, 155, 0, true}, // 203 + {doSetLiteralEscaped, 255, 141, 0, true}, // 204 + {doSetFinish, 255, 14, 0, false}, // 205 set-finish + {doExit, 255, 206, 0, true}, // 206 errorDeath +} diff --git a/go/mysql/icuregex/debug.go b/go/mysql/icuregex/debug.go new file mode 100644 index 00000000000..92c43e704d7 --- /dev/null +++ b/go/mysql/icuregex/debug.go @@ -0,0 +1,151 @@ +/* +© 2016 and later: 
Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package icuregex + +import ( + "fmt" + "io" +) + +func (pat *Pattern) Dump(w io.Writer) { + fmt.Fprintf(w, "Original Pattern: \"%s\"\n", pat.pattern) + fmt.Fprintf(w, " Min Match Length: %d\n", pat.minMatchLen) + fmt.Fprintf(w, " Match Start Type: %v\n", pat.startType) + if pat.startType == startString { + fmt.Fprintf(w, " Initial match string: \"%s\"\n", string(pat.literalText[pat.initialStringIdx:pat.initialStringIdx+pat.initialStringLen])) + } else if pat.startType == startSet { + fmt.Fprintf(w, " Match First Chars: %s\n", pat.initialChars.String()) + } else if pat.startType == startChar { + fmt.Fprintf(w, " First char of Match: ") + if pat.initialChar > 0x20 { + fmt.Fprintf(w, "'%c'\n", pat.initialChar) + } else { + fmt.Fprintf(w, "%#x\n", pat.initialChar) + } + } + + fmt.Fprintf(w, "Named Capture Groups:\n") + if len(pat.namedCaptureMap) == 0 { + fmt.Fprintf(w, " None\n") + } else { + for name, number := range pat.namedCaptureMap { + fmt.Fprintf(w, " %d\t%s\n", number, name) + } + } + + fmt.Fprintf(w, "\nIndex Binary Type Operand\n-------------------------------------------\n") + for idx := range pat.compiledPat { + pat.dumpOp(w, idx) + } + fmt.Fprintf(w, "\n\n") +} + +func (pat *Pattern) dumpOp(w io.Writer, index int) { + op := pat.compiledPat[index] + val := op.value() + opType := op.typ() + pinnedType := opType + if int(pinnedType) >= len(urxOpcodeNames) { + pinnedType = 0 + } + + fmt.Fprintf(w, "%4d %08x %-15s ", index, op, urxOpcodeNames[pinnedType]) + + switch opType { + case urxNop, + urxDotany, + urxDotanyAll, + urxFail, + urxCaret, + urxDollar, + urxBackslashG, + urxBackslashX, + urxEnd, + urxDollarM, + urxCaretM: + // Types with no operand field of interest. + + case urxReservedOp, + urxStartCapture, + urxEndCapture, + urxStateSave, + urxJmp, + urxJmpSav, + urxJmpSavX, + urxBackslashB, + urxBackslashBu, + urxBackslashD, + urxBackslashZ, + urxStringLen, + urxCtrInit, + urxCtrInitNg, + utxCtrLoop, + urxCtrLoopNg, + urxRelocOprnd, + urxStoSp, + urxLdSp, + urxBackref, + urxStoInpLoc, + urxJmpx, + urxLaStart, + urxLaEnd, + urxBackrefI, + urxLbStart, + urxLbCont, + urxLbEnd, + urxLbnCount, + urxLbnEnd, + urxLoopC, + urxLoopDotI, + urxBackslashH, + urxBackslashR, + urxBackslashV: + // types with an integer operand field. 
+ fmt.Fprintf(w, "%d", val) + + case urxOnechar, urcOnecharI: + if val < 0x20 { + fmt.Fprintf(w, "%#x", val) + } else { + fmt.Fprintf(w, "'%c'", rune(val)) + } + + case urxString, urxStringI: + lengthOp := pat.compiledPat[index+1] + length := lengthOp.value() + fmt.Fprintf(w, "%q", string(pat.literalText[val:val+length])) + + case urxSetref, urxLoopSrI: + fmt.Fprintf(w, "%s", pat.sets[val].String()) + + case urxStaticSetref, urxStatSetrefN: + if (val & urxNegSet) != 0 { + fmt.Fprintf(w, "NOT ") + val &= ^urxNegSet + } + fmt.Fprintf(w, "%s", staticPropertySets[val].String()) + + default: + fmt.Fprintf(w, "??????") + } + fmt.Fprintf(w, "\n") +} diff --git a/go/mysql/icuregex/error.go b/go/mysql/icuregex/error.go new file mode 100644 index 00000000000..219ddcf602b --- /dev/null +++ b/go/mysql/icuregex/error.go @@ -0,0 +1,149 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package icuregex + +import ( + "fmt" + "strings" +) + +type CompileError struct { + Code CompileErrorCode + Line int + Offset int + Context string +} + +func (e *CompileError) Error() string { + var out strings.Builder + switch e.Code { + case InternalError: + out.WriteString("Internal error") + case RuleSyntax: + out.WriteString("Syntax error") + case BadEscapeSequence: + out.WriteString("Bad escape sequence") + case PropertySyntax: + out.WriteString("Property syntax error") + case Unimplemented: + out.WriteString("Unimplemented") + case MismatchedParen: + out.WriteString("Mismatched parentheses") + case NumberTooBig: + out.WriteString("Number too big") + case BadInterval: + out.WriteString("Bad interval") + case MaxLtMin: + out.WriteString("Max less than min") + case InvalidBackRef: + out.WriteString("Invalid back reference") + case InvalidFlag: + out.WriteString("Invalid flag") + case LookBehindLimit: + out.WriteString("Look behind limit") + case MissingCloseBracket: + out.WriteString("Missing closing ]") + case InvalidRange: + out.WriteString("Invalid range") + case PatternTooBig: + out.WriteString("Pattern too big") + case InvalidCaptureGroupName: + out.WriteString("Invalid capture group name") + } + _, _ = fmt.Fprintf(&out, " in regular expression on line %d, character %d: `%s`", e.Line, e.Offset, e.Context) + + return out.String() +} + +type MatchError struct { + Code MatchErrorCode + Pattern string + Position int + Input []rune +} + +const maxMatchInputLength = 20 + +func (e *MatchError) Error() string { + var out strings.Builder + switch e.Code { + case StackOverflow: + out.WriteString("Stack overflow") + case TimeOut: + out.WriteString("Timeout") + } + + input := e.Input + if len(input) > maxMatchInputLength { + var b []rune + start := e.Position - maxMatchInputLength/2 + if start < 0 { + start = 0 + } else 
{ + b = append(b, '.', '.', '.') + } + end := start + maxMatchInputLength + trailing := true + if end > len(input) { + end = len(input) + trailing = false + } + b = append(b, input[start:end]...) + if trailing { + b = append(b, '.', '.', '.') + } + input = b + } + _, _ = fmt.Fprintf(&out, " for expression `%s` at position %d in: %q", e.Pattern, e.Position, string(input)) + + return out.String() +} + +type Code int32 + +type CompileErrorCode int32 + +const ( + InternalError CompileErrorCode = iota + 1 /**< An internal error (bug) was detected. */ + RuleSyntax /**< Syntax error in regexp pattern. */ + BadEscapeSequence /**< Unrecognized backslash escape sequence in pattern */ + PropertySyntax /**< Incorrect Unicode property */ + Unimplemented /**< Use of regexp feature that is not yet implemented. */ + MismatchedParen /**< Incorrectly nested parentheses in regexp pattern. */ + NumberTooBig /**< Decimal number is too large. */ + BadInterval /**< Error in {min,max} interval */ + MaxLtMin /**< In {min,max}, max is less than min. */ + InvalidBackRef /**< Back-reference to a non-existent capture group. */ + InvalidFlag /**< Invalid value for match mode flags. */ + LookBehindLimit /**< Look-Behind pattern matches must have a bounded maximum length. */ + MissingCloseBracket /**< Missing closing bracket on a bracket expression. */ + InvalidRange /**< In a character range [x-y], x is greater than y. */ + PatternTooBig /**< Pattern exceeds limits on size or complexity. @stable ICU 55 */ + InvalidCaptureGroupName /**< Invalid capture group name. @stable ICU 55 */ +) + +type MatchErrorCode int32 + +const ( + StackOverflow MatchErrorCode = iota /**< Regular expression backtrack stack overflow. */ + TimeOut /**< Maximum allowed match time exceeded */ +) diff --git a/go/mysql/icuregex/errors/error.go b/go/mysql/icuregex/errors/error.go new file mode 100644 index 00000000000..f03a5157acf --- /dev/null +++ b/go/mysql/icuregex/errors/error.go @@ -0,0 +1,27 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package errors + +import "errors" + +var ErrIllegalArgument = errors.New("illegal argument") +var ErrUnsupported = errors.New("unsupported") diff --git a/go/mysql/icuregex/icu_test.go b/go/mysql/icuregex/icu_test.go new file mode 100644 index 00000000000..42c98dde5db --- /dev/null +++ b/go/mysql/icuregex/icu_test.go @@ -0,0 +1,422 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. 
+License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package icuregex_test + +import ( + "bufio" + "errors" + "fmt" + "io" + "os" + "regexp" + "strconv" + "strings" + "testing" + + "github.com/stretchr/testify/require" + + "vitess.io/vitess/go/mysql/icuregex" + "vitess.io/vitess/go/mysql/icuregex/internal/pattern" +) + +var ErrSkip = errors.New("ignored test") + +type Matcher int8 + +const ( + FuncFind Matcher = iota + FuncMatches + FuncLookingAt +) + +type Expectation int8 + +const ( + Unknown Expectation = iota + Expected + NotExpected +) + +type TestPattern struct { + Line string + Lineno int + + Pattern string + Flags icuregex.RegexpFlag + Options struct { + MatchFunc Matcher + FindCount int + MatchOnly bool + MustError bool + Dump bool + HitEnd Expectation + RequireEnd Expectation + } + Input string + Groups []TestGroup +} + +type TestGroup struct { + Start, End int +} + +var parsePattern = regexp.MustCompile(`<(/?)(r|[0-9]+)>`) + +func (tp *TestPattern) parseFlags(line string) (string, error) { + for len(line) > 0 { + switch line[0] { + case '"', '\'', '/': + return line, nil + case ' ', '\t': + case 'i': + tp.Flags |= icuregex.CaseInsensitive + case 'x': + tp.Flags |= icuregex.Comments + case 's': + tp.Flags |= icuregex.DotAll + case 'm': + tp.Flags |= icuregex.Multiline + case 'e': + tp.Flags |= icuregex.ErrorOnUnknownEscapes + case 'D': + tp.Flags |= icuregex.UnixLines + case 'Q': + tp.Flags |= icuregex.Literal + case '2', '3', '4', '5', '6', '7', '8', '9': + tp.Options.FindCount = int(line[0] - '0') + case 'G': + tp.Options.MatchOnly = true + case 'E': + tp.Options.MustError = true + case 'd': + tp.Options.Dump = true + case 'L': + tp.Options.MatchFunc = FuncLookingAt + case 'M': + tp.Options.MatchFunc = FuncMatches + case 'v': + tp.Options.MustError = !icuregex.BreakIteration + case 'a', 'b': + return "", ErrSkip + case 'z': + tp.Options.HitEnd = Expected + case 'Z': + tp.Options.HitEnd = NotExpected + case 'y': + tp.Options.RequireEnd = Expected + case 'Y': + tp.Options.RequireEnd = NotExpected + default: + return "", fmt.Errorf("unexpected modifier '%c'", line[0]) + } + line = line[1:] + } + return "", io.ErrUnexpectedEOF +} + +func (tp *TestPattern) parseMatch(orig string) error { + input, ok := pattern.Unescape(orig) + if !ok { + return fmt.Errorf("failed to unquote input: %s", orig) + } + + var detagged []rune + var last int + + m := parsePattern.FindAllStringSubmatchIndex(input, -1) + for _, g := range m { + detagged = append(detagged, []rune(input[last:g[0]])...) 
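+		// Each <N>...</N> tag in the test input marks the expected span of capture
+		// group N: the tags themselves are stripped here, and the rune offsets of
+		// the surrounding text are recorded as the expected group boundaries.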
+ last = g[1] + + closing := input[g[2]:g[3]] == "/" + groupNum := input[g[4]:g[5]] + if groupNum == "r" { + return ErrSkip + } + num, err := strconv.Atoi(groupNum) + if err != nil { + return fmt.Errorf("bad group number %q: %w", groupNum, err) + } + + if num >= len(tp.Groups) { + grp := make([]TestGroup, num+1) + for i := range grp { + grp[i].Start = -1 + grp[i].End = -1 + } + copy(grp, tp.Groups) + tp.Groups = grp + } + + if closing { + tp.Groups[num].End = len(detagged) + } else { + tp.Groups[num].Start = len(detagged) + } + } + + detagged = append(detagged, []rune(input[last:])...) + tp.Input = string(detagged) + return nil +} + +func ParseTestFile(t testing.TB, filename string) []TestPattern { + f, err := os.Open(filename) + if err != nil { + t.Fatalf("failed to open test data: %v", err) + } + + defer f.Close() + scanner := bufio.NewScanner(f) + var lineno int + var patterns []TestPattern + + errFunc := func(err error) { + if err == ErrSkip { + return + } + t.Errorf("Parse error: %v\n%03d: %s", err, lineno, scanner.Text()) + } + + for scanner.Scan() { + lineno++ + line := scanner.Text() + line = strings.TrimSpace(line) + + if len(line) == 0 || line[0] == '#' { + continue + } + + var tp TestPattern + tp.Line = line + tp.Lineno = lineno + + idx := strings.IndexByte(line[1:], line[0]) + + tp.Pattern = line[1 : idx+1] + line, err = tp.parseFlags(line[idx+2:]) + if err != nil { + errFunc(err) + continue + } + + idx = strings.IndexByte(line[1:], line[0]) + err = tp.parseMatch(line[1 : idx+1]) + if err != nil { + errFunc(err) + continue + } + + patterns = append(patterns, tp) + } + + if err := scanner.Err(); err != nil { + t.Fatal(err) + } + return patterns +} + +func (tp *TestPattern) fail(t testing.TB, msg string, args ...any) bool { + t.Helper() + msg = fmt.Sprintf(msg, args...) 
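+	// The failing pattern is reported together with its line in the test file,
+	// the de-tagged input, and the original test line; returning false lets
+	// callers write `return tp.fail(...)`.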
+ t.Errorf("%s (in line %d)\nregexp: %s\ninput: %q\noriginal: %s", msg, tp.Lineno, tp.Pattern, tp.Input, tp.Line) + return false +} + +func (tp *TestPattern) Test(t testing.TB) bool { + re, err := func() (re *icuregex.Pattern, err error) { + defer func() { + if r := recover(); r != nil { + err = fmt.Errorf("PANIC: %v", r) + } + }() + re, err = icuregex.CompileString(tp.Pattern, tp.Flags) + return + }() + if err != nil { + if tp.Options.MustError { + return true + } + + return tp.fail(t, "unexpected parser failure: %v", err) + } + if tp.Options.MustError { + return tp.fail(t, "parse failure expected") + } + + matcher := re.Match(tp.Input) + var isMatch bool + var findCount = tp.Options.FindCount + if findCount == 0 { + findCount = 1 + } + + for i := 0; i < findCount; i++ { + isMatch, err = func() (bool, error) { + defer func() { + if r := recover(); r != nil { + tp.fail(t, "unexpected match failure: %v", r) + } + }() + switch tp.Options.MatchFunc { + case FuncMatches: + return matcher.Matches() + case FuncLookingAt: + return matcher.LookingAt() + case FuncFind: + return matcher.Find() + default: + panic("invalid MatchFunc") + } + }() + } + + require.NoError(t, err) + + if !isMatch && len(tp.Groups) > 0 { + return tp.fail(t, "Match expected, but none found.") + } + if isMatch && len(tp.Groups) == 0 { + return tp.fail(t, "No match expected, but found one at position %d", matcher.Start()) + } + if tp.Options.MatchOnly { + return true + } + + for i := 0; i < matcher.GroupCount(); i++ { + expectedStart := -1 + expectedEnd := -1 + + if i < len(tp.Groups) { + expectedStart = tp.Groups[i].Start + expectedEnd = tp.Groups[i].End + } + if gotStart := matcher.StartForGroup(i); gotStart != expectedStart { + return tp.fail(t, "Incorrect start position for group %d. Expected %d, got %d", i, expectedStart, gotStart) + } + if gotEnd := matcher.EndForGroup(i); gotEnd != expectedEnd { + return tp.fail(t, "Incorrect end position for group %d. Expected %d, got %d", i, expectedEnd, gotEnd) + } + } + + if matcher.GroupCount()+1 < len(tp.Groups) { + return tp.fail(t, "Expected %d capture groups, found %d", len(tp.Groups)-1, matcher.GroupCount()) + } + + if tp.Options.HitEnd == Expected && !matcher.HitEnd() { + return tp.fail(t, "HitEnd() returned false. Expected true") + } + if tp.Options.HitEnd == NotExpected && matcher.HitEnd() { + return tp.fail(t, "HitEnd() returned true. Expected false") + } + + if tp.Options.RequireEnd == Expected && !matcher.RequireEnd() { + return tp.fail(t, "RequireEnd() returned false. Expected true") + } + if tp.Options.RequireEnd == NotExpected && matcher.RequireEnd() { + return tp.fail(t, "RequireEnd() returned true. Expected false") + } + + return true +} + +func TestICU(t *testing.T) { + pats := ParseTestFile(t, "testdata/regextst.txt") + + var valid int + + for _, p := range pats { + if p.Test(t) { + valid++ + } + } + + t.Logf("%d/%d (%.02f)", valid, len(pats), float64(valid)/float64(len(pats))) +} + +func TestICUExtended(t *testing.T) { + // This tests additional cases that aren't covered in the + // copied ICU test suite. 
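+	// The extended file uses the same regextst.txt syntax, so it is parsed and
+	// executed with the same ParseTestFile/Test helpers as TestICU above.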
+ pats := ParseTestFile(t, "testdata/regextst_extended.txt") + + var valid int + + for _, p := range pats { + if p.Test(t) { + valid++ + } + } + + t.Logf("%d/%d (%.02f)", valid, len(pats), float64(valid)/float64(len(pats))) +} + +func TestCornerCases(t *testing.T) { + var cases = []struct { + Pattern string + Input string + Flags icuregex.RegexpFlag + Match bool + }{ + {`xyz$`, "xyz\n", 0, true}, + {`a*+`, "abbxx", 0, true}, + {`(ABC){1,2}+ABC`, "ABCABCABC", 0, true}, + {`(ABC){2,3}+ABC`, "ABCABCABC", 0, false}, + {`(abc)*+a`, "abcabcabc", 0, false}, + {`(abc)*+a`, "abcabcab", 0, true}, + {`a\N{LATIN SMALL LETTER B}c`, "abc", 0, true}, + {`a.b`, "a\rb", icuregex.UnixLines, true}, + {`a.b`, "a\rb", 0, false}, + {`(?d)abc$`, "abc\r", 0, false}, + {`[ \b]`, "b", 0, true}, + {`[abcd-\N{LATIN SMALL LETTER G}]+`, "xyz-abcdefghij-", 0, true}, + {`[[abcd]&&[ac]]+`, "bacacd", 0, true}, + } + + for _, tc := range cases { + t.Run(tc.Pattern, func(t *testing.T) { + _, err := icuregex.CompileString(tc.Pattern, tc.Flags) + if err != nil { + t.Fatal(err) + } + }) + } +} + +func TestOne(t *testing.T) { + const Pattern = `\p{CaseIgnorable}` + const Input = "foo.bar" + const Flags = 0 + + re, err := icuregex.CompileString(Pattern, Flags) + if err != nil { + t.Fatalf("compilation failed: %v", err) + } + + re.Dump(os.Stderr) + + m := icuregex.NewMatcher(re) + m.Dumper(os.Stderr) + m.ResetString(Input) + found, err := m.Find() + require.NoError(t, err) + t.Logf("match = %v", found) +} diff --git a/go/mysql/icuregex/internal/bytestrie/bytes_trie.go b/go/mysql/icuregex/internal/bytestrie/bytes_trie.go new file mode 100644 index 00000000000..aff80dc3e69 --- /dev/null +++ b/go/mysql/icuregex/internal/bytestrie/bytes_trie.go @@ -0,0 +1,354 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package bytestrie + +type BytesTrie struct { + pos []byte + original []byte + remainingMatchLength int32 +} + +func New(pos []byte) BytesTrie { + return BytesTrie{pos: pos, original: pos, remainingMatchLength: -1} +} + +type result int32 + +const ( /** + * The input unit(s) did not continue a matching string. + * Once current()/next() return NO_MATCH, + * all further calls to current()/next() will also return NO_MATCH, + * until the trie is reset to its original state or to a saved state. + * @stable ICU 4.8 + */ + noMatch result = iota + /** + * The input unit(s) continued a matching string + * but there is no value for the string so far. + * (It is a prefix of a longer string.) + * @stable ICU 4.8 + */ + noValue + /** + * The input unit(s) continued a matching string + * and there is a value for the string so far. + * This value will be returned by getValue(). + * No further input byte/unit can continue a matching string. 
+ * @stable ICU 4.8 + */ + finalValue + /** + * The input unit(s) continued a matching string + * and there is a value for the string so far. + * This value will be returned by getValue(). + * Another input byte/unit can continue a matching string. + * @stable ICU 4.8 + */ + intermediateValue +) + +const ( + maxBranchLinearSubNodeLength = 5 + + // 10..1f: Linear-match node, match 1..16 bytes and continue reading the next node. + minLinearMatch = 0x10 + maxLinearMatchLength = 0x10 + + // 20..ff: Variable-length value node. + // If odd, the value is final. (Otherwise, intermediate value or jump delta.) + // Then shift-right by 1 bit. + // The remaining lead byte value indicates the number of following bytes (0..4) + // and contains the value's top bits. + minValueLead = minLinearMatch + maxLinearMatchLength // 0x20 + // It is a final value if bit 0 is set. + valueIsFinal = 1 + + // Compact value: After testing bit 0, shift right by 1 and then use the following thresholds. + minOneByteValueLead = minValueLead / 2 // 0x10 + maxOneByteValue = 0x40 // At least 6 bits in the first byte. + + minTwoByteValueLead = minOneByteValueLead + maxOneByteValue + 1 // 0x51 + maxTwoByteValue = 0x1aff + minThreeByteValueLead = minTwoByteValueLead + (maxTwoByteValue >> 8) + 1 // 0x6c + fourByteValueLead = 0x7e + + // Compact delta integers. + maxOneByteDelta = 0xbf + minTwoByteDeltaLead = maxOneByteDelta + 1 // 0xc0 + minThreeByteDeltaLead = 0xf0 + fourByteDeltaLead = 0xfe +) + +func (bt *BytesTrie) ContainsName(name string) bool { + result := noValue + for _, c := range []byte(name) { + if 'A' <= c && c <= 'Z' { + c += 'a' - 'A' + } + if c == 0x2d || c == 0x5f || c == 0x20 || (0x09 <= c && c <= 0x0d) { + continue + } + if result&1 == 0 { + return false + } + result = bt.next(int32(c)) + } + return result >= finalValue +} + +func (bt *BytesTrie) next(inByte int32) result { + pos := bt.pos + if pos == nil { + return noMatch + } + if inByte < 0 { + inByte += 0x100 + } + length := bt.remainingMatchLength // Actual remaining match length minus 1. + if length >= 0 { + match := inByte == int32(pos[0]) + pos = pos[1:] + // Remaining part of a linear-match node. + if match { + length = length - 1 + bt.remainingMatchLength = length + bt.pos = pos + if length < 0 { + node := int32(pos[0]) + if node >= minValueLead { + return bt.valueResult(node) + } + } + return noValue + } + bt.stop() + return noMatch + } + return bt.nextImpl(pos, inByte) +} + +func (bt *BytesTrie) nextImpl(pos []byte, inByte int32) result { + for { + node := int32(pos[0]) + pos = pos[1:] + if node < minLinearMatch { + return bt.branchNext(pos, node, inByte) + } else if node < minValueLead { + // Match the first of length+1 bytes. + length := node - minLinearMatch // Actual match length minus 1. + match := inByte == int32(pos[0]) + pos = pos[1:] + if match { + length = length - 1 + bt.remainingMatchLength = length + bt.pos = pos + if length < 0 { + node = int32(pos[0]) + if node >= minValueLead { + return bt.valueResult(node) + } + } + return noValue + } + // No match. + break + } else if (node & valueIsFinal) != 0 { + // No further matching bytes. + break + } else { + // Skip intermediate value. + pos = bt.skipValue2(pos, node) + // The next node must not also be a value node. 
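+			// (An intermediate value only records a result for the prefix matched so
+			// far; matching continues with the node that follows the skipped value.)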
+ } + } + bt.stop() + return noMatch +} + +func (bt *BytesTrie) stop() { + bt.pos = nil +} + +func (bt *BytesTrie) valueResult(node int32) result { + return intermediateValue - result(node&valueIsFinal) +} + +func (bt *BytesTrie) branchNext(pos []byte, length int32, inByte int32) result { + // Branch according to the current unit. + if length == 0 { + length = int32(pos[0]) + pos = pos[1:] + } + length++ + // The length of the branch is the number of units to select from. + // The data structure encodes a binary search. + for length > maxBranchLinearSubNodeLength { + p := int32(pos[0]) + pos = pos[1:] + if inByte < p { + length >>= 1 + pos = bt.jumpByDelta(pos) + } else { + length = length - (length >> 1) + pos = bt.skipDelta(pos) + } + } + // Drop down to linear search for the last few bytes. + // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3 + // and divides length by 2. + for { + p := int32(pos[0]) + pos = pos[1:] + if inByte == p { + var result result + node := int32(pos[0]) + if (node & valueIsFinal) != 0 { + // Leave the final value for getValue() to read. + result = finalValue + } else { + // Use the non-final value as the jump delta. + pos = pos[1:] + // int32_t delta=readValue(pos, node>>1); + node >>= 1 + var delta int32 + if node < minTwoByteValueLead { + delta = node - minOneByteValueLead + } else if node < minThreeByteValueLead { + delta = ((node - minTwoByteValueLead) << 8) | int32(pos[0]) + pos = pos[1:] + } else if node < fourByteValueLead { + delta = ((node - minThreeByteValueLead) << 16) | (int32(pos[0]) << 8) | int32(pos[1]) + pos = pos[2:] + } else if node == fourByteValueLead { + delta = (int32(pos[0]) << 16) | (int32(pos[1]) << 8) | int32(pos[2]) + pos = pos[3:] + } else { + delta = (int32(pos[0]) << 24) | (int32(pos[1]) << 16) | (int32(pos[2]) << 8) | int32(pos[3]) + pos = pos[4:] + } + // end readValue() + pos = pos[delta:] + node = int32(pos[0]) + if node >= minValueLead { + result = bt.valueResult(node) + } else { + result = noValue + } + } + bt.pos = pos + return result + } + length-- + pos = bt.skipValue1(pos) + if length <= 1 { + break + } + } + p := int32(pos[0]) + pos = pos[1:] + if inByte == p { + bt.pos = pos + node := int32(pos[0]) + if node >= minValueLead { + return bt.valueResult(node) + } + return noValue + } + bt.stop() + return noMatch +} + +func (bt *BytesTrie) skipValue1(pos []byte) []byte { + leadByte := int32(pos[0]) + return bt.skipValue2(pos[1:], leadByte) +} + +func (bt *BytesTrie) skipValue2(pos []byte, leadByte int32) []byte { + if leadByte >= (minTwoByteValueLead << 1) { + if leadByte < (minThreeByteValueLead << 1) { + pos = pos[1:] + } else if leadByte < (fourByteValueLead << 1) { + pos = pos[2:] + } else { + pos = pos[3+((leadByte>>1)&1):] + } + } + return pos +} + +func (bt *BytesTrie) skipDelta(pos []byte) []byte { + delta := int32(pos[0]) + pos = pos[1:] + if delta >= minTwoByteDeltaLead { + if delta < minThreeByteDeltaLead { + pos = pos[1:] + } else if delta < fourByteDeltaLead { + pos = pos[2:] + } else { + pos = pos[3+(delta&1):] + } + } + return pos +} + +func (bt *BytesTrie) jumpByDelta(pos []byte) []byte { + delta := int32(pos[0]) + pos = pos[1:] + if delta < minTwoByteDeltaLead { + // nothing to do + } else if delta < minThreeByteDeltaLead { + delta = ((delta - minTwoByteDeltaLead) << 8) | int32(pos[0]) + pos = pos[1:] + } else if delta < fourByteDeltaLead { + delta = ((delta - minThreeByteDeltaLead) << 16) | (int32(pos[0]) << 8) | int32(pos[1]) + pos = pos[2:] + } else if delta == 
fourByteDeltaLead {
+		delta = (int32(pos[0]) << 16) | (int32(pos[1]) << 8) | int32(pos[2])
+		pos = pos[3:]
+	} else {
+		delta = (int32(pos[0]) << 24) | (int32(pos[1]) << 16) | (int32(pos[2]) << 8) | int32(pos[3])
+		pos = pos[4:]
+	}
+	return pos[delta:]
+}
+
+func (bt *BytesTrie) GetValue() int32 {
+	pos := bt.pos
+	leadByte := int32(pos[0])
+	return bt.readValue(pos[1:], leadByte>>1)
+}
+
+func (bt *BytesTrie) readValue(pos []byte, leadByte int32) int32 {
+	var value int32
+	if leadByte < minTwoByteValueLead {
+		value = leadByte - minOneByteValueLead
+	} else if leadByte < minThreeByteValueLead {
+		value = ((leadByte - minTwoByteValueLead) << 8) | int32(pos[0])
+	} else if leadByte < fourByteValueLead {
+		value = ((leadByte - minThreeByteValueLead) << 16) | (int32(pos[0]) << 8) | int32(pos[1])
+	} else if leadByte == fourByteValueLead {
+		value = (int32(pos[0]) << 16) | (int32(pos[1]) << 8) | int32(pos[2])
+	} else {
+		value = (int32(pos[0]) << 24) | (int32(pos[1]) << 16) | (int32(pos[2]) << 8) | int32(pos[3])
+	}
+	return value
+}
diff --git a/go/mysql/icuregex/internal/icudata/README.md b/go/mysql/icuregex/internal/icudata/README.md
new file mode 100644
index 00000000000..070633b555e
--- /dev/null
+++ b/go/mysql/icuregex/internal/icudata/README.md
@@ -0,0 +1,46 @@
+# ICU data files
+
+These are files copied from the ICU project that contain various types
+of data, like character properties.
+
+## How to update
+
+Not all data files are immediately available in the source code; some
+need to be built first. This applies to the character / word break
+tables.
+
+### Copy from source data
+
+The `icu4c/source/data/in` directory in the source distribution contains
+the following ICU data files we use:
+
+```
+pnames.icu
+ubidi.icu
+ucase.icu
+unames.icu
+ulayout.icu
+uprops.icu
+nfc.nrm
+nfkc.nrm
+nfkc_cf.nrm
+```
+
+The character and word break tables need to be compiled before they can
+be copied.
+
+In `icu4c/source` run:
+
+```bash
+./configure --with-data-packaging=files
+make
+```
+
+This will compile the character and word break data into binary files
+that we can use. Once built, the files we use are available in
+`icu4c/source/data/out/build/icudtl/brkitr`:
+
+```
+char.brk
+word.brk
+```
diff --git a/go/mysql/icuregex/internal/icudata/char.brk b/go/mysql/icuregex/internal/icudata/char.brk
new file mode 100644
index 00000000000..a243ae6580a
Binary files /dev/null and b/go/mysql/icuregex/internal/icudata/char.brk differ
diff --git a/go/mysql/icuregex/internal/icudata/embed.go b/go/mysql/icuregex/internal/icudata/embed.go
new file mode 100644
index 00000000000..2b7e3033a21
--- /dev/null
+++ b/go/mysql/icuregex/internal/icudata/embed.go
@@ -0,0 +1,96 @@
+/*
+© 2016 and later: Unicode, Inc. and others.
+Copyright (C) 2004-2015, International Business Machines Corporation and others.
+Copyright 2023 The Vitess Authors.
+
+This file contains code derived from the Unicode Project's ICU library.
+License & terms of use for the original code: http://www.unicode.org/copyright.html
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package icudata
+
+import _ "embed"
+
+// PNames is the list of property names. It is used,
+// for example, to resolve Unicode property name aliases
+// in regular expressions.
+//
+//go:embed pnames.icu
+var PNames []byte
+
+// UBidi is the list of bidi properties. These are used
+// by Bidi class aliases in regular expressions.
+//
+//go:embed ubidi.icu
+var UBidi []byte
+
+// UCase is the list of case properties. These are used
+// for case folding internally for case insensitive matching.
+//
+//go:embed ucase.icu
+var UCase []byte
+
+// ULayout is used for property checks against the InPC, InSC
+// and VO properties.
+//
+//go:embed ulayout.icu
+var ULayout []byte
+
+// UNames is used for named character references in regular
+// expressions.
+//
+//go:embed unames.icu
+var UNames []byte
+
+// UProps is used for all the character properties. These
+// are used to retrieve properties of characters for character
+// classes, like letters, whitespace, digits, etc.
+//
+//go:embed uprops.icu
+var UProps []byte
+
+// Nfc is the table for character normalization where canonical
+// decomposition is done followed by canonical composition.
+// It is used for composition-related property checks on characters.
+//
+//go:embed nfc.nrm
+var Nfc []byte
+
+// Nfkc is the table for character normalization where compatibility
+// decomposition is done followed by canonical composition.
+// It is used for composition-related property checks on characters.
+//
+//go:embed nfkc.nrm
+var Nfkc []byte
+
+// NfkcCf is the table for character normalization where compatibility
+// decomposition is done followed by canonical composition with
+// case folding.
+// It is used for composition-related property checks on characters.
+//
+//go:embed nfkc_cf.nrm
+var NfkcCf []byte
+
+// BrkChar is used for matching against character break
+// characters in regular expressions.
+//
+//go:embed char.brk
+var BrkChar []byte
+
+// BrkWord is used for matching against word break
+// characters in regular expressions.
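+// Like BrkChar, this break table is built from the ICU sources as described
+// in this package's README.md.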
+// +//go:embed word.brk +var BrkWord []byte diff --git a/go/mysql/icuregex/internal/icudata/nfc.nrm b/go/mysql/icuregex/internal/icudata/nfc.nrm new file mode 100644 index 00000000000..a1254c0aa75 Binary files /dev/null and b/go/mysql/icuregex/internal/icudata/nfc.nrm differ diff --git a/go/mysql/icuregex/internal/icudata/nfkc.nrm b/go/mysql/icuregex/internal/icudata/nfkc.nrm new file mode 100644 index 00000000000..2e6e3dda074 Binary files /dev/null and b/go/mysql/icuregex/internal/icudata/nfkc.nrm differ diff --git a/go/mysql/icuregex/internal/icudata/nfkc_cf.nrm b/go/mysql/icuregex/internal/icudata/nfkc_cf.nrm new file mode 100644 index 00000000000..a3a40833a91 Binary files /dev/null and b/go/mysql/icuregex/internal/icudata/nfkc_cf.nrm differ diff --git a/go/mysql/icuregex/internal/icudata/pnames.icu b/go/mysql/icuregex/internal/icudata/pnames.icu new file mode 100644 index 00000000000..58af6c0157a Binary files /dev/null and b/go/mysql/icuregex/internal/icudata/pnames.icu differ diff --git a/go/mysql/icuregex/internal/icudata/ubidi.icu b/go/mysql/icuregex/internal/icudata/ubidi.icu new file mode 100644 index 00000000000..bc85f3d3502 Binary files /dev/null and b/go/mysql/icuregex/internal/icudata/ubidi.icu differ diff --git a/go/mysql/icuregex/internal/icudata/ucase.icu b/go/mysql/icuregex/internal/icudata/ucase.icu new file mode 100644 index 00000000000..011e6053f79 Binary files /dev/null and b/go/mysql/icuregex/internal/icudata/ucase.icu differ diff --git a/go/mysql/icuregex/internal/icudata/ulayout.icu b/go/mysql/icuregex/internal/icudata/ulayout.icu new file mode 100644 index 00000000000..598d347cc1e Binary files /dev/null and b/go/mysql/icuregex/internal/icudata/ulayout.icu differ diff --git a/go/mysql/icuregex/internal/icudata/unames.icu b/go/mysql/icuregex/internal/icudata/unames.icu new file mode 100644 index 00000000000..55a2267fd5b Binary files /dev/null and b/go/mysql/icuregex/internal/icudata/unames.icu differ diff --git a/go/mysql/icuregex/internal/icudata/uprops.icu b/go/mysql/icuregex/internal/icudata/uprops.icu new file mode 100644 index 00000000000..245db9a0584 Binary files /dev/null and b/go/mysql/icuregex/internal/icudata/uprops.icu differ diff --git a/go/mysql/icuregex/internal/icudata/word.brk b/go/mysql/icuregex/internal/icudata/word.brk new file mode 100644 index 00000000000..80460c60128 Binary files /dev/null and b/go/mysql/icuregex/internal/icudata/word.brk differ diff --git a/go/mysql/icuregex/internal/normalizer/constants.go b/go/mysql/icuregex/internal/normalizer/constants.go new file mode 100644 index 00000000000..3c2de588952 --- /dev/null +++ b/go/mysql/icuregex/internal/normalizer/constants.go @@ -0,0 +1,122 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package normalizer + +const ( + // Fixed norm16 values. + minYesYesWithCC = 0xfe02 + jamoVt = 0xfe00 + minNormalMaybeYes = 0xfc00 + jamoL = 2 // offset=1 hasCompBoundaryAfter=false + inert = 1 // offset=0 hasCompBoundaryAfter=true + + // norm16 bit 0 is comp-boundary-after. + hasCompBoundaryAfter = 1 + offsetShift = 1 + + // For algorithmic one-way mappings, norm16 bits 2..1 indicate the + // tccc (0, 1, >1) for quick FCC boundary-after tests. + deltaTccc0 = 0 + deltaTccc1 = 2 + deltaTcccGt1 = 4 + deltaTcccMask = 6 + deltaShift = 3 + + maxDelta = 0x40 +) + +const ( + jamoLBase rune = 0x1100 /* "lead" jamo */ + jamoLEnd rune = 0x1112 + jamoVBase rune = 0x1161 /* "vowel" jamo */ + jamoVEnd rune = 0x1175 + jamoTBase rune = 0x11a7 /* "trail" jamo */ + jamoTEnd rune = 0x11c2 + + hangulBase rune = 0xac00 + hangulEnd rune = 0xd7a3 + + jamoLCount rune = 19 + jamoVCount rune = 21 + jamoTCount rune = 28 + + hangulCount = jamoLCount * jamoVCount * jamoTCount + hangulLimit = hangulBase + hangulCount +) + +const ( + mappingHasCccLcccWord = 0x80 + mappingHasRawMapping = 0x40 + // unused bit 0x20, + mappingLengthMask = 0x1f +) + +/** + * Constants for normalization modes. + * @deprecated ICU 56 Use unorm2.h instead. + */ +type Mode int32 + +const ( + /** No decomposition/composition. @deprecated ICU 56 Use unorm2.h instead. */ + NormNone Mode = 1 + /** Canonical decomposition. @deprecated ICU 56 Use unorm2.h instead. */ + NormNfd Mode = 2 + /** Compatibility decomposition. @deprecated ICU 56 Use unorm2.h instead. */ + NormNfkd Mode = 3 + /** Canonical decomposition followed by canonical composition. @deprecated ICU 56 Use unorm2.h instead. */ + NormNfc Mode = 4 + /** Default normalization. @deprecated ICU 56 Use unorm2.h instead. */ + NormDefault Mode = NormNfc + /** Compatibility decomposition followed by canonical composition. @deprecated ICU 56 Use unorm2.h instead. */ + NormNfkc Mode = 5 + /** "Fast C or D" form. @deprecated ICU 56 Use unorm2.h instead. */ + NormFcd Mode = 6 +) + +/** + * Result values for normalization quick check functions. + * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms + * @stable ICU 2.0 + */ +type CheckResult int + +const ( + /** + * The input string is not in the normalization form. + * @stable ICU 2.0 + */ + No CheckResult = iota + /** + * The input string is in the normalization form. + * @stable ICU 2.0 + */ + Yes + /** + * The input string may or may not be in the normalization form. + * This value is only returned for composition forms like NFC and FCC, + * when a backward-combining character is found for which the surrounding text + * would have to be analyzed further. + * @stable ICU 2.0 + */ + Maybe +) diff --git a/go/mysql/icuregex/internal/normalizer/normalizer.go b/go/mysql/icuregex/internal/normalizer/normalizer.go new file mode 100644 index 00000000000..c13a4878deb --- /dev/null +++ b/go/mysql/icuregex/internal/normalizer/normalizer.go @@ -0,0 +1,482 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package normalizer + +import ( + "errors" + "sync" + + "vitess.io/vitess/go/mysql/icuregex/internal/icudata" + "vitess.io/vitess/go/mysql/icuregex/internal/udata" + "vitess.io/vitess/go/mysql/icuregex/internal/uset" + "vitess.io/vitess/go/mysql/icuregex/internal/utf16" + "vitess.io/vitess/go/mysql/icuregex/internal/utrie" +) + +type Normalizer struct { + minDecompNoCP rune + minCompNoMaybeCP rune + minLcccCP rune + + // Norm16 value thresholds for quick check combinations and types of extra data. + minYesNo uint16 + minYesNoMappingsOnly uint16 + minNoNo uint16 + minNoNoCompBoundaryBefore uint16 + minNoNoCompNoMaybeCC uint16 + minNoNoEmpty uint16 + limitNoNo uint16 + centerNoNoDelta uint16 + minMaybeYes uint16 + + normTrie *utrie.UcpTrie + + maybeYesCompositions []uint16 + extraData []uint16 // mappings and/or compositions for yesYes, yesNo & noNo characters + smallFCD []uint8 // [0x100] one bit per 32 BMP code points, set if any FCD!=0 +} + +var nfc *Normalizer +var nfkc *Normalizer + +var normalizerOnce sync.Once + +func loadNormalizer() { + normalizerOnce.Do(func() { + nfc = &Normalizer{} + if err := nfc.load(icudata.Nfc); err != nil { + panic(err) + } + + nfkc = &Normalizer{} + if err := nfkc.load(icudata.Nfkc); err != nil { + panic(err) + } + }) +} + +const ixNormTrieOffset = 0 +const ixExtraDataOffset = 1 +const ixSmallFcdOffset = 2 +const ixReserved3Offset = 3 +const ixTotalSize = 7 + +const ixMinDecompNoCp = 8 +const ixMinCompNoMaybeCp = 9 + +/** Mappings & compositions in [minYesNo..minYesNoMappingsOnly[. */ +const ixMinYesNo = 10 + +/** Mappings are comp-normalized. */ +const ixMinNoNo = 11 +const ixLimitNoNo = 12 +const ixMinMaybeYes = 13 + +/** Mappings only in [minYesNoMappingsOnly..minNoNo[. */ +const ixMinYesNoMappingsOnly = 14 + +/** Mappings are not comp-normalized but have a comp boundary before. */ +const ixMinNoNoCompBoundaryBefore = 15 + +/** Mappings do not have a comp boundary before. */ +const ixMinNoNoCompNoMaybeCc = 16 + +/** Mappings to the empty string. 
*/ +const ixMinNoNoEmpty = 17 + +const ixMinLcccCp = 18 +const ixCount = 20 + +func (n *Normalizer) load(data []byte) error { + bytes := udata.NewBytes(data) + + err := bytes.ReadHeader(func(info *udata.DataInfo) bool { + return info.Size >= 20 && + info.IsBigEndian == 0 && + info.CharsetFamily == 0 && + info.DataFormat[0] == 0x4e && /* dataFormat="unam" */ + info.DataFormat[1] == 0x72 && + info.DataFormat[2] == 0x6d && + info.DataFormat[3] == 0x32 && + info.FormatVersion[0] == 4 + }) + if err != nil { + return err + } + + indexesLength := int32(bytes.Uint32()) / 4 + if indexesLength <= ixMinLcccCp { + return errors.New("normalizer2 data: not enough indexes") + } + indexes := make([]int32, indexesLength) + indexes[0] = indexesLength * 4 + for i := int32(1); i < indexesLength; i++ { + indexes[i] = bytes.Int32() + } + + n.minDecompNoCP = indexes[ixMinDecompNoCp] + n.minCompNoMaybeCP = indexes[ixMinCompNoMaybeCp] + n.minLcccCP = indexes[ixMinLcccCp] + + n.minYesNo = uint16(indexes[ixMinYesNo]) + n.minYesNoMappingsOnly = uint16(indexes[ixMinYesNoMappingsOnly]) + n.minNoNo = uint16(indexes[ixMinNoNo]) + n.minNoNoCompBoundaryBefore = uint16(indexes[ixMinNoNoCompBoundaryBefore]) + n.minNoNoCompNoMaybeCC = uint16(indexes[ixMinNoNoCompNoMaybeCc]) + n.minNoNoEmpty = uint16(indexes[ixMinNoNoEmpty]) + n.limitNoNo = uint16(indexes[ixLimitNoNo]) + n.minMaybeYes = uint16(indexes[ixMinMaybeYes]) + + n.centerNoNoDelta = uint16(indexes[ixMinMaybeYes]>>deltaShift) - maxDelta - 1 + + offset := indexes[ixNormTrieOffset] + nextOffset := indexes[ixExtraDataOffset] + triePosition := bytes.Position() + + n.normTrie, err = utrie.UcpTrieFromBytes(bytes) + if err != nil { + return err + } + + trieLength := bytes.Position() - triePosition + if trieLength > nextOffset-offset { + return errors.New("normalizer2 data: not enough bytes for normTrie") + } + bytes.Skip((nextOffset - offset) - trieLength) // skip padding after trie bytes + + // Read the composition and mapping data. + offset = nextOffset + nextOffset = indexes[ixSmallFcdOffset] + numChars := (nextOffset - offset) / 2 + if numChars != 0 { + n.maybeYesCompositions = bytes.Uint16Slice(numChars) + n.extraData = n.maybeYesCompositions[((minNormalMaybeYes - n.minMaybeYes) >> offsetShift):] + } + + // smallFCD: new in formatVersion 2 + n.smallFCD = bytes.Uint8Slice(0x100) + return nil +} + +func Nfc() *Normalizer { + loadNormalizer() + return nfc +} + +func Nfkc() *Normalizer { + loadNormalizer() + return nfkc +} + +func (n *Normalizer) AddPropertyStarts(u *uset.UnicodeSet) { + var start, end rune + var value uint32 + for { + end, value = nfc.normTrie.GetRange(start, utrie.UcpMapRangeFixedLeadSurrogates, inert, nil) + if end < 0 { + break + } + u.AddRune(start) + if start != end && n.isAlgorithmicNoNo(uint16(value)) && (value&deltaTcccMask) > deltaTccc1 { + // Range of code points with same-norm16-value algorithmic decompositions. + // They might have different non-zero FCD16 values. 
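+			// Walk the rest of the range and add another start point whenever the
+			// FCD16 value changes, so that every span between consecutive start
+			// points has a uniform FCD16 value.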
+ prevFCD16 := n.GetFCD16(start) + for { + start++ + if start > end { + break + } + fcd16 := n.GetFCD16(start) + if fcd16 != prevFCD16 { + u.AddRune(start) + prevFCD16 = fcd16 + } + } + } + start = end + 1 + } + + // add Hangul LV syllables and LV+1 because of skippables + for c := hangulBase; c < hangulLimit; c += jamoTCount { + u.AddRune(c) + u.AddRune(c + 1) + } + u.AddRune(hangulLimit) +} + +func (n *Normalizer) isAlgorithmicNoNo(norm16 uint16) bool { + return n.limitNoNo <= norm16 && norm16 < n.minMaybeYes +} + +func (n *Normalizer) GetFCD16(c rune) uint16 { + if c < n.minDecompNoCP { + return 0 + } else if c <= 0xffff { + if !n.singleLeadMightHaveNonZeroFCD16(c) { + return 0 + } + } + return n.getFCD16FromNormData(c) +} + +func (n *Normalizer) singleLeadMightHaveNonZeroFCD16(lead rune) bool { + // 0<=lead<=0xffff + bits := n.smallFCD[lead>>8] + if bits == 0 { + return false + } + return ((bits >> ((lead >> 5) & 7)) & 1) != 0 +} + +func (n *Normalizer) getFCD16FromNormData(c rune) uint16 { + norm16 := n.getNorm16(c) + if norm16 >= n.limitNoNo { + if norm16 >= minNormalMaybeYes { + // combining mark + norm16 = uint16(n.getCCFromNormalYesOrMaybe(norm16)) + return norm16 | (norm16 << 8) + } else if norm16 >= n.minMaybeYes { + return 0 + } else { // isDecompNoAlgorithmic(norm16) + deltaTrailCC := norm16 & deltaTcccMask + if deltaTrailCC <= deltaTccc1 { + return deltaTrailCC >> offsetShift + } + // Maps to an isCompYesAndZeroCC. + c = n.mapAlgorithmic(c, norm16) + norm16 = n.getRawNorm16(c) + } + } + + if norm16 <= n.minYesNo || n.isHangulLVT(norm16) { + // no decomposition or Hangul syllable, all zeros + return 0 + } + // c decomposes, get everything from the variable-length extra data + mapping := n.getMapping(norm16) + firstUnit := mapping[1] + if firstUnit&mappingHasCccLcccWord != 0 { + norm16 |= mapping[0] & 0xff00 + } + return norm16 +} + +func (n *Normalizer) getMapping(norm16 uint16) []uint16 { + return n.extraData[(norm16>>offsetShift)-1:] +} + +func (n *Normalizer) getNorm16(c rune) uint16 { + if utf16.IsLead(c) { + return inert + } + return n.getRawNorm16(c) +} + +func (n *Normalizer) getRawNorm16(c rune) uint16 { + return uint16(n.normTrie.Get(c)) +} + +func (n *Normalizer) getCCFromNormalYesOrMaybe(norm16 uint16) uint8 { + return uint8(norm16 >> offsetShift) +} + +func (n *Normalizer) mapAlgorithmic(c rune, norm16 uint16) rune { + return c + rune(norm16>>deltaShift) - rune(n.centerNoNoDelta) +} + +func (n *Normalizer) isHangulLV(norm16 uint16) bool { + return norm16 == n.minYesNo +} + +func (n *Normalizer) isHangulLVT(norm16 uint16) bool { + return norm16 == n.hangulLVT() +} + +func (n *Normalizer) hangulLVT() uint16 { + return n.minYesNoMappingsOnly | hasCompBoundaryAfter +} + +func (n *Normalizer) getComposeQuickCheck(c rune) CheckResult { + return n.getCompQuickCheck(n.getNorm16(c)) +} + +func (n *Normalizer) getDecomposeQuickCheck(c rune) CheckResult { + if n.isDecompYes(n.getNorm16(c)) { + return Yes + } + return No +} + +func QuickCheck(c rune, mode Mode) CheckResult { + if mode <= NormNone || NormFcd <= mode { + return Yes + } + switch mode { + case NormNfc: + return Nfc().getComposeQuickCheck(c) + case NormNfd: + return Nfc().getDecomposeQuickCheck(c) + case NormNfkc: + return Nfkc().getComposeQuickCheck(c) + case NormNfkd: + return Nfkc().getDecomposeQuickCheck(c) + default: + return Maybe + } +} + +func IsInert(c rune, mode Mode) bool { + switch mode { + case NormNfc: + return Nfc().isCompInert(c) + case NormNfd: + return Nfc().isDecompInert(c) + case NormNfkc: + 
return Nfkc().isCompInert(c) + case NormNfkd: + return Nfkc().isDecompInert(c) + default: + return true + } +} + +func (n *Normalizer) isDecompYes(norm16 uint16) bool { + return norm16 < n.minYesNo || n.minMaybeYes <= norm16 +} + +func (n *Normalizer) getCompQuickCheck(norm16 uint16) CheckResult { + if norm16 < n.minNoNo || minYesYesWithCC <= norm16 { + return Yes + } else if n.minMaybeYes <= norm16 { + return Maybe + } else { + return No + } +} + +func (n *Normalizer) isMaybeOrNonZeroCC(norm16 uint16) bool { + return norm16 >= n.minMaybeYes +} + +func (n *Normalizer) isDecompNoAlgorithmic(norm16 uint16) bool { + return norm16 >= n.limitNoNo +} + +func (n *Normalizer) IsCompNo(c rune) bool { + norm16 := n.getNorm16(c) + return n.minNoNo <= norm16 && norm16 < n.minMaybeYes +} + +func (n *Normalizer) Decompose(c rune) []rune { + norm16 := n.getNorm16(c) + if c < n.minDecompNoCP || n.isMaybeOrNonZeroCC(norm16) { + // c does not decompose + return nil + } + var decomp []rune + + if n.isDecompNoAlgorithmic(norm16) { + // Maps to an isCompYesAndZeroCC. + c = n.mapAlgorithmic(c, norm16) + decomp = append(decomp, c) + // The mapping might decompose further. + norm16 = n.getRawNorm16(c) + } + if norm16 < n.minYesNo { + return decomp + } else if n.isHangulLV(norm16) || n.isHangulLVT(norm16) { + // Hangul syllable: decompose algorithmically + parts := hangulDecompose(c) + for len(parts) > 0 { + c = rune(parts[0]) + decomp = append(decomp, c) + parts = parts[1:] + } + return decomp + } + // c decomposes, get everything from the variable-length extra data + mapping := n.getMapping(norm16) + length := mapping[1] & mappingLengthMask + mapping = mapping[2 : 2+length] + + for len(mapping) > 0 { + c, mapping = utf16.NextUnsafe(mapping) + decomp = append(decomp, c) + } + + return decomp +} + +func hangulDecompose(c rune) []uint16 { + c -= hangulBase + c2 := c % jamoTCount + c /= jamoTCount + var buffer []uint16 + buffer = append(buffer, uint16(jamoLBase+c/jamoVCount)) + buffer = append(buffer, uint16(jamoVBase+c%jamoVCount)) + if c2 != 0 { + buffer = append(buffer, uint16(jamoTBase+c2)) + } + return buffer +} + +func (n *Normalizer) isCompInert(c rune) bool { + norm16 := n.getNorm16(c) + return n.isCompYesAndZeroCC(norm16) && (norm16&hasCompBoundaryAfter) != 0 +} + +func (n *Normalizer) isDecompInert(c rune) bool { + return n.isDecompYesAndZeroCC(n.getNorm16(c)) +} + +func (n *Normalizer) isCompYesAndZeroCC(norm16 uint16) bool { + return norm16 < n.minNoNo +} + +func (n *Normalizer) isDecompYesAndZeroCC(norm16 uint16) bool { + return norm16 < n.minYesNo || + norm16 == jamoVt || + (n.minMaybeYes <= norm16 && norm16 <= minNormalMaybeYes) +} + +func (n *Normalizer) CombiningClass(c rune) uint8 { + return n.getCC(n.getNorm16(c)) +} + +func (n *Normalizer) getCC(norm16 uint16) uint8 { + if norm16 >= minNormalMaybeYes { + return n.getCCFromNormalYesOrMaybe(norm16) + } + if norm16 < n.minNoNo || n.limitNoNo <= norm16 { + return 0 + } + return n.getCCFromNoNo(norm16) + +} + +func (n *Normalizer) getCCFromNoNo(norm16 uint16) uint8 { + mapping := n.getMapping(norm16) + if mapping[1]&mappingHasCccLcccWord != 0 { + return uint8(mapping[0]) + } + return 0 +} diff --git a/go/mysql/icuregex/internal/pattern/unescape.go b/go/mysql/icuregex/internal/pattern/unescape.go new file mode 100644 index 00000000000..e4a554ff612 --- /dev/null +++ b/go/mysql/icuregex/internal/pattern/unescape.go @@ -0,0 +1,314 @@ +/* +© 2016 and later: Unicode, Inc. and others. 
+Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package pattern + +import ( + "strings" + "unicode/utf8" + + "vitess.io/vitess/go/mysql/icuregex/internal/utf16" +) + +/* Convert one octal digit to a numeric value 0..7, or -1 on failure */ +func _digit8(c rune) rune { + if c >= 0x0030 && c <= 0x0037 { + return (c - 0x0030) + } + return -1 +} + +/* Convert one hex digit to a numeric value 0..F, or -1 on failure */ +func _digit16(c rune) rune { + if c >= 0x0030 && c <= 0x0039 { + return (c - 0x0030) + } + if c >= 0x0041 && c <= 0x0046 { + return (c - (0x0041 - 10)) + } + if c >= 0x0061 && c <= 0x0066 { + return (c - (0x0061 - 10)) + } + return -1 +} + +var unscapeMap = []byte{ + /*" 0x22, 0x22 */ + /*' 0x27, 0x27 */ + /*? 0x3F, 0x3F */ + /*\ 0x5C, 0x5C */ + /*a*/ 0x61, 0x07, + /*b*/ 0x62, 0x08, + /*e*/ 0x65, 0x1b, + /*f*/ 0x66, 0x0c, + /*n*/ 0x6E, 0x0a, + /*r*/ 0x72, 0x0d, + /*t*/ 0x74, 0x09, + /*v*/ 0x76, 0x0b, +} + +func Unescape(str string) (string, bool) { + var idx int + if idx = strings.IndexByte(str, '\\'); idx < 0 { + return str, true + } + + var result strings.Builder + result.WriteString(str[:idx]) + str = str[idx:] + + for len(str) > 0 { + if str[0] == '\\' { + var r rune + r, str = UnescapeAt(str[1:]) + if r < 0 { + return "", false + } + result.WriteRune(r) + } else { + result.WriteByte(str[0]) + str = str[1:] + } + } + return result.String(), true +} + +func UnescapeAt(str string) (rune, string) { + c, w := utf8.DecodeRuneInString(str) + str = str[w:] + if c == utf8.RuneError && (w == 0 || w == 1) { + return -1, str + } + + var minDig, maxDig, n int + var braces bool + var bitsPerDigit = 4 + var result rune + + switch c { + case 'u': + minDig = 4 + maxDig = 4 + case 'U': + minDig = 8 + maxDig = 8 + case 'x': + minDig = 1 + if len(str) > 0 && str[0] == '{' { + str = str[1:] + braces = true + maxDig = 8 + } else { + maxDig = 2 + } + default: + if dig := _digit8(c); dig >= 0 { + minDig = 1 + maxDig = 4 + n = 1 + bitsPerDigit = 3 + result = dig + } + } + + if minDig != 0 { + for n < maxDig && len(str) > 0 { + c, w = utf8.DecodeRuneInString(str) + if c == utf8.RuneError && w == 1 { + return -1, str + } + + var dig rune + if bitsPerDigit == 3 { + dig = _digit8(c) + } else { + dig = _digit16(c) + } + if dig < 0 { + break + } + result = (result << bitsPerDigit) | dig + str = str[w:] + n++ + } + if n < minDig { + return -1, str + } + if braces { + if c != '}' { + return -1, str + } + str = str[1:] + } + if result < 0 || result > utf8.MaxRune { + return -1, str + } + if len(str) > 0 && utf16.IsLead(result) { + c, w = utf8.DecodeRuneInString(str) + if c == utf8.RuneError && (w == 0 || w == 1) { + return -1, str + } + if c == '\\' { + var str2 string + c, str2 = UnescapeAt(str[1:]) + if utf16.IsTrail(c) { + result = 
utf16.DecodeRune(result, c) + str = str2 + } + } + } + return result, str + } + + if c < utf8.RuneSelf { + for i := 0; i < len(unscapeMap); i += 2 { + if byte(c) == unscapeMap[i] { + return rune(unscapeMap[i+1]), str + } + if byte(c) < unscapeMap[i] { + break + } + } + } + + if c == 'c' && len(str) > 0 { + c, w = utf8.DecodeRuneInString(str) + if c == utf8.RuneError && (w == 0 || w == 1) { + return -1, str + } + return 0x1f & c, str[w:] + } + + return c, str +} + +func UnescapeAtRunes(str []rune) (rune, []rune) { + if len(str) == 0 { + return -1, str + } + + c := str[0] + str = str[1:] + if c == utf8.RuneError { + return -1, str + } + + var minDig, maxDig, n int + var braces bool + var bitsPerDigit = 4 + var result rune + + switch c { + case 'u': + minDig = 4 + maxDig = 4 + case 'U': + minDig = 8 + maxDig = 8 + case 'x': + minDig = 1 + if len(str) > 0 && str[0] == '{' { + str = str[1:] + braces = true + maxDig = 8 + } else { + maxDig = 2 + } + default: + if dig := _digit8(c); dig >= 0 { + minDig = 1 + maxDig = 4 + n = 1 + bitsPerDigit = 3 + result = dig + } + } + + if minDig != 0 { + for n < maxDig && len(str) > 0 { + c = str[0] + if c == utf8.RuneError { + return -1, str + } + + var dig rune + if bitsPerDigit == 3 { + dig = _digit8(c) + } else { + dig = _digit16(c) + } + if dig < 0 { + break + } + result = (result << bitsPerDigit) | dig + str = str[1:] + n++ + } + if n < minDig { + return -1, str + } + if braces { + if c != '}' { + return -1, str + } + str = str[1:] + } + if result < 0 || result > utf8.MaxRune { + return -1, str + } + if len(str) > 0 && utf16.IsLead(result) { + c = str[0] + if c == utf8.RuneError { + return -1, str + } + if c == '\\' { + var str2 []rune + c, str2 = UnescapeAtRunes(str[1:]) + if utf16.IsTrail(c) { + result = utf16.DecodeRune(result, c) + str = str2 + } + } + } + return result, str + } + + if c < utf8.RuneSelf { + for i := 0; i < len(unscapeMap); i += 2 { + if byte(c) == unscapeMap[i] { + return rune(unscapeMap[i+1]), str + } + if byte(c) < unscapeMap[i] { + break + } + } + } + + if c == 'c' && len(str) > 0 { + c = str[0] + if c == utf8.RuneError { + return -1, str + } + return 0x1f & c, str[1:] + } + + return c, str +} diff --git a/go/mysql/icuregex/internal/pattern/unescape_test.go b/go/mysql/icuregex/internal/pattern/unescape_test.go new file mode 100644 index 00000000000..0bb76c2bfdb --- /dev/null +++ b/go/mysql/icuregex/internal/pattern/unescape_test.go @@ -0,0 +1,48 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
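The \u, \U, \x{...} and octal branches of UnescapeAt all share one accumulator: each digit shifts the running value left by bitsPerDigit and ORs the digit in. A minimal standalone model of that rule for hex escapes (illustrative only, not the package API):

```go
package main

import "fmt"

// hexValue reports the value of one hex digit, or -1 (mirrors _digit16 above).
func hexValue(c rune) rune {
	switch {
	case c >= '0' && c <= '9':
		return c - '0'
	case c >= 'a' && c <= 'f':
		return c - 'a' + 10
	case c >= 'A' && c <= 'F':
		return c - 'A' + 10
	}
	return -1
}

// accumulate applies the same rule UnescapeAt uses for \u, \U and \x{...}:
// result = (result << bitsPerDigit) | digit, with 4 bits per hex digit.
func accumulate(digits string) rune {
	var result rune
	for _, c := range digits {
		d := hexValue(c)
		if d < 0 {
			break
		}
		result = result<<4 | d
	}
	return result
}

func main() {
	fmt.Printf("%U %U\n", accumulate("0041"), accumulate("1F600")) // U+0041 U+1F600
}
```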
+*/ + +package pattern + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestUnescapeAt(t *testing.T) { + r, str := UnescapeAt("ud800\\ud800\\udc00") + assert.Equal(t, rune(0xd800), r) + assert.Equal(t, "\\ud800\\udc00", str) + + r, str = UnescapeAt(str[1:]) + assert.Equal(t, rune(0x00010000), r) + assert.Equal(t, "", str) +} + +func TestUnescapeAtRunes(t *testing.T) { + r, str := UnescapeAtRunes([]rune("ud800\\ud800\\udc00")) + assert.Equal(t, rune(0xd800), r) + assert.Equal(t, []rune("\\ud800\\udc00"), str) + + r, str = UnescapeAtRunes(str[1:]) + assert.Equal(t, rune(0x00010000), r) + assert.Equal(t, []rune(""), str) +} diff --git a/go/mysql/icuregex/internal/pattern/utils.go b/go/mysql/icuregex/internal/pattern/utils.go new file mode 100644 index 00000000000..4dcf55e9f42 --- /dev/null +++ b/go/mysql/icuregex/internal/pattern/utils.go @@ -0,0 +1,111 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package pattern + +import ( + "strings" + "unicode/utf8" +) + +var patternPropsLatin1 = [256]uint8{ + // WS: 9..D + 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // WS: 20 Syntax: 21..2F + 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + // Syntax: 3A..40 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, + 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Syntax: 5B..5E + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, + // Syntax: 60 + 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Syntax: 7B..7E + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, + // WS: 85 + 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Syntax: A1..A7, A9, AB, AC, AE + 0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 3, 0, + // Syntax: B0, B1, B6, BB, BF + 3, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 3, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Syntax: D7 + 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Syntax: F7 + 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, +} + +func IsWhitespace(c rune) bool { + if c < 0 { + return false + } else if c <= 0xff { + return (patternPropsLatin1[c]>>2)&1 != 0 + } else if 0x200e <= c && c <= 0x2029 { + return c <= 0x200f || 0x2028 <= c + } else { + return false + } +} + +func SkipWhitespace(str string) string { + for { + r, w := utf8.DecodeRuneInString(str) + if r == utf8.RuneError && (w == 0 || w == 1) { + return str[w:] + } + if !IsWhitespace(r) { + return str + } + str = str[w:] + } +} + +func IsUnprintable(c rune) bool { + return !(c >= 0x20 && c <= 0x7E) +} + +// "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" +var digits = [...]byte{ + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, + 65, 
66, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, + 85, 86, 87, 88, 89, 90, +} + +func EscapeUnprintable(w *strings.Builder, c rune) { + w.WriteByte('\\') + if (c & ^0xFFFF) != 0 { + w.WriteByte('U') + w.WriteByte(digits[0xF&(c>>28)]) + w.WriteByte(digits[0xF&(c>>24)]) + w.WriteByte(digits[0xF&(c>>20)]) + w.WriteByte(digits[0xF&(c>>16)]) + } else { + w.WriteByte('u') + } + w.WriteByte(digits[0xF&(c>>12)]) + w.WriteByte(digits[0xF&(c>>8)]) + w.WriteByte(digits[0xF&(c>>4)]) + w.WriteByte(digits[0xF&c]) +} diff --git a/go/mysql/icuregex/internal/ubidi/ubidi.go b/go/mysql/icuregex/internal/ubidi/ubidi.go new file mode 100644 index 00000000000..195e2b1a6dd --- /dev/null +++ b/go/mysql/icuregex/internal/ubidi/ubidi.go @@ -0,0 +1,461 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package ubidi + +import ( + "errors" + + "vitess.io/vitess/go/mysql/icuregex/internal/icudata" + "vitess.io/vitess/go/mysql/icuregex/internal/udata" + "vitess.io/vitess/go/mysql/icuregex/internal/utrie" +) + +const ( + ixIndexTop = iota + ixLength + ixTrieSize + ixMirrorLength + + ixJgStart + ixJgLimit + ixJgStart2 /* new in format version 2.2, ICU 54 */ + ixJgLimit2 + + maxValuesIndex + ixTop +) + +var ubidi struct { + indexes []int32 + trie *utrie.UTrie2 + mirrors []uint32 + jg []uint8 + jg2 []uint8 +} + +func readData(bytes *udata.Bytes) error { + err := bytes.ReadHeader(func(info *udata.DataInfo) bool { + return info.DataFormat[0] == 0x42 && + info.DataFormat[1] == 0x69 && + info.DataFormat[2] == 0x44 && + info.DataFormat[3] == 0x69 && + info.FormatVersion[0] == 2 + }) + if err != nil { + return err + } + + count := int32(bytes.Uint32()) + if count < ixTop { + return errors.New("indexes[0] too small in ucase.icu") + } + + ubidi.indexes = make([]int32, count) + ubidi.indexes[0] = count + + for i := int32(1); i < count; i++ { + ubidi.indexes[i] = int32(bytes.Uint32()) + } + + ubidi.trie, err = utrie.UTrie2FromBytes(bytes) + if err != nil { + return err + } + + expectedTrieLength := ubidi.indexes[ixTrieSize] + trieLength := ubidi.trie.SerializedLength() + + if trieLength > expectedTrieLength { + return errors.New("ucase.icu: not enough bytes for the trie") + } + + bytes.Skip(expectedTrieLength - trieLength) + + if n := ubidi.indexes[ixMirrorLength]; n > 0 { + ubidi.mirrors = bytes.Uint32Slice(n) + } + if n := ubidi.indexes[ixJgLimit] - ubidi.indexes[ixJgStart]; n > 0 { + ubidi.jg = bytes.Uint8Slice(n) + } + if n := ubidi.indexes[ixJgLimit2] - ubidi.indexes[ixJgStart2]; n > 0 { + ubidi.jg2 = bytes.Uint8Slice(n) + } + + return nil +} + +func init() { + b := udata.NewBytes(icudata.UBidi) + if err := readData(b); err != nil { + panic(err) + } +} + +const ( + /* UBIDI_CLASS_SHIFT=0, */ /* 
bidi class: 5 bits (4..0) */ + jtShift = 5 /* joining type: 3 bits (7..5) */ + + bptShift = 8 /* Bidi_Paired_Bracket_Type(bpt): 2 bits (9..8) */ + + joinControlShift = 10 + bidiControlShift = 11 + + isMirroredShift = 12 /* 'is mirrored' */ +) + +/** + * Bidi Paired Bracket Type constants. + * + * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE + * @stable ICU 52 + */ +type UPairedBracketType int32 + +/* + * Note: UBidiPairedBracketType constants are parsed by preparseucd.py. + * It matches lines like + * U_BPT_ + */ +const ( + /** Not a paired bracket. @stable ICU 52 */ + BptNone UPairedBracketType = iota + /** Open paired bracket. @stable ICU 52 */ + BptOpen + /** Close paired bracket. @stable ICU 52 */ + BptClose +) + +const classMask = 0x0000001f +const jtMask = 0x000000e0 +const bptMask = 0x00000300 + +/** + * Joining Type constants. + * + * @see UCHAR_JOINING_TYPE + * @stable ICU 2.2 + */ +type JoiningType int32 + +/* + * Note: UJoiningType constants are parsed by preparseucd.py. + * It matches lines like + * U_JT_ + */ +const ( + JtNonJoining JoiningType = iota /*[U]*/ + JtJoinCausing /*[C]*/ + JtDualJoining /*[D]*/ + JtLeftJoining /*[L]*/ + JtRightJoining /*[R]*/ + JtTransparent /*[T]*/ +) + +/** + * Joining Group constants. + * + * @see UCHAR_JOINING_GROUP + * @stable ICU 2.2 + */ +type JoiningGroup int32 + +/* + * Note: UJoiningGroup constants are parsed by preparseucd.py. + * It matches lines like + * U_JG_ + */ +const ( + JgNoJoiningGroup JoiningGroup = iota + JgAin + JgAlaph + JgAlef + JgBeh + JgBeth + JgDal + JgDalathRish + JgE + JgFeh + JgFinalSemkath + JgGaf + JgGamal + JgHah + JgTehMarbutaGoal /**< @stable ICU 4.6 */ + JgHe + JgHeh + JgHehGoal + JgHeth + JgKaf + JgKaph + JgKnottedHeh + JgLam + JgLamadh + JgMeem + JgMim + JgNoon + JgNun + JgPe + JgQaf + JgQaph + JgReh + JgReversedPe + JgSad + JgSadhe + JgSeen + JgSemkath + JgShin + JgSwashKaf + JgSyriacWaw + JgTah + JgTaw + JgTehMarbuta + JgTeth + JgWaw + JgYeh + JgYehBarree + JgYehWithTail + JgYudh + JgYudhHe + JgZain + JgFe /**< @stable ICU 2.6 */ + JgKhaph /**< @stable ICU 2.6 */ + JgZhain /**< @stable ICU 2.6 */ + JgBurushashkiYehBarree /**< @stable ICU 4.0 */ + JgFarsiYeh /**< @stable ICU 4.4 */ + JgNya /**< @stable ICU 4.4 */ + JgRohingyaYeh /**< @stable ICU 49 */ + JgManichaeanAleph /**< @stable ICU 54 */ + JgManichaeanAyin /**< @stable ICU 54 */ + JgManichaeanBeth /**< @stable ICU 54 */ + JgManichaeanDaleth /**< @stable ICU 54 */ + JgManichaeanDhamedh /**< @stable ICU 54 */ + JgManichaeanFive /**< @stable ICU 54 */ + JgManichaeanGimel /**< @stable ICU 54 */ + JgManichaeanHeth /**< @stable ICU 54 */ + JgManichaeanHundred /**< @stable ICU 54 */ + JgManichaeanKaph /**< @stable ICU 54 */ + JgManichaeanLamedh /**< @stable ICU 54 */ + JgManichaeanMem /**< @stable ICU 54 */ + JgManichaeanNun /**< @stable ICU 54 */ + JgManichaeanOne /**< @stable ICU 54 */ + JgManichaeanPe /**< @stable ICU 54 */ + JgManichaeanQoph /**< @stable ICU 54 */ + JgManichaeanResh /**< @stable ICU 54 */ + JgManichaeanSadhe /**< @stable ICU 54 */ + JgManichaeanSamekh /**< @stable ICU 54 */ + JgManichaeanTaw /**< @stable ICU 54 */ + JgManichaeanTen /**< @stable ICU 54 */ + JgManichaeanTeth /**< @stable ICU 54 */ + JgManichaeanThamedh /**< @stable ICU 54 */ + JgManichaeanTwenty /**< @stable ICU 54 */ + JgManichaeanWaw /**< @stable ICU 54 */ + JgManichaeanYodh /**< @stable ICU 54 */ + JgManichaeanZayin /**< @stable ICU 54 */ + JgStraightWaw /**< @stable ICU 54 */ + JgAfricanFeh /**< @stable ICU 58 */ + JgAfricanNoon /**< @stable ICU 58 */ + JgAfricanQaf /**< @stable 
ICU 58 */ + + JgMalayalamBha /**< @stable ICU 60 */ + JgMalayalamJa /**< @stable ICU 60 */ + JgMalayalamLla /**< @stable ICU 60 */ + JgMalayalamLlla /**< @stable ICU 60 */ + JgMalayalamNga /**< @stable ICU 60 */ + JgMalayalamNna /**< @stable ICU 60 */ + JgMalayalamNnna /**< @stable ICU 60 */ + JgMalayalamNya /**< @stable ICU 60 */ + JgMalayalamRa /**< @stable ICU 60 */ + JgMalayalamSsa /**< @stable ICU 60 */ + JgMalayalamTta /**< @stable ICU 60 */ + + JgHanafiRohingyaKinnaYa /**< @stable ICU 62 */ + JgHanafiRohingyaPa /**< @stable ICU 62 */ + + JgThinYeh /**< @stable ICU 70 */ + JgVerticalTail /**< @stable ICU 70 */ +) + +/** + * This specifies the language directional property of a character set. + * @stable ICU 2.0 + */ +type CharDirection int32 + +/* + * Note: UCharDirection constants and their API comments are parsed by preparseucd.py. + * It matches pairs of lines like + * / ** comment... * / + * U_<[A-Z_]+> = , + */ + +const ( + /** L @stable ICU 2.0 */ + LeftToRight CharDirection = 0 + /** R @stable ICU 2.0 */ + RightToLeft CharDirection = 1 + /** EN @stable ICU 2.0 */ + EuropeanNumber CharDirection = 2 + /** ES @stable ICU 2.0 */ + EuropeanNumberSeparator CharDirection = 3 + /** ET @stable ICU 2.0 */ + EuropeanNumberTerminator CharDirection = 4 + /** AN @stable ICU 2.0 */ + ArabicNumber CharDirection = 5 + /** CS @stable ICU 2.0 */ + CommonNumberSeparator CharDirection = 6 + /** B @stable ICU 2.0 */ + BlockSeparator CharDirection = 7 + /** S @stable ICU 2.0 */ + SegmentSeparator CharDirection = 8 + /** WS @stable ICU 2.0 */ + WhiteSpaceNeutral CharDirection = 9 + /** ON @stable ICU 2.0 */ + OtherNeutral CharDirection = 10 + /** LRE @stable ICU 2.0 */ + LeftToRightEmbedding CharDirection = 11 + /** LRO @stable ICU 2.0 */ + LeftToRightOverride CharDirection = 12 + /** AL @stable ICU 2.0 */ + RightToLeftArabic CharDirection = 13 + /** RLE @stable ICU 2.0 */ + RightToLeftEmbedding CharDirection = 14 + /** RLO @stable ICU 2.0 */ + RightToLeftOverride CharDirection = 15 + /** PDF @stable ICU 2.0 */ + PopDirectionalFormat CharDirection = 16 + /** NSM @stable ICU 2.0 */ + DirNonSpacingMark CharDirection = 17 + /** BN @stable ICU 2.0 */ + BoundaryNeutral CharDirection = 18 + /** FSI @stable ICU 52 */ + StrongIsolate CharDirection = 19 + /** LRI @stable ICU 52 */ + LeftToRightIsolate CharDirection = 20 + /** RLI @stable ICU 52 */ + RightToLeftIsolate CharDirection = 21 + /** PDI @stable ICU 52 */ + PopDirectionalIsolate CharDirection = 22 +) + +type propertySet interface { + AddRune(ch rune) + AddRuneRange(from rune, to rune) +} + +func AddPropertyStarts(sa propertySet) { + /* add the start code point of each same-value range of the trie */ + ubidi.trie.Enum(nil, func(start, _ rune, _ uint32) bool { + sa.AddRune(start) + return true + }) + + /* add the code points from the bidi mirroring table */ + length := ubidi.indexes[ixMirrorLength] + for i := int32(0); i < length; i++ { + c := mirrorCodePoint(rune(ubidi.mirrors[i])) + sa.AddRuneRange(c, c+1) + } + + /* add the code points from the Joining_Group array where the value changes */ + start := ubidi.indexes[ixJgStart] + limit := ubidi.indexes[ixJgLimit] + jgArray := ubidi.jg[:] + for { + prev := uint8(0) + for start < limit { + jg := jgArray[0] + jgArray = jgArray[1:] + if jg != prev { + sa.AddRune(start) + prev = jg + } + start++ + } + if prev != 0 { + /* add the limit code point if the last value was not 0 (it is now start==limit) */ + sa.AddRune(limit) + } + if limit == ubidi.indexes[ixJgLimit] { + /* switch to the second Joining_Group 
range */ + start = ubidi.indexes[ixJgStart2] + limit = ubidi.indexes[ixJgLimit2] + jgArray = ubidi.jg2[:] + } else { + break + } + } + + /* add code points with hardcoded properties, plus the ones following them */ + + /* (none right now) */ +} + +func HasFlag(props uint16, shift int) bool { + return ((props >> shift) & 1) != 0 +} + +func mirrorCodePoint(m rune) rune { + return m & 0x1fffff +} + +func IsJoinControl(c rune) bool { + props := ubidi.trie.Get16(c) + return HasFlag(props, joinControlShift) +} + +func JoinType(c rune) JoiningType { + props := ubidi.trie.Get16(c) + return JoiningType((props & jtMask) >> jtShift) +} + +func JoinGroup(c rune) JoiningGroup { + start := ubidi.indexes[ixJgStart] + limit := ubidi.indexes[ixJgLimit] + if start <= c && c < limit { + return JoiningGroup(ubidi.jg[c-start]) + } + start = ubidi.indexes[ixJgStart2] + limit = ubidi.indexes[ixJgLimit2] + if start <= c && c < limit { + return JoiningGroup(ubidi.jg2[c-start]) + } + return JgNoJoiningGroup +} + +func IsMirrored(c rune) bool { + props := ubidi.trie.Get16(c) + return HasFlag(props, isMirroredShift) +} + +func IsBidiControl(c rune) bool { + props := ubidi.trie.Get16(c) + return HasFlag(props, bidiControlShift) +} + +func PairedBracketType(c rune) UPairedBracketType { + props := ubidi.trie.Get16(c) + return UPairedBracketType((props & bptMask) >> bptShift) +} + +func Class(c rune) CharDirection { + props := ubidi.trie.Get16(c) + return CharDirection(props & classMask) +} diff --git a/go/mysql/icuregex/internal/ucase/fold.go b/go/mysql/icuregex/internal/ucase/fold.go new file mode 100644 index 00000000000..88d4f026c65 --- /dev/null +++ b/go/mysql/icuregex/internal/ucase/fold.go @@ -0,0 +1,243 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package ucase + +import ( + "math/bits" + + "vitess.io/vitess/go/mysql/icuregex/internal/utf16" +) + +func FoldRunes(str []rune) []rune { + out := make([]rune, 0, len(str)) + for _, c := range str { + r, exp := FullFolding(c) + if exp == nil { + out = append(out, r) + continue + } + + for len(exp) > 0 { + r, exp = utf16.NextUnsafe(exp) + out = append(out, r) + } + } + return out +} + +/* + - Case folding is similar to lowercasing. + - The result may be a simple mapping, i.e., a single code point, or + - a full mapping, i.e., a string. + - If the case folding for a code point is the same as its simple (1:1) lowercase mapping, + - then only the lowercase mapping is stored. + * + - Some special cases are hardcoded because their conditions cannot be + - parsed and processed from CaseFolding.txt. + * + - Unicode 3.2 CaseFolding.txt specifies for its status field: + +# C: common case folding, common mappings shared by both simple and full mappings. 
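IsJoinControl, JoinType, JoinGroup, PairedBracketType and Class in ubidi.go above all read the same 16-bit word out of the trie and unpack different bit fields from it. A small sketch of that unpacking; the masks and shifts mirror the constants above, while the sample property word is invented:

```go
package main

import "fmt"

// Field layout of the ubidi property word, as in the constants above:
// bits 4..0 bidi class, 7..5 joining type, 9..8 paired bracket type,
// bit 10 Join_Control, bit 11 Bidi_Control, bit 12 Bidi_Mirrored.
const (
	classMask      = 0x001f
	jtMask         = 0x00e0
	jtShift        = 5
	bptMask        = 0x0300
	bptShift       = 8
	joinControlBit = 1 << 10
	isMirroredBit  = 1 << 12
)

func main() {
	// Hypothetical property word: class 13 (AL), joining type 2 (dual),
	// bracket type 1 (open), mirrored.
	props := uint16(13 | 2<<jtShift | 1<<bptShift | isMirroredBit)

	fmt.Println("class:", props&classMask)                   // 13
	fmt.Println("joining type:", (props&jtMask)>>jtShift)    // 2
	fmt.Println("bracket type:", (props&bptMask)>>bptShift)  // 1
	fmt.Println("join control:", props&joinControlBit != 0)  // false
	fmt.Println("mirrored:", props&isMirroredBit != 0)       // true
}
```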
+# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces. +# S: simple case folding, mappings to single characters where different from F. +# T: special case for uppercase I and dotted uppercase I +# - For non-Turkic languages, this mapping is normally not used. +# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters. +# +# Usage: +# A. To do a simple case folding, use the mappings with status C + S. +# B. To do a full case folding, use the mappings with status C + F. +# +# The mappings with status T can be used or omitted depending on the desired case-folding +# behavior. (The default option is to exclude them.) + + - Unicode 3.2 has 'T' mappings as follows: + +0049; T; 0131; # LATIN CAPITAL LETTER I +0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE + + - while the default mappings for these code points are: + +0049; C; 0069; # LATIN CAPITAL LETTER I +0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE + + - U+0130 has no simple case folding (simple-case-folds to itself). +*/ +func Fold(c rune) rune { + props := ucase.trie.Get16(c) + if !hasException(props) { + if isUpperOrTitle(props) { + c += getDelta(props) + } + } else { + pe := getExceptions(props) + excWord := pe[0] + pe = pe[1:] + if (excWord & excConditionalFold) != 0 { + /* special case folding mappings, hardcoded */ + /* default mappings */ + if c == 0x49 { + /* 0049; C; 0069; # LATIN CAPITAL LETTER I */ + return 0x69 + } else if c == 0x130 { + /* no simple case folding for U+0130 */ + return c + } + } + if (excWord & excNoSimpleCaseFolding) != 0 { + return c + } + if hasSlot(excWord, excDelta) && isUpperOrTitle(props) { + var delta int32 + delta, _ = getSlotValue(excWord, excDelta, pe) + if excWord&excDeltaIsNegative == 0 { + return c + delta + } + return c - delta + } + + var idx int32 + if hasSlot(excWord, excFold) { + idx = excFold + } else if hasSlot(excWord, excLower) { + idx = excLower + } else { + return c + } + c, _ = getSlotValue(excWord, idx, pe) + } + return c +} + +func FullFolding(c rune) (rune, []uint16) { + result := c + props := ucase.trie.Get16(c) + + if !hasException(props) { + if isUpperOrTitle(props) { + result = c + getDelta(props) + } + return result, nil + } + + pe := getExceptions(props) + excWord := pe[0] + pe = pe[1:] + var idx int32 + + if excWord&excConditionalFold != 0 { + /* use hardcoded conditions and mappings */ + /* default mappings */ + if c == 0x49 { + /* 0049; C; 0069; # LATIN CAPITAL LETTER I */ + return 0x69, nil + } else if c == 0x130 { + /* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */ + return -1, []uint16{0x69, 0x307} + } + } else if hasSlot(excWord, excFullMappings) { + full, pe := getSlotValue(excWord, excFullMappings, pe) + + /* start of full case mapping strings */ + pe = pe[1:] + + /* skip the lowercase result string */ + pe = pe[full&fullLower:] + full = (full >> 4) & 0xf + + if full != 0 { + /* set the output pointer to the result string */ + return -1, pe[:full] + } + } + + if excWord&excNoSimpleCaseFolding != 0 { + return result, nil + } + if hasSlot(excWord, excDelta) && isUpperOrTitle(props) { + delta, _ := getSlotValue(excWord, excDelta, pe) + if excWord&excDeltaIsNegative == 0 { + return c + delta, nil + } + return c - delta, nil + } + if hasSlot(excWord, excFold) { + idx = excFold + } else if hasSlot(excWord, excLower) { + idx = excLower + } else { + return c, nil + } + result, _ = getSlotValue(excWord, idx, pe) + return result, 
nil +} + +const ( + excLower = iota + excFold + excUpper + excTitle + excDelta + exc5 /* reserved */ + excClosure + excFullMappings +) + +const ( + /* complex/conditional mappings */ + excConditionalSpecial = 0x4000 + excConditionalFold = 0x8000 + excNoSimpleCaseFolding = 0x200 + excDeltaIsNegative = 0x400 + excSensitive = 0x800 + + excDoubleSlots = 0x100 +) + +func isUpperOrTitle(props uint16) bool { + return props&2 != 0 +} + +func getDelta(props uint16) rune { + return rune(int16(props) >> 7) +} + +func getExceptions(props uint16) []uint16 { + return ucase.exceptions[props>>4:] +} + +func hasSlot(flags uint16, idx int32) bool { + return (flags & (1 << idx)) != 0 +} + +func slotOffset(flags uint16, idx int32) int { + return bits.OnesCount8(uint8(flags & ((1 << idx) - 1))) +} + +func getSlotValue(excWord uint16, idx int32, pExc16 []uint16) (int32, []uint16) { + if excWord&excDoubleSlots == 0 { + pExc16 = pExc16[slotOffset(excWord, idx):] + return int32(pExc16[0]), pExc16 + } + pExc16 = pExc16[2*slotOffset(excWord, idx):] + return (int32(pExc16[0]) << 16) | int32(pExc16[1]), pExc16[1:] +} diff --git a/go/mysql/icuregex/internal/ucase/ucase.go b/go/mysql/icuregex/internal/ucase/ucase.go new file mode 100644 index 00000000000..9fb8407ea66 --- /dev/null +++ b/go/mysql/icuregex/internal/ucase/ucase.go @@ -0,0 +1,425 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
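slotOffset above locates an optional slot inside the exception data by counting how many lower-numbered slots are present, i.e. a popcount over the flag bits below idx. A tiny standalone illustration of that indexing rule with an invented flag word:

```go
package main

import (
	"fmt"
	"math/bits"
)

// Slot numbers as in the const block above.
const (
	excLower = iota
	excFold
	excUpper
	excTitle
	excDelta
)

// slotOffset mirrors the helper above: present slots are stored densely, so
// the offset of slot idx is the number of present slots with a smaller number.
func slotOffset(flags uint16, idx uint) int {
	below := (uint16(1) << idx) - 1 // mask of all slot bits lower than idx
	return bits.OnesCount16(flags & below)
}

func main() {
	// Invented exception word: only the lower, upper and title slots exist.
	flags := uint16(1<<excLower | 1<<excUpper | 1<<excTitle)

	fmt.Println(slotOffset(flags, excLower)) // 0: nothing below it
	fmt.Println(slotOffset(flags, excUpper)) // 1: only excLower is present below
	fmt.Println(slotOffset(flags, excTitle)) // 2: excLower and excUpper precede it
	fmt.Println(slotOffset(flags, excDelta)) // 3: would follow the three present slots
}
```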
+*/ + +package ucase + +import ( + "errors" + + "vitess.io/vitess/go/mysql/icuregex/internal/icudata" + "vitess.io/vitess/go/mysql/icuregex/internal/udata" + "vitess.io/vitess/go/mysql/icuregex/internal/utf16" + "vitess.io/vitess/go/mysql/icuregex/internal/utrie" +) + +var ucase struct { + trie *utrie.UTrie2 + exceptions []uint16 + unfold []uint16 +} + +const ( + ixIndexTop = 0 + ixLength = 1 + ixTrieSize = 2 + ixExcLength = 3 + ixUnfoldLength = 4 + ixMaxFullLength = 15 + ixTop = 16 +) + +func readData(bytes *udata.Bytes) error { + err := bytes.ReadHeader(func(info *udata.DataInfo) bool { + return info.DataFormat[0] == 0x63 && + info.DataFormat[1] == 0x41 && + info.DataFormat[2] == 0x53 && + info.DataFormat[3] == 0x45 && + info.FormatVersion[0] == 4 + }) + if err != nil { + return err + } + + count := int32(bytes.Uint32()) + if count < ixTop { + return errors.New("indexes[0] too small in ucase.icu") + } + + indexes := make([]int32, count) + indexes[0] = count + + for i := int32(1); i < count; i++ { + indexes[i] = int32(bytes.Uint32()) + } + + ucase.trie, err = utrie.UTrie2FromBytes(bytes) + if err != nil { + return err + } + + expectedTrieLength := indexes[ixTrieSize] + trieLength := ucase.trie.SerializedLength() + + if trieLength > expectedTrieLength { + return errors.New("ucase.icu: not enough bytes for the trie") + } + + bytes.Skip(expectedTrieLength - trieLength) + + if n := indexes[ixExcLength]; n > 0 { + ucase.exceptions = bytes.Uint16Slice(n) + } + if n := indexes[ixUnfoldLength]; n > 0 { + ucase.unfold = bytes.Uint16Slice(n) + } + + return nil +} + +func init() { + b := udata.NewBytes(icudata.UCase) + if err := readData(b); err != nil { + panic(err) + } +} + +type propertySet interface { + AddRune(ch rune) +} + +func AddPropertyStarts(sa propertySet) { + /* add the start code point of each same-value range of the trie */ + ucase.trie.Enum(nil, func(start, _ rune, _ uint32) bool { + sa.AddRune(start) + return true + }) + + /* add code points with hardcoded properties, plus the ones following them */ + + /* (none right now, see comment below) */ + + /* + * Omit code points with hardcoded specialcasing properties + * because we do not build property UnicodeSets for them right now. + */ +} + +const ( + fullMappingsMaxLength = (4 * 0xf) + closureMaxLength = 0xf + + fullLower = 0xf + fullFolding = 0xf0 + fullUpper = 0xf00 + fullTitle = 0xf000 +) + +func AddCaseClosure(c rune, sa propertySet) { + /* + * Hardcode the case closure of i and its relatives and ignore the + * data file data for these characters. + * The Turkic dotless i and dotted I with their case mapping conditions + * and case folding option make the related characters behave specially. + * This code matches their closure behavior to their case folding behavior. 
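The comment above explains why the case closure of i, I, dotted İ and dotless ı is hardcoded instead of taken from the data file. A toy illustration of the behaviour that hardcoding produces, with a throwaway set type standing in for the propertySet interface (none of this is the package's API):

```go
package main

import "fmt"

// runeSet is a stand-in for the propertySet interface used above.
type runeSet map[rune]bool

func (s runeSet) AddRune(c rune) { s[c] = true }

// addCaseClosureOfI reproduces only the hardcoded branch described above:
// plain i and I close over each other, dotted İ (U+0130) only closes over a
// multi-character string (which the regex engine drops), and dotless ı
// (U+0131) is in a class by itself.
func addCaseClosureOfI(c rune, s runeSet) {
	switch c {
	case 'I':
		s.AddRune('i')
	case 'i':
		s.AddRune('I')
	case 0x130, 0x131:
		// no single-code-point closure
	}
}

func main() {
	for _, c := range []rune{'I', 'i', 0x130, 0x131} {
		s := runeSet{}
		addCaseClosureOfI(c, s)
		fmt.Printf("%c -> %v\n", c, s)
	}
}
```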
+ */ + + switch c { + case 0x49: + /* regular i and I are in one equivalence class */ + sa.AddRune(0x69) + return + case 0x69: + sa.AddRune(0x49) + return + case 0x130: + /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */ + // the Regex engine calls removeAllStrings() on all UnicodeSets, so we don't need to insert them + // sa->addString(sa->set, iDot, 2); + return + case 0x131: + /* dotless i is in a class by itself */ + return + default: + /* otherwise use the data file data */ + break + } + + props := ucase.trie.Get16(c) + if !hasException(props) { + if getPropsType(props) != None { + /* add the one simple case mapping, no matter what type it is */ + delta := getDelta(props) + if delta != 0 { + sa.AddRune(c + delta) + } + } + } else { + /* + * c has exceptions, so there may be multiple simple and/or + * full case mappings. Add them all. + */ + pe := getExceptions(props) + excWord := pe[0] + pe = pe[1:] + var idx int32 + var closure []uint16 + + /* add all simple case mappings */ + for idx = excLower; idx <= excTitle; idx++ { + if hasSlot(excWord, idx) { + c, _ = getSlotValue(excWord, idx, pe) + sa.AddRune(c) + } + } + if hasSlot(excWord, excDelta) { + delta, _ := getSlotValue(excWord, excDelta, pe) + if excWord&excDeltaIsNegative == 0 { + sa.AddRune(c + delta) + } else { + sa.AddRune(c - delta) + } + } + + /* get the closure string pointer & length */ + if hasSlot(excWord, excClosure) { + closureLength, pe1 := getSlotValue(excWord, excClosure, pe) + closureLength &= closureMaxLength /* higher bits are reserved */ + closure = pe1[1 : 1+closureLength] /* behind this slot, unless there are full case mappings */ + } + + /* add the full case folding */ + if hasSlot(excWord, excFullMappings) { + fullLength, pe1 := getSlotValue(excWord, excFullMappings, pe) + + /* start of full case mapping strings */ + pe1 = pe1[1:] + + fullLength &= 0xffff /* bits 16 and higher are reserved */ + + /* skip the lowercase result string */ + pe1 = pe1[fullLength&fullLower:] + fullLength >>= 4 + + /* skip adding the case folding strings */ + length := fullLength & 0xf + pe1 = pe1[length:] + + /* skip the uppercase and titlecase strings */ + fullLength >>= 4 + pe1 = pe1[fullLength&0xf:] + fullLength >>= 4 + pe1 = pe1[fullLength:] + + closure = pe1[:len(closure)] + } + + /* add each code point in the closure string */ + for len(closure) > 0 { + c, closure = utf16.NextUnsafe(closure) + sa.AddRune(c) + } + } +} + +const dotMask = 0x60 + +const ( + noDot = 0 /* normal characters with cc=0 */ + softDotted = 0x20 /* soft-dotted characters with cc=0 */ + above = 0x40 /* "above" accents with cc=230 */ + otherAccent = 0x60 /* other accent character (0> excDotShift) & dotMask) +} + +func IsCaseSensitive(c rune) bool { + props := ucase.trie.Get16(c) + if !hasException(props) { + return (props & sensitive) != 0 + } + pe := getExceptions(props) + return (pe[0] & excSensitive) != 0 +} + +func ToFullLower(c rune) rune { + // The sign of the result has meaning, input must be non-negative so that it can be returned as is. 
+ result := c + props := ucase.trie.Get16(c) + if !hasException(props) { + if isUpperOrTitle(props) { + result = c + getDelta(props) + } + } else { + pe := getExceptions(props) + excWord := pe[0] + pe = pe[1:] + + if excWord&excConditionalSpecial != 0 { + /* use hardcoded conditions and mappings */ + if c == 0x130 { + return 2 + } + /* no known conditional special case mapping, use a normal mapping */ + } else if hasSlot(excWord, excFullMappings) { + full, _ := getSlotValue(excWord, excFullMappings, pe) + full = full & fullLower + if full != 0 { + /* return the string length */ + return full + } + } + + if hasSlot(excWord, excDelta) && isUpperOrTitle(props) { + delta, _ := getSlotValue(excWord, excDelta, pe) + if (excWord & excDeltaIsNegative) == 0 { + return c + delta + } + return c - delta + } + if hasSlot(excWord, excLower) { + result, _ = getSlotValue(excWord, excLower, pe) + } + } + + if result == c { + return ^result + } + return result +} + +func ToFullUpper(c rune) rune { + return toUpperOrTitle(c, true) +} + +func ToFullTitle(c rune) rune { + return toUpperOrTitle(c, false) +} + +func toUpperOrTitle(c rune, upperNotTitle bool) rune { + result := c + props := ucase.trie.Get16(c) + if !hasException(props) { + if getPropsType(props) == Lower { + result = c + getDelta(props) + } + } else { + pe := getExceptions(props) + excWord := pe[0] + pe = pe[1:] + + if excWord&excConditionalSpecial != 0 { + if c == 0x0587 { + return 2 + } + /* no known conditional special case mapping, use a normal mapping */ + } else if hasSlot(excWord, excFullMappings) { + full, _ := getSlotValue(excWord, excFullMappings, pe) + + /* skip the lowercase and case-folding result strings */ + full >>= 8 + + if upperNotTitle { + full &= 0xf + } else { + /* skip the uppercase result string */ + full = (full >> 4) & 0xf + } + + if full != 0 { + /* return the string length */ + return full + } + } + + if hasSlot(excWord, excDelta) && getPropsType(props) == Lower { + delta, _ := getSlotValue(excWord, excDelta, pe) + if (excWord & excDeltaIsNegative) == 0 { + return c + delta + } + return c - delta + } + var idx int32 + if !upperNotTitle && hasSlot(excWord, excTitle) { + idx = excTitle + } else if hasSlot(excWord, excUpper) { + /* here, titlecase is same as uppercase */ + idx = excUpper + } else { + return ^c + } + result, _ = getSlotValue(excWord, idx, pe) + } + + if result == c { + return ^result + } + return result +} + +func GetTypeOrIgnorable(c rune) int32 { + props := ucase.trie.Get16(c) + return int32(props & 7) +} + +type Type int32 + +const ( + None Type = iota + Lower + Upper + Title +) + +const typeMask = 3 + +func GetType(c rune) Type { + props := ucase.trie.Get16(c) + return getPropsType(props) +} + +func getPropsType(props uint16) Type { + return Type(props & typeMask) +} diff --git a/go/mysql/icuregex/internal/uchar/constants.go b/go/mysql/icuregex/internal/uchar/constants.go new file mode 100644 index 00000000000..1ab96751b5c --- /dev/null +++ b/go/mysql/icuregex/internal/uchar/constants.go @@ -0,0 +1,240 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
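ToFullLower and toUpperOrTitle above overload their return value: the bitwise complement of the input means the character maps to itself, a small positive value is the length of a full mapping string, and anything else is the mapped code point. A hedged sketch of how a caller might branch on that convention; the 31-unit cutoff is an assumption borrowed from ICU, not something stated in this file:

```go
package main

import "fmt"

// maxFullMappingLength is an assumption borrowed from ICU (full case mappings
// are at most 31 UTF-16 units); the package may use a different cutoff.
const maxFullMappingLength = 31

// describe interprets a result using the ToFullLower/toUpperOrTitle convention:
// ^c (negative) means "maps to itself", a small positive value is the length
// of a full mapping string, anything else is the mapped code point.
func describe(c, res rune) string {
	switch {
	case res < 0:
		return fmt.Sprintf("%U maps to itself", ^res)
	case res <= maxFullMappingLength:
		return fmt.Sprintf("%U has a full mapping of %d units", c, res)
	default:
		return fmt.Sprintf("%U maps to %U", c, res)
	}
}

func main() {
	fmt.Println(describe('A', 'a'))  // simple mapping: A -> a
	fmt.Println(describe('a', ^'a')) // already lowercase: maps to itself
	fmt.Println(describe(0x130, 2))  // İ lowercases to a 2-unit string
}
```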
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package uchar + +import "golang.org/x/exp/constraints" + +func uMask[T constraints.Integer](x T) uint32 { + return 1 << x +} + +type Category int8 + +const ( + /* + * Note: UCharCategory constants and their API comments are parsed by preparseucd.py. + * It matches pairs of lines like + * / ** comment... * / + * U_<[A-Z_]+> = , + */ + + /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */ + Unassigned Category = 0 + /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) @stable ICU 2.0 */ + GeneralOtherTypes Category = iota - 1 + /** Lu @stable ICU 2.0 */ + UppercaseLetter + /** Ll @stable ICU 2.0 */ + LowercaseLetter + /** Lt @stable ICU 2.0 */ + TitlecaseLetter + /** Lm @stable ICU 2.0 */ + ModifierLetter + /** Lo @stable ICU 2.0 */ + OtherLetter + /** Mn @stable ICU 2.0 */ + NonSpacingMask + /** Me @stable ICU 2.0 */ + EnclosingMark + /** Mc @stable ICU 2.0 */ + CombiningSpacingMask + /** Nd @stable ICU 2.0 */ + DecimalDigitNumber + /** Nl @stable ICU 2.0 */ + LetterNumber + /** No @stable ICU 2.0 */ + OtherNumber + /** Zs @stable ICU 2.0 */ + SpaceSeparator + /** Zl @stable ICU 2.0 */ + LineSeparator + /** Zp @stable ICU 2.0 */ + ParagraphSeparator + /** Cc @stable ICU 2.0 */ + ControlChar + /** Cf @stable ICU 2.0 */ + FormatChar + /** Co @stable ICU 2.0 */ + PrivateUseChar + /** Cs @stable ICU 2.0 */ + Surrogate + /** Pd @stable ICU 2.0 */ + DashPunctuation + /** Ps @stable ICU 2.0 */ + StartPunctuation + /** Pe @stable ICU 2.0 */ + EndPunctuation + /** Pc @stable ICU 2.0 */ + ConnectorPunctuation + /** Po @stable ICU 2.0 */ + OtherPunctuation + /** Sm @stable ICU 2.0 */ + MathSymbol + /** Sc @stable ICU 2.0 */ + CurrencySymbol + /** Sk @stable ICU 2.0 */ + ModifierSymbol + /** So @stable ICU 2.0 */ + OtherSymbol + /** Pi @stable ICU 2.0 */ + InitialPunctuation + /** Pf @stable ICU 2.0 */ + FinalPunctuation + /** + * One higher than the last enum UCharCategory constant. + * This numeric value is stable (will not change), see + * http://www.unicode.org/policies/stability_policy.html#Property_Value + * + * @stable ICU 2.0 + */ + CharCategoryCount +) + +var ( + GcCnMask = uMask(GeneralOtherTypes) + + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcLuMask = uMask(UppercaseLetter) + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcLlMask = uMask(LowercaseLetter) + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcLtMask = uMask(TitlecaseLetter) + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcLmMask = uMask(ModifierLetter) + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcLoMask = uMask(OtherLetter) + + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcMnMask = uMask(NonSpacingMask) + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcMeMask = uMask(EnclosingMark) + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcMcMask = uMask(CombiningSpacingMask) + + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcNdMask = uMask(DecimalDigitNumber) + /** Mask constant for a UCharCategory. 
@stable ICU 2.1 */ + GcNlMask = uMask(LetterNumber) + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcNoMask = uMask(OtherNumber) + + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcZsMask = uMask(SpaceSeparator) + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcZlMask = uMask(LineSeparator) + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcZpMask = uMask(ParagraphSeparator) + + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcCcMask = uMask(ControlChar) + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcCfMask = uMask(FormatChar) + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcCoMask = uMask(PrivateUseChar) + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcCsMask = uMask(Surrogate) + + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcPdMask = uMask(DashPunctuation) + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcPsMask = uMask(StartPunctuation) + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcPeMask = uMask(EndPunctuation) + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcPcMask = uMask(ConnectorPunctuation) + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcPoMask = uMask(OtherPunctuation) + + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcSmMask = uMask(MathSymbol) + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcScMask = uMask(CurrencySymbol) + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcSkMask = uMask(ModifierSymbol) + /** Mask constant for a UCharCategory. @stable ICU 2.1 */ + GcSoMask = uMask(OtherSymbol) + + /** Mask constant for multiple UCharCategory bits (L Letters). @stable ICU 2.1 */ + GcLMask = (GcLuMask | GcLlMask | GcLtMask | GcLmMask | GcLoMask) + + /** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */ + GcLcMask = (GcLuMask | GcLlMask | GcLtMask) + + /** Mask constant for multiple UCharCategory bits (M Marks). @stable ICU 2.1 */ + GcMMask = (GcMnMask | GcMeMask | GcMcMask) + + /** Mask constant for multiple UCharCategory bits (N Numbers). @stable ICU 2.1 */ + GcNMask = (GcNdMask | GcNlMask | GcNoMask) + + /** Mask constant for multiple UCharCategory bits (Z Separators). @stable ICU 2.1 */ + GcZMask = (GcZsMask | GcZlMask | GcZpMask) +) + +const upropsAgeShift = 24 +const maxVersionLength = 4 +const versionDelimiter = '.' + +type UVersionInfo [maxVersionLength]uint8 + +const ( + /** No numeric value. 
*/ + UPropsNtvNone = 0 + /** Decimal digits: nv=0..9 */ + UPropsNtvDecimalStart = 1 + /** Other digits: nv=0..9 */ + UPropsNtvDigitStart = 11 + /** Small integers: nv=0..154 */ + UPropsNtvNumericStart = 21 + /** Fractions: ((ntv>>4)-12) / ((ntv&0xf)+1) = -1..17 / 1..16 */ + UPropsNtvFractionStart = 0xb0 + /** + * Large integers: + * ((ntv>>5)-14) * 10^((ntv&0x1f)+2) = (1..9)*(10^2..10^33) + * (only one significant decimal digit) + */ + UPropsNtvLargeStart = 0x1e0 + /** + * Sexagesimal numbers: + * ((ntv>>2)-0xbf) * 60^((ntv&3)+1) = (1..9)*(60^1..60^4) + */ + UPropsNtvBase60Start = 0x300 + /** + * Fraction-20 values: + * frac20 = ntv-0x324 = 0..0x17 -> 1|3|5|7 / 20|40|80|160|320|640 + * numerator: num = 2*(frac20&3)+1 + * denominator: den = 20<<(frac20>>2) + */ + UPropsNtvFraction20Start = UPropsNtvBase60Start + 36 // 0x300+9*4=0x324 + /** + * Fraction-32 values: + * frac32 = ntv-0x34c = 0..15 -> 1|3|5|7 / 32|64|128|256 + * numerator: num = 2*(frac32&3)+1 + * denominator: den = 32<<(frac32>>2) + */ + UPropsNtvFraction32Start = UPropsNtvFraction20Start + 24 // 0x324+6*4=0x34c + /** No numeric value (yet). */ + UPropsNtvReservedStart = UPropsNtvFraction32Start + 16 // 0x34c+4*4=0x35c + + UPropsNtvMaxSmallInt = UPropsNtvFractionStart - UPropsNtvNumericStart - 1 +) + +const noNumericValue = -123456789.0 diff --git a/go/mysql/icuregex/internal/uchar/uchar.go b/go/mysql/icuregex/internal/uchar/uchar.go new file mode 100644 index 00000000000..a2c758ea1c0 --- /dev/null +++ b/go/mysql/icuregex/internal/uchar/uchar.go @@ -0,0 +1,405 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
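The Gc*Mask values above are one-bit-per-category masks, so a general-category test becomes a single AND against a combined mask (IsGraphPOSIX in uchar.go below relies on the same trick). A standalone sketch of the idiom with a local stand-in for uMask; the category numbers match the Category constants above:

```go
package main

import "fmt"

// A few category values as in the Category enum above.
const (
	uppercaseLetter = 1
	lowercaseLetter = 2
	decimalDigit    = 9
	spaceSeparator  = 12
)

// mask turns a category number into its single-bit mask, like uMask above.
func mask(cat uint) uint32 { return 1 << cat }

func main() {
	// Combined "letter" mask, analogous to GcLMask (simplified here to Lu|Ll).
	letters := mask(uppercaseLetter) | mask(lowercaseLetter)

	for _, cat := range []uint{uppercaseLetter, decimalDigit, spaceSeparator} {
		fmt.Printf("category %d is letter: %v\n", cat, mask(cat)&letters != 0)
	}
}
```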
+*/ + +package uchar + +import ( + "errors" + "strconv" + + "vitess.io/vitess/go/mysql/icuregex/internal/icudata" + "vitess.io/vitess/go/mysql/icuregex/internal/udata" + "vitess.io/vitess/go/mysql/icuregex/internal/utrie" +) + +var uprops struct { + trie *utrie.UTrie2 + trie2 *utrie.UTrie2 + vectorsColumns int32 + vectors []uint32 + scriptExtensions []uint16 +} + +func readData(bytes *udata.Bytes) error { + err := bytes.ReadHeader(func(info *udata.DataInfo) bool { + return info.DataFormat[0] == 0x55 && + info.DataFormat[1] == 0x50 && + info.DataFormat[2] == 0x72 && + info.DataFormat[3] == 0x6f && + info.FormatVersion[0] == 7 + }) + if err != nil { + return err + } + + propertyOffset := bytes.Int32() + /* exceptionOffset = */ bytes.Int32() + /* caseOffset = */ bytes.Int32() + additionalOffset := bytes.Int32() + additionalVectorsOffset := bytes.Int32() + uprops.vectorsColumns = bytes.Int32() + scriptExtensionsOffset := bytes.Int32() + reservedOffset7 := bytes.Int32() + /* reservedOffset8 = */ bytes.Int32() + /* dataTopOffset = */ bytes.Int32() + _ = bytes.Int32() + _ = bytes.Int32() + bytes.Skip((16 - 12) << 2) + + uprops.trie, err = utrie.UTrie2FromBytes(bytes) + if err != nil { + return err + } + + expectedTrieLength := (propertyOffset - 16) * 4 + trieLength := uprops.trie.SerializedLength() + + if trieLength > expectedTrieLength { + return errors.New("ucase.icu: not enough bytes for the trie") + } + + bytes.Skip(expectedTrieLength - trieLength) + bytes.Skip((additionalOffset - propertyOffset) * 4) + + if uprops.vectorsColumns > 0 { + uprops.trie2, err = utrie.UTrie2FromBytes(bytes) + if err != nil { + return err + } + + expectedTrieLength = (additionalVectorsOffset - additionalOffset) * 4 + trieLength = uprops.trie2.SerializedLength() + + if trieLength > expectedTrieLength { + return errors.New("ucase.icu: not enough bytes for the trie") + } + + bytes.Skip(expectedTrieLength - trieLength) + uprops.vectors = bytes.Uint32Slice(scriptExtensionsOffset - additionalVectorsOffset) + } + + if n := (reservedOffset7 - scriptExtensionsOffset) * 2; n > 0 { + uprops.scriptExtensions = bytes.Uint16Slice(n) + } + + return nil +} + +func init() { + b := udata.NewBytes(icudata.UProps) + if err := readData(b); err != nil { + panic(err) + } +} + +type PropertySet interface { + AddRune(ch rune) +} + +func VecAddPropertyStarts(sa PropertySet) { + uprops.trie2.Enum(nil, func(start, _ rune, _ uint32) bool { + sa.AddRune(start) + return true + }) +} + +const ( + tab = 0x0009 + lf = 0x000a + ff = 0x000c + cr = 0x000d + nbsp = 0x00a0 + cgj = 0x034f + figuresp = 0x2007 + hairsp = 0x200a + zwnj = 0x200c + zwj = 0x200d + rlm = 0x200f + nnbsp = 0x202f + zwnbsp = 0xfef +) + +func AddPropertyStarts(sa PropertySet) { + /* add the start code point of each same-value range of the main trie */ + uprops.trie.Enum(nil, func(start, _ rune, _ uint32) bool { + sa.AddRune(start) + return true + }) + + /* add code points with hardcoded properties, plus the ones following them */ + + /* add for u_isblank() */ + sa.AddRune(tab) + sa.AddRune(tab + 1) + + /* add for IS_THAT_CONTROL_SPACE() */ + sa.AddRune(cr + 1) /* range TAB..CR */ + sa.AddRune(0x1c) + sa.AddRune(0x1f + 1) + sa.AddRune(0x85) // NEXT LINE (NEL) + sa.AddRune(0x85 + 1) + + /* add for u_isIDIgnorable() what was not added above */ + sa.AddRune(0x7f) /* range DEL..NBSP-1, NBSP added below */ + sa.AddRune(hairsp) + sa.AddRune(rlm + 1) + sa.AddRune(0x206a) // INHIBIT SYMMETRIC SWAPPING + sa.AddRune(0x206f + 1) // NOMINAL DIGIT SHAPES + sa.AddRune(zwnbsp) + sa.AddRune(zwnbsp 
+ 1) + + /* add no-break spaces for u_isWhitespace() what was not added above */ + sa.AddRune(nbsp) + sa.AddRune(nbsp + 1) + sa.AddRune(figuresp) + sa.AddRune(figuresp + 1) + sa.AddRune(nnbsp) + sa.AddRune(nnbsp + 1) + + /* add for u_digit() */ + sa.AddRune('a') + sa.AddRune('z' + 1) + sa.AddRune('A') + sa.AddRune('Z' + 1) + // fullwidth + sa.AddRune('a') + sa.AddRune('z' + 1) + sa.AddRune('A') + sa.AddRune('Z' + 1) + + /* add for u_isxdigit() */ + sa.AddRune('f' + 1) + sa.AddRune('F' + 1) + // fullwidth + sa.AddRune('f' + 1) + sa.AddRune('F' + 1) + + /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */ + sa.AddRune(0x2060) /* range 2060..206f */ + sa.AddRune(0xfff0) + sa.AddRune(0xfffb + 1) + sa.AddRune(0xe0000) + sa.AddRune(0xe0fff + 1) + + /* add for UCHAR_GRAPHEME_BASE and others */ + sa.AddRune(cgj) + sa.AddRune(cgj + 1) +} + +func CharType(c rune) Category { + props := uprops.trie.Get16(c) + return getCategory(props) +} + +func GetProperties(c rune) uint16 { + return uprops.trie.Get16(c) +} + +func getCategory(props uint16) Category { + return Category(props & 0x1f) +} + +func GetUnicodeProperties(c rune, column int) uint32 { + if column >= int(uprops.vectorsColumns) { + return 0 + } + vecIndex := uprops.trie2.Get16(c) + return uprops.vectors[int(vecIndex)+column] +} + +func ScriptExtension(idx uint32) uint16 { + return uprops.scriptExtensions[idx] +} + +func ScriptExtensions(idx uint32) []uint16 { + return uprops.scriptExtensions[idx:] +} + +func IsDigit(c rune) bool { + return CharType(c) == DecimalDigitNumber +} + +func IsPOSIXPrint(c rune) bool { + return CharType(c) == SpaceSeparator || IsGraphPOSIX(c) +} + +func IsGraphPOSIX(c rune) bool { + props := uprops.trie.Get16(c) + /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */ + /* comparing ==0 returns FALSE for the categories mentioned */ + return uMask(getCategory(props))&(GcCcMask|GcCsMask|GcCnMask|GcZMask) == 0 +} + +func IsXDigit(c rune) bool { + /* check ASCII and Fullwidth ASCII a-fA-F */ + if (c <= 0x66 && c >= 0x41 && (c <= 0x46 || c >= 0x61)) || + (c >= 0xff21 && c <= 0xff46 && (c <= 0xff26 || c >= 0xff41)) { + return true + } + return IsDigit(c) +} + +func IsBlank(c rune) bool { + if c <= 0x9f { + return c == 9 || c == 0x20 /* TAB or SPACE */ + } + /* Zs */ + return CharType(c) == SpaceSeparator +} + +func CharAge(c rune) UVersionInfo { + version := GetUnicodeProperties(c, 0) >> upropsAgeShift + return UVersionInfo{uint8(version >> 4), uint8(version & 0xf), 0, 0} +} + +func VersionFromString(str string) (version UVersionInfo) { + part := 0 + for len(str) > 0 && part < maxVersionLength { + if str[0] == versionDelimiter { + str = str[1:] + } + str, version[part] = parseInt(str) + part++ + } + return +} + +// parseInt is simplified but aims to mimic strtoul usage +// as it is used for ICU version parsing. 
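CharAge above packs the Unicode version in which a character was assigned into two nibbles, and VersionFromString parses dotted strings into the same four-byte form, so an "introduced by Unicode X.Y" test is a field-by-field comparison. A sketch of that comparison; the array layout follows UVersionInfo above and the helper name is invented:

```go
package main

import "fmt"

// versionInfo mirrors UVersionInfo above: major, minor, milli, micro.
type versionInfo [4]uint8

// atLeast reports whether version a is >= b, comparing fields in order,
// which is how an age check is usually phrased.
func atLeast(a, b versionInfo) bool {
	for i := range a {
		if a[i] != b[i] {
			return a[i] > b[i]
		}
	}
	return true
}

func main() {
	// Hypothetical ages, in the nibble-decoded form CharAge returns:
	// a character introduced in Unicode 3.2 vs one from 11.0.
	age32 := versionInfo{3, 2, 0, 0}
	age110 := versionInfo{11, 0, 0, 0}
	cutoff := versionInfo{6, 0, 0, 0}

	fmt.Println(atLeast(age32, cutoff))  // false: 3.2 predates 6.0
	fmt.Println(atLeast(age110, cutoff)) // true: 11.0 is at least 6.0
}
```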
+func parseInt(str string) (string, uint8) { + if str == "" { + return str, 0 + } + + start := 0 + end := 0 +whitespace: + for i := 0; i < len(str); i++ { + switch str[i] { + case ' ', '\f', '\n', '\r', '\t', '\v': + start++ + continue + default: + break whitespace + } + } + str = str[start:] + + for i := 0; i < len(str); i++ { + if str[i] < '0' || str[i] > '9' { + end = i + break + } + end++ + } + + val, err := strconv.ParseUint(str[start:end], 10, 8) + if err != nil { + return str[end:], 0 + } + return str[end:], uint8(val) +} + +const upropsNumericTypeValueShift = 6 + +func NumericTypeValue(c rune) uint16 { + props := uprops.trie.Get16(c) + return props >> upropsNumericTypeValueShift +} + +func NumericValue(c rune) float64 { + ntv := int32(NumericTypeValue(c)) + + if ntv == UPropsNtvNone { + return noNumericValue + } else if ntv < UPropsNtvDigitStart { + /* decimal digit */ + return float64(ntv - UPropsNtvDecimalStart) + } else if ntv < UPropsNtvNumericStart { + /* other digit */ + return float64(ntv - UPropsNtvDigitStart) + } else if ntv < UPropsNtvFractionStart { + /* small integer */ + return float64(ntv - UPropsNtvNumericStart) + } else if ntv < UPropsNtvLargeStart { + /* fraction */ + numerator := (ntv >> 4) - 12 + denominator := (ntv & 0xf) + 1 + return float64(numerator) / float64(denominator) + } else if ntv < UPropsNtvBase60Start { + /* large, single-significant-digit integer */ + mant := (ntv >> 5) - 14 + exp := (ntv & 0x1f) + 2 + numValue := float64(mant) + + /* multiply by 10^exp without math.h */ + for exp >= 4 { + numValue *= 10000. + exp -= 4 + } + switch exp { + case 3: + numValue *= 1000.0 + case 2: + numValue *= 100.0 + case 1: + numValue *= 10.0 + case 0: + default: + } + + return numValue + } else if ntv < UPropsNtvFraction20Start { + /* sexagesimal (base 60) integer */ + numValue := (ntv >> 2) - 0xbf + exp := (ntv & 3) + 1 + + switch exp { + case 4: + numValue *= 60 * 60 * 60 * 60 + case 3: + numValue *= 60 * 60 * 60 + case 2: + numValue *= 60 * 60 + case 1: + numValue *= 60 + case 0: + default: + } + + return float64(numValue) + } else if ntv < UPropsNtvFraction32Start { + // fraction-20 e.g. 3/80 + frac20 := ntv - UPropsNtvFraction20Start // 0..0x17 + numerator := 2*(frac20&3) + 1 + denominator := 20 << (frac20 >> 2) + return float64(numerator) / float64(denominator) + } else if ntv < UPropsNtvReservedStart { + // fraction-32 e.g. 3/64 + frac32 := ntv - UPropsNtvFraction32Start // 0..15 + numerator := 2*(frac32&3) + 1 + denominator := 32 << (frac32 >> 2) + return float64(numerator) / float64(denominator) + } else { + /* reserved */ + return noNumericValue + } +} diff --git a/go/mysql/icuregex/internal/udata/udata.go b/go/mysql/icuregex/internal/udata/udata.go new file mode 100644 index 00000000000..f20f8be1efa --- /dev/null +++ b/go/mysql/icuregex/internal/udata/udata.go @@ -0,0 +1,155 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
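NumericValue above decodes the packed numeric-type/value field with several arithmetic schemes (digits, fractions, single-significant-digit large integers, base 60, and the fraction-20/32 forms). A few worked decodings of hand-picked ntv codes under the encodings documented in constants.go; this replays three branches for illustration and is not the package API:

```go
package main

import "fmt"

// decode replays three branches of NumericValue above for hand-picked ntv
// codes; it is an illustration of the encodings, not the package API.
func decode(ntv int32) float64 {
	switch {
	case ntv >= 1 && ntv < 11: // decimal digit: nv = ntv-1
		return float64(ntv - 1)
	case ntv >= 0xb0 && ntv < 0x1e0: // fraction: ((ntv>>4)-12) / ((ntv&0xf)+1)
		return float64((ntv>>4)-12) / float64((ntv&0xf)+1)
	case ntv >= 0x1e0 && ntv < 0x300: // large: ((ntv>>5)-14) * 10^((ntv&0x1f)+2)
		value := float64((ntv >> 5) - 14)
		for exp := (ntv & 0x1f) + 2; exp > 0; exp-- {
			value *= 10
		}
		return value
	}
	return -1 // other encodings not reproduced here
}

func main() {
	fmt.Println(decode(8))     // 7: the digit seven
	fmt.Println(decode(0xd1))  // 0.5: numerator 1, denominator 2
	fmt.Println(decode(0x1e0)) // 100: mantissa 1, exponent 2
}
```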
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package udata + +import ( + "encoding/binary" + "errors" + "unsafe" +) + +type DataInfo struct { + /** sizeof(UDataInfo) + * @stable ICU 2.0 */ + Size uint16 + + /** unused, set to 0 + * @stable ICU 2.0*/ + ReservedWord uint16 + + /* platform data properties */ + /** 0 for little-endian machine, 1 for big-endian + * @stable ICU 2.0 */ + IsBigEndian uint8 + + /** see U_CHARSET_FAMILY values in utypes.h + * @stable ICU 2.0*/ + CharsetFamily uint8 + + /** sizeof(UChar), one of { 1, 2, 4 } + * @stable ICU 2.0*/ + SizeofUChar uint8 + + /** unused, set to 0 + * @stable ICU 2.0*/ + ReservedByte uint8 + + /** data format identifier + * @stable ICU 2.0*/ + DataFormat [4]uint8 + + /** versions: [0] major [1] minor [2] milli [3] micro + * @stable ICU 2.0*/ + FormatVersion [4]uint8 + + /** versions: [0] major [1] minor [2] milli [3] micro + * @stable ICU 2.0*/ + DataVersion [4]uint8 +} + +type Bytes struct { + buf []byte + orig []byte + enc binary.ByteOrder +} + +func NewBytes(b []byte) *Bytes { + return &Bytes{buf: b, orig: b, enc: binary.LittleEndian} +} + +func (b *Bytes) ReadHeader(isValid func(info *DataInfo) bool) error { + type MappedData struct { + headerSize uint16 + magic1 uint8 + magic2 uint8 + } + + type DataHeader struct { + dataHeader MappedData + info DataInfo + } + + data := unsafe.SliceData(b.buf) + header := (*DataHeader)(unsafe.Pointer(data)) + + if header.dataHeader.magic1 != 0xda || header.dataHeader.magic2 != 0x27 { + return errors.New("invalid magic number") + } + + if header.info.IsBigEndian != 0 { + return errors.New("unsupported: BigEndian data source") + } + + if !isValid(&header.info) { + return errors.New("failed to validate data header") + } + + b.buf = b.buf[header.dataHeader.headerSize:] + return nil +} + +func (b *Bytes) Uint8() uint8 { + u := b.buf[0] + b.buf = b.buf[1:] + return u +} +func (b *Bytes) Uint16() uint16 { + u := b.enc.Uint16(b.buf) + b.buf = b.buf[2:] + return u +} + +func (b *Bytes) Uint16Slice(size int32) []uint16 { + s := unsafe.Slice((*uint16)(unsafe.Pointer(unsafe.SliceData(b.buf))), size) + b.buf = b.buf[2*size:] + return s +} + +func (b *Bytes) Uint32Slice(size int32) []uint32 { + s := unsafe.Slice((*uint32)(unsafe.Pointer(unsafe.SliceData(b.buf))), size) + b.buf = b.buf[4*size:] + return s +} + +func (b *Bytes) Uint32() uint32 { + u := b.enc.Uint32(b.buf) + b.buf = b.buf[4:] + return u +} + +func (b *Bytes) Int32() int32 { + return int32(b.Uint32()) +} + +func (b *Bytes) Skip(size int32) { + b.buf = b.buf[size:] +} + +func (b *Bytes) Uint8Slice(n int32) []uint8 { + s := b.buf[:n] + b.buf = b.buf[n:] + return s +} + +func (b *Bytes) Position() int32 { + return int32(len(b.orig) - len(b.buf)) +} diff --git a/go/mysql/icuregex/internal/ulayout/ulayout.go b/go/mysql/icuregex/internal/ulayout/ulayout.go new file mode 100644 index 00000000000..dbf21d9460b --- /dev/null +++ b/go/mysql/icuregex/internal/ulayout/ulayout.go @@ -0,0 +1,128 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. 
+ +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package ulayout + +import ( + "errors" + "sync" + + "vitess.io/vitess/go/mysql/icuregex/internal/icudata" + "vitess.io/vitess/go/mysql/icuregex/internal/udata" + "vitess.io/vitess/go/mysql/icuregex/internal/utrie" +) + +var inpcTrie *utrie.UcpTrie +var inscTrie *utrie.UcpTrie +var voTrie *utrie.UcpTrie + +const ( + ixInpcTrieTop = 1 + ixInscTrieTop = 2 + ixVoTrieTop = 3 + + ixCount = 12 +) + +func InpcTrie() *utrie.UcpTrie { + loadLayouts() + return inpcTrie +} + +func InscTrie() *utrie.UcpTrie { + loadLayouts() + return inscTrie +} + +func VoTrie() *utrie.UcpTrie { + loadLayouts() + return voTrie +} + +var layoutsOnce sync.Once + +func loadLayouts() { + layoutsOnce.Do(func() { + b := udata.NewBytes(icudata.ULayout) + if err := readData(b); err != nil { + panic(err) + } + }) +} + +func readData(bytes *udata.Bytes) error { + err := bytes.ReadHeader(func(info *udata.DataInfo) bool { + return info.DataFormat[0] == 0x4c && + info.DataFormat[1] == 0x61 && + info.DataFormat[2] == 0x79 && + info.DataFormat[3] == 0x6f && + info.FormatVersion[0] == 1 + }) + if err != nil { + return err + } + + startPos := bytes.Position() + indexesLength := int32(bytes.Uint32()) // inIndexes[IX_INDEXES_LENGTH] + if indexesLength < ixCount { + return errors.New("text layout properties data: not enough indexes") + } + index := make([]int32, indexesLength) + index[0] = indexesLength + for i := int32(1); i < indexesLength; i++ { + index[i] = int32(bytes.Uint32()) + } + + offset := indexesLength * 4 + top := index[ixInpcTrieTop] + trieSize := top - offset + if trieSize >= 16 { + inpcTrie, err = utrie.UcpTrieFromBytes(bytes) + if err != nil { + return err + } + } + + pos := bytes.Position() - startPos + bytes.Skip(top - pos) + offset = top + top = index[ixInscTrieTop] + trieSize = top - offset + if trieSize >= 16 { + inscTrie, err = utrie.UcpTrieFromBytes(bytes) + if err != nil { + return err + } + } + + pos = bytes.Position() - startPos + bytes.Skip(top - pos) + offset = top + top = index[ixVoTrieTop] + trieSize = top - offset + if trieSize >= 16 { + voTrie, err = utrie.UcpTrieFromBytes(bytes) + if err != nil { + return err + } + } + return nil +} diff --git a/go/mysql/icuregex/internal/unames/unames.go b/go/mysql/icuregex/internal/unames/unames.go new file mode 100644 index 00000000000..45920be8292 --- /dev/null +++ b/go/mysql/icuregex/internal/unames/unames.go @@ -0,0 +1,471 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package unames + +import ( + "bytes" + "strconv" + "strings" + "sync" + + "vitess.io/vitess/go/mysql/icuregex/internal/icudata" + "vitess.io/vitess/go/mysql/icuregex/internal/udata" +) + +var charNamesOnce sync.Once +var charNames *unames + +type unames struct { + tokens []uint16 + tokenStrings []uint8 + groups []uint16 + groupNames []uint8 + algNames []algorithmicRange +} + +func loadCharNames() { + charNamesOnce.Do(func() { + b := udata.NewBytes(icudata.UNames) + if err := b.ReadHeader(func(info *udata.DataInfo) bool { + return info.Size >= 20 && + info.IsBigEndian == 0 && + info.CharsetFamily == 0 && + info.DataFormat[0] == 0x75 && /* dataFormat="unam" */ + info.DataFormat[1] == 0x6e && + info.DataFormat[2] == 0x61 && + info.DataFormat[3] == 0x6d && + info.FormatVersion[0] == 1 + }); err != nil { + panic(err) + } + + tokenStringOffset := int32(b.Uint32() - 16) + groupsOffset := int32(b.Uint32() - 16) + groupStringOffset := int32(b.Uint32() - 16) + algNamesOffset := int32(b.Uint32() - 16) + charNames = &unames{ + tokens: b.Uint16Slice(tokenStringOffset / 2), + tokenStrings: b.Uint8Slice(groupsOffset - tokenStringOffset), + groups: b.Uint16Slice((groupStringOffset - groupsOffset) / 2), + groupNames: b.Uint8Slice(algNamesOffset - groupStringOffset), + } + + algCount := b.Uint32() + charNames.algNames = make([]algorithmicRange, 0, algCount) + + for i := uint32(0); i < algCount; i++ { + ar := algorithmicRange{ + start: b.Uint32(), + end: b.Uint32(), + typ: b.Uint8(), + variant: b.Uint8(), + } + size := b.Uint16() + switch ar.typ { + case 0: + ar.s = b.Uint8Slice(int32(size) - 12) + case 1: + ar.factors = b.Uint16Slice(int32(ar.variant)) + ar.s = b.Uint8Slice(int32(size) - 12 - int32(ar.variant)*2) + } + charNames.algNames = append(charNames.algNames, ar) + } + }) +} + +func (names *unames) getGroupName(group []uint16) []uint8 { + return names.groupNames[names.getGroupOffset(group):] +} + +type NameChoice int32 + +const ( + UnicodeCharName NameChoice = iota + /** + * The Unicode_1_Name property value which is of little practical value. + * Beginning with ICU 49, ICU APIs return an empty string for this name choice. + * @deprecated ICU 49 + */ + Unicode10CharName + /** Standard or synthetic character name. @stable ICU 2.0 */ + ExtendedCharName + /** Corrected name from NameAliases.txt. 
@stable ICU 4.4 */ + CharNameAlias +) + +type algorithmicRange struct { + start, end uint32 + typ, variant uint8 + factors []uint16 + s []uint8 +} + +func (ar *algorithmicRange) findAlgName(otherName string) rune { + switch ar.typ { + case 0: + s := ar.s + + for s[0] != 0 && len(otherName) > 0 { + if s[0] != otherName[0] { + return -1 + } + s = s[1:] + otherName = otherName[1:] + } + + var code rune + count := int(ar.variant) + for i := 0; i < count && len(otherName) > 0; i++ { + c := rune(otherName[0]) + otherName = otherName[1:] + if '0' <= c && c <= '9' { + code = (code << 4) | (c - '0') + } else if 'A' <= c && c <= 'F' { + code = (code << 4) | (c - 'A' + 10) + } else { + return -1 + } + } + + if len(otherName) == 0 && ar.start <= uint32(code) && uint32(code) <= ar.end { + return code + } + case 1: + factors := ar.factors + s := ar.s + + for s[0] != 0 && len(otherName) > 0 { + if s[0] != otherName[0] { + return -1 + } + s = s[1:] + otherName = otherName[1:] + } + s = s[1:] + + start := rune(ar.start) + limit := rune(ar.end + 1) + + var indexes [8]uint16 + var buf strings.Builder + var elements [8][]byte + var elementBases [8][]byte + + ar.writeFactorSuffix0(factors, s, &buf, &elements, &elementBases) + if buf.String() == otherName { + return start + } + + for start+1 < limit { + start++ + i := len(factors) + + for { + i-- + idx := indexes[i] + 1 + if idx < factors[i] { + indexes[i] = idx + s = elements[i] + s = s[bytes.IndexByte(s, 0)+1:] + elements[i] = s + break + } + + indexes[i] = 0 + elements[i] = elementBases[i] + } + + t := otherName + for i = 0; i < len(factors); i++ { + s = elements[i] + + for s[0] != 0 && len(t) > 0 { + if s[0] != t[0] { + s = nil + i = 99 + break + } + s = s[1:] + t = t[1:] + } + } + if i < 99 && len(t) == 0 { + return start + } + } + } + return -1 +} + +func (ar *algorithmicRange) writeFactorSuffix0(factors []uint16, s []uint8, buf *strings.Builder, elements, elementBases *[8][]byte) { + /* write each element */ + for i := 0; i < len(factors); i++ { + (*elements)[i] = s + (*elementBases)[i] = s + + nul := bytes.IndexByte(s, 0) + buf.Write(s[:nul]) + s = s[nul+1:] + + factor := int(factors[i] - 1) + for factor > 0 { + s = s[bytes.IndexByte(s, 0)+1:] + factor-- + } + } +} + +func CharForName(nameChoice NameChoice, name string) rune { + loadCharNames() + + lower := strings.ToLower(name) + upper := strings.ToUpper(name) + + if lower[0] == '<' { + if nameChoice == ExtendedCharName && lower[len(lower)-1] == '>' { + if limit := strings.LastIndexByte(lower, '-'); limit >= 2 { + cp, err := strconv.ParseUint(lower[limit+1:len(lower)-1], 16, 32) + if err != nil || cp > 0x10ffff { + return -1 + } + return rune(cp) + } + } + return -1 + } + + for _, ar := range charNames.algNames { + if cp := ar.findAlgName(upper); cp != -1 { + return cp + } + } + + return charNames.enumNames(0, 0x10ffff+1, upper, nameChoice) +} + +const groupShift = 5 +const linesPerGroup = 1 << groupShift +const groupMask = linesPerGroup - 1 + +const ( + groupMsb = iota + groupOffsetHigh + groupOffsetLow + groupLength +) + +func (names *unames) enumNames(start, limit rune, otherName string, nameChoice NameChoice) rune { + startGroupMSB := uint16(start >> groupShift) + endGroupMSB := uint16((limit - 1) >> groupShift) + + group := names.getGroup(start) + + if startGroupMSB < group[groupMsb] && nameChoice == ExtendedCharName { + extLimit := rune(group[groupMsb]) << groupShift + if extLimit > limit { + extLimit = limit + } + start = extLimit + } + + if startGroupMSB == endGroupMSB { + if startGroupMSB == 
group[groupMsb] { + return names.enumGroupNames(group, start, limit-1, otherName, nameChoice) + } + } else { + if startGroupMSB == group[groupMsb] { + if start&groupMask != 0 { + if cp := names.enumGroupNames(group, start, (rune(startGroupMSB)< group[groupMsb] { + group = group[groupLength:] + } + + for len(group) > 0 && group[groupMsb] < endGroupMSB { + start = rune(group[groupMsb]) << groupShift + if cp := names.enumGroupNames(group, start, start+linesPerGroup-1, otherName, nameChoice); cp != -1 { + return cp + } + group = group[groupLength:] + } + + if len(group) > 0 && group[groupMsb] == endGroupMSB { + return names.enumGroupNames(group, (limit-1)&^groupMask, limit-1, otherName, nameChoice) + } + } + + return -1 +} + +func (names *unames) getGroup(code rune) []uint16 { + groups := names.groups + groupMSB := uint16(code >> groupShift) + + start := 0 + groupCount := int(groups[0]) + limit := groupCount + groups = groups[1:] + + for start < limit-1 { + number := (start + limit) / 2 + if groupMSB < groups[number*groupLength+groupMsb] { + limit = number + } else { + start = number + } + } + + return groups[start*groupLength : (groupCount-start)*groupLength] +} + +func (names *unames) getGroupOffset(group []uint16) uint32 { + return (uint32(group[groupOffsetHigh]) << 16) | uint32(group[groupOffsetLow]) +} + +func (names *unames) enumGroupNames(group []uint16, start, end rune, otherName string, choice NameChoice) rune { + var offsets [linesPerGroup + 2]uint16 + var lengths [linesPerGroup + 2]uint16 + + s := names.getGroupName(group) + s = expandGroupLengths(s, offsets[:0], lengths[:0]) + + for start < end { + name := s[offsets[start&groupMask]:] + nameLen := lengths[start&groupMask] + if names.compareName(name[:nameLen], choice, otherName) { + return start + } + start++ + } + return -1 +} + +func expandGroupLengths(s []uint8, offsets []uint16, lengths []uint16) []uint8 { + /* read the lengths of the 32 strings in this group and get each string's offset */ + var i, offset, length uint16 + var lengthByte uint8 + + /* all 32 lengths must be read to get the offset of the first group string */ + for i < linesPerGroup { + lengthByte = s[0] + s = s[1:] + + /* read even nibble - MSBs of lengthByte */ + if length >= 12 { + /* double-nibble length spread across two bytes */ + length = ((length&0x3)<<4 | uint16(lengthByte)>>4) + 12 + lengthByte &= 0xf + } else if (lengthByte /* &0xf0 */) >= 0xc0 { + /* double-nibble length spread across this one byte */ + length = (uint16(lengthByte) & 0x3f) + 12 + } else { + /* single-nibble length in MSBs */ + length = uint16(lengthByte) >> 4 + lengthByte &= 0xf + } + + offsets = append(offsets, offset) + lengths = append(lengths, length) + + offset += length + i++ + + /* read odd nibble - LSBs of lengthByte */ + if (lengthByte & 0xf0) == 0 { + /* this nibble was not consumed for a double-nibble length above */ + length = uint16(lengthByte) + if length < 12 { + /* single-nibble length in LSBs */ + offsets = append(offsets, offset) + lengths = append(lengths, length) + + offset += length + i++ + } + } else { + length = 0 /* prevent double-nibble detection in the next iteration */ + } + } + + /* now, s is at the first group string */ + return s +} + +func (names *unames) compareName(name []byte, choice NameChoice, otherName string) bool { + tokens := names.tokens + + tokenCount := tokens[0] + tokens = tokens[1:] + + otherNameLen := len(otherName) + + for len(name) > 0 && len(otherName) > 0 { + c := name[0] + name = name[1:] + + if uint16(c) >= tokenCount { + if c != ';' 
{ + if c != otherName[0] { + return false + } + otherName = otherName[1:] + } else { + break + } + } else { + token := tokens[c] + if int16(token) == -2 { + token = tokens[int(c)<<8|int(name[0])] + name = name[1:] + } + if int16(token) == -1 { + if c != ';' { + if c != otherName[0] { + return false + } + otherName = otherName[1:] + } else { + if len(otherName) == otherNameLen && choice == ExtendedCharName { + if ';' >= tokenCount || int16(tokens[';']) == -1 { + continue + } + } + break + } + } else { + tokenString := names.tokenStrings[token:] + for tokenString[0] != 0 && len(otherName) > 0 { + if tokenString[0] != otherName[0] { + return false + } + tokenString = tokenString[1:] + otherName = otherName[1:] + } + } + } + } + + return len(otherName) == 0 +} diff --git a/go/mysql/icuregex/internal/unames/unames_test.go b/go/mysql/icuregex/internal/unames/unames_test.go new file mode 100644 index 00000000000..f15353eef8d --- /dev/null +++ b/go/mysql/icuregex/internal/unames/unames_test.go @@ -0,0 +1,64 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package unames + +import ( + "testing" +) + +func TestCharForName(t *testing.T) { + var TestNames = []struct { + code rune + name, oldName, extName string + }{ + {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A"}, + {0x01a2, "LATIN CAPITAL LETTER OI", "", "LATIN CAPITAL LETTER OI"}, + {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "", "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK"}, + {0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "", "TIBETAN MARK BSKA- SHOG GI MGO RGYAN"}, + {0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401"}, + {0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED"}, + {0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA"}, + {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH"}, + {0xd800, "", "", ""}, + {0xdc00, "", "", ""}, + {0xff08, "FULLWIDTH LEFT PARENTHESIS", "", "FULLWIDTH LEFT PARENTHESIS"}, + {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN"}, + {0xffff, "", "", ""}, + {0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "", "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS"}, + {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456"}, + } + + for _, tn := range TestNames { + if tn.name != "" { + r := CharForName(UnicodeCharName, tn.name) + if r != tn.code { + t.Errorf("CharFromName(U_UNICODE_CHAR_NAME, %q) = '%c' (U+%d), expected %c (U+%d)", tn.name, r, r, tn.code, tn.code) + } + } + if tn.extName != "" { + r := CharForName(ExtendedCharName, tn.extName) + if r != tn.code { + t.Errorf("CharFromName(U_EXTENDED_CHAR_NAME, %q) = '%c' (U+%d), expected %c (U+%d)", tn.extName, r, r, tn.code, tn.code) + } + } + } +} diff --git a/go/mysql/icuregex/internal/uprops/constants.go b/go/mysql/icuregex/internal/uprops/constants.go new file mode 100644 index 00000000000..3cfe250599a --- /dev/null +++ b/go/mysql/icuregex/internal/uprops/constants.go @@ -0,0 +1,613 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package uprops + +type Property int32 + +const ( + /* + * Note: UProperty constants are parsed by preparseucd.py. + * It matches lines like + * UCHAR_=, + */ + + /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that + debuggers display UCHAR_ALPHABETIC as the symbolic name for 0, + rather than UCHAR_BINARY_START. Likewise for other *_START + identifiers. */ + + /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha. + Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */ + UCharAlphabetic Property = 0 + /** First constant for binary Unicode properties. @stable ICU 2.1 */ + UCharBinaryStart = UCharAlphabetic + /** Binary property ASCII_Hex_Digit. 
0-9 A-F a-f @stable ICU 2.1 */ + UCharASCIIHexDigit Property = 1 + /** Binary property Bidi_Control. + Format controls which have specific functions + in the Bidi Algorithm. @stable ICU 2.1 */ + UCharBidiControl Property = 2 + /** Binary property Bidi_Mirrored. + Characters that may change display in RTL text. + Same as u_isMirrored. + See Bidi Algorithm, UTR 9. @stable ICU 2.1 */ + UCharBidiMirrored Property = 3 + /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */ + UCharDash Property = 4 + /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2). + Ignorable in most processing. + <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */ + UCharDefaultIgnorableCodePoint Property = 5 + /** Binary property Deprecated (new in Unicode 3.2). + The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */ + UCharDeprecated Property = 6 + /** Binary property Diacritic. Characters that linguistically modify + the meaning of another character to which they apply. @stable ICU 2.1 */ + UCharDiacritic Property = 7 + /** Binary property Extender. + Extend the value or shape of a preceding alphabetic character, + e.g., length and iteration marks. @stable ICU 2.1 */ + UCharExtender Property = 8 + /** Binary property Full_Composition_Exclusion. + CompositionExclusions.txt+Singleton Decompositions+ + Non-Starter Decompositions. @stable ICU 2.1 */ + UCharFullCompositionExclusion Property = 9 + /** Binary property Grapheme_Base (new in Unicode 3.2). + For programmatic determination of grapheme cluster boundaries. + [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */ + UCharGraphemeBase Property = 10 + /** Binary property Grapheme_Extend (new in Unicode 3.2). + For programmatic determination of grapheme cluster boundaries. + Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */ + UCharGraphemeExtend Property = 11 + /** Binary property Grapheme_Link (new in Unicode 3.2). + For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */ + UCharGraphemeLink Property = 12 + /** Binary property Hex_Digit. + Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */ + UCharHexDigit Property = 13 + /** Binary property Hyphen. Dashes used to mark connections + between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */ + UCharHyphen Property = 14 + /** Binary property ID_Continue. + Characters that can continue an identifier. + DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out." + ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */ + UCharIDContinue Property = 15 + /** Binary property ID_Start. + Characters that can start an identifier. + Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */ + UCharIDStart Property = 16 + /** Binary property Ideographic. + CJKV ideographs. @stable ICU 2.1 */ + UCharIdeographic Property = 17 + /** Binary property IDS_Binary_Operator (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCharIdsBinaryOperator Property = 18 + /** Binary property IDS_Trinary_Operator (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCharIdsTrinaryOperator Property = 19 + /** Binary property Join_Control. + Format controls for cursive joining and ligation. @stable ICU 2.1 */ + UCharJoinControl Property = 20 + /** Binary property Logical_Order_Exception (new in Unicode 3.2). 
+ Characters that do not use logical order and + require special handling in most processing. @stable ICU 2.1 */ + UCharLogicalOrderException Property = 21 + /** Binary property Lowercase. Same as u_isULowercase, different from u_islower. + Ll+Other_Lowercase @stable ICU 2.1 */ + UCharLowercase Property = 22 + /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */ + UCharMath Property = 23 + /** Binary property Noncharacter_Code_Point. + Code points that are explicitly defined as illegal + for the encoding of characters. @stable ICU 2.1 */ + UCharNoncharacterCodePoint Property = 24 + /** Binary property Quotation_Mark. @stable ICU 2.1 */ + UCharQuotationMark Property = 25 + /** Binary property Radical (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCharRadical Property = 26 + /** Binary property Soft_Dotted (new in Unicode 3.2). + Characters with a "soft dot", like i or j. + An accent placed on these characters causes + the dot to disappear. @stable ICU 2.1 */ + UCharSoftDotted Property = 27 + /** Binary property Terminal_Punctuation. + Punctuation characters that generally mark + the end of textual units. @stable ICU 2.1 */ + UCharTerminalPunctuation Property = 28 + /** Binary property Unified_Ideograph (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCharUnifiedIdeograph Property = 29 + /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper. + Lu+Other_Uppercase @stable ICU 2.1 */ + UCharUppercase Property = 30 + /** Binary property White_Space. + Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace. + Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */ + UCharWhiteSpace Property = 31 + /** Binary property XID_Continue. + ID_Continue modified to allow closure under + normalization forms NFKC and NFKD. @stable ICU 2.1 */ + UCharXidContinue Property = 32 + /** Binary property XID_Start. ID_Start modified to allow + closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */ + UCharXidStart Property = 33 + /** Binary property Case_Sensitive. Either the source of a case + mapping or _in_ the target of a case mapping. Not the same as + the general category Cased_Letter. @stable ICU 2.6 */ + UCharCaseSensitive Property = 34 + /** Binary property STerm (new in Unicode 4.0.1). + Sentence Terminal. Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + @stable ICU 3.0 */ + UCharSTerm Property = 35 + /** Binary property Variation_Selector (new in Unicode 4.0.1). + Indicates all those characters that qualify as Variation Selectors. + For details on the behavior of these characters, + see StandardizedVariants.html and 15.6 Variation Selectors. + @stable ICU 3.0 */ + UCharVariationSelector Property = 36 + /** Binary property NFD_Inert. + ICU-specific property for characters that are inert under NFD, + i.e., they do not interact with adjacent characters. + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. + @stable ICU 3.0 */ + UCharNfdInert Property = 37 + /** Binary property NFKD_Inert. + ICU-specific property for characters that are inert under NFKD, + i.e., they do not interact with adjacent characters. + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. + @stable ICU 3.0 */ + UCharNfkdInert Property = 38 + /** Binary property NFC_Inert. 
+ ICU-specific property for characters that are inert under NFC, + i.e., they do not interact with adjacent characters. + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. + @stable ICU 3.0 */ + UCharNfcInert Property = 39 + /** Binary property NFKC_Inert. + ICU-specific property for characters that are inert under NFKC, + i.e., they do not interact with adjacent characters. + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. + @stable ICU 3.0 */ + UCharNfkcInert Property = 40 + /** Binary Property Segment_Starter. + ICU-specific property for characters that are starters in terms of + Unicode normalization and combining character sequences. + They have ccc=0 and do not occur in non-initial position of the + canonical decomposition of any character + (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)). + ICU uses this property for segmenting a string for generating a set of + canonically equivalent strings, e.g. for canonical closure while + processing collation tailoring rules. + @stable ICU 3.0 */ + UCharSegmentStarter Property = 41 + /** Binary property Pattern_Syntax (new in Unicode 4.1). + See UAX #31 Identifier and Pattern Syntax + (http://www.unicode.org/reports/tr31/) + @stable ICU 3.4 */ + UCharPatternSyntax Property = 42 + /** Binary property Pattern_White_Space (new in Unicode 4.1). + See UAX #31 Identifier and Pattern Syntax + (http://www.unicode.org/reports/tr31/) + @stable ICU 3.4 */ + UCharPatternWhiteSpace Property = 43 + /** Binary property alnum (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCharPosixAlnum Property = 44 + /** Binary property blank (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCharPosixBlank Property = 45 + /** Binary property graph (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCharPosixGraph Property = 46 + /** Binary property print (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCharPosixPrint Property = 47 + /** Binary property xdigit (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCharPosixXdigit Property = 48 + /** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. @stable ICU 4.4 */ + UCharCased Property = 49 + /** Binary property Case_Ignorable. Used in context-sensitive case mappings. @stable ICU 4.4 */ + UCharCaseIgnorable Property = 50 + /** Binary property Changes_When_Lowercased. @stable ICU 4.4 */ + UCharChangesWhenLowercased Property = 51 + /** Binary property Changes_When_Uppercased. @stable ICU 4.4 */ + UCharChangesWhenUppercased Property = 52 + /** Binary property Changes_When_Titlecased. @stable ICU 4.4 */ + UCharChangesWhenTitlecased Property = 53 + /** Binary property Changes_When_Casefolded. @stable ICU 4.4 */ + UCharChangesWhenCasefolded Property = 54 + /** Binary property Changes_When_Casemapped. @stable ICU 4.4 */ + UCharChangesWhenCasemapped Property = 55 + /** Binary property Changes_When_NFKC_Casefolded. 
@stable ICU 4.4 */ + UCharChangesWhenNfkcCasefolded Property = 56 + /** + * Binary property Emoji. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 57 + */ + UCharEmoji Property = 57 + /** + * Binary property Emoji_Presentation. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 57 + */ + UCharEmojiPresentation Property = 58 + /** + * Binary property Emoji_Modifier. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 57 + */ + UCharEmojiModifier Property = 59 + /** + * Binary property Emoji_Modifier_Base. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 57 + */ + UCharEmojiModifierBase Property = 60 + /** + * Binary property Emoji_Component. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 60 + */ + UCharEmojiComponent Property = 61 + /** + * Binary property Regional_Indicator. + * @stable ICU 60 + */ + UCharRegionalIndicator Property = 62 + /** + * Binary property Prepended_Concatenation_Mark. + * @stable ICU 60 + */ + UCharPrependedConcatenationMark Property = 63 + /** + * Binary property Extended_Pictographic. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 62 + */ + UCharExtendedPictographic Property = 64 + + /** Enumerated property Bidi_Class. + Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */ + UCharBidiClass Property = 0x1000 + /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */ + UCharIntStart = UCharBidiClass + /** Enumerated property Block. + Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */ + UCharBlock Property = 0x1001 + /** Enumerated property Canonical_Combining_Class. + Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */ + UCharCanonicalCombiningClass Property = 0x1002 + /** Enumerated property Decomposition_Type. + Returns UDecompositionType values. @stable ICU 2.2 */ + UCharDecompositionType Property = 0x1003 + /** Enumerated property East_Asian_Width. + See http://www.unicode.org/reports/tr11/ + Returns UEastAsianWidth values. @stable ICU 2.2 */ + UCharEastAsianWidth Property = 0x1004 + /** Enumerated property General_Category. + Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */ + UCharGeneralCategory Property = 0x1005 + /** Enumerated property Joining_Group. + Returns UJoiningGroup values. @stable ICU 2.2 */ + UCharJoiningGroup Property = 0x1006 + /** Enumerated property Joining_Type. + Returns UJoiningType values. @stable ICU 2.2 */ + UCharJoiningType Property = 0x1007 + /** Enumerated property Line_Break. + Returns ULineBreak values. @stable ICU 2.2 */ + UCharLineBreak Property = 0x1008 + /** Enumerated property Numeric_Type. + Returns UNumericType values. @stable ICU 2.2 */ + UCharNumericType Property = 0x1009 + /** Enumerated property Script. + Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */ + UCharScript Property = 0x100A + /** Enumerated property Hangul_Syllable_Type, new in Unicode 4. + Returns UHangulSyllableType values. @stable ICU 2.6 */ + UCharHangulSyllableType Property = 0x100B + /** Enumerated property NFD_Quick_Check. + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ + UCharNfdQuickCheck Property = 0x100C + /** Enumerated property NFKD_Quick_Check. + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ + UCharNfkdQuickCheck Property = 0x100D + /** Enumerated property NFC_Quick_Check. 
+ Returns UNormalizationCheckResult values. @stable ICU 3.0 */ + UCharNfcQuickCheck Property = 0x100E + /** Enumerated property NFKC_Quick_Check. + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ + UCharNfkcQuickCheck Property = 0x100F + /** Enumerated property Lead_Canonical_Combining_Class. + ICU-specific property for the ccc of the first code point + of the decomposition, or lccc(c)=ccc(NFD(c)[0]). + Useful for checking for canonically ordered text; + see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . + Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */ + UCharLeadCanonicalCombiningClass Property = 0x1010 + /** Enumerated property Trail_Canonical_Combining_Class. + ICU-specific property for the ccc of the last code point + of the decomposition, or tccc(c)=ccc(NFD(c)[last]). + Useful for checking for canonically ordered text; + see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . + Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */ + UCharTrailCanonicalCombiningClass Property = 0x1011 + /** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1). + Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + Returns UGraphemeClusterBreak values. @stable ICU 3.4 */ + UCharGraphemeClusterBreak Property = 0x1012 + /** Enumerated property Sentence_Break (new in Unicode 4.1). + Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + Returns USentenceBreak values. @stable ICU 3.4 */ + UCharSentenceBreak Property = 0x1013 + /** Enumerated property Word_Break (new in Unicode 4.1). + Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + Returns UWordBreakValues values. @stable ICU 3.4 */ + UCharWordBreak Property = 0x1014 + /** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3). + Used in UAX #9: Unicode Bidirectional Algorithm + (http://www.unicode.org/reports/tr9/) + Returns UBidiPairedBracketType values. @stable ICU 52 */ + UCharBidiPairedBracketType Property = 0x1015 + /** + * Enumerated property Indic_Positional_Category. + * New in Unicode 6.0 as provisional property Indic_Matra_Category; + * renamed and changed to informative in Unicode 8.0. + * See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt + * @stable ICU 63 + */ + UCharIndicPositionalCategory Property = 0x1016 + /** + * Enumerated property Indic_Syllabic_Category. + * New in Unicode 6.0 as provisional; informative since Unicode 8.0. + * See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt + * @stable ICU 63 + */ + UCharIndicSyllableCategory Property = 0x1017 + /** + * Enumerated property Vertical_Orientation. + * Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/). + * New as a UCD property in Unicode 10.0. + * @stable ICU 63 + */ + UCharVerticalOrientation Property = 0x1018 + + /** Bitmask property General_Category_Mask. + This is the General_Category property returned as a bit mask. + When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)), + returns bit masks for UCharCategory values where exactly one bit is set. + When used with u_getPropertyValueName() and u_getPropertyValueEnum(), + a multi-bit mask is used for sets of categories like "Letters". + Mask values should be cast to uint32_t. + @stable ICU 2.4 */ + UCharGeneralCategoryMask Property = 0x2000 + /** First constant for bit-mask Unicode properties. 
@stable ICU 2.4 */ + UCharMaskStart = UCharGeneralCategoryMask + /** Double property Numeric_Value. + Corresponds to u_getNumericValue. @stable ICU 2.4 */ + UCharNumericValue Property = 0x3000 + /** First constant for double Unicode properties. @stable ICU 2.4 */ + UCharDoubleStart = UCharNumericValue + /** String property Age. + Corresponds to u_charAge. @stable ICU 2.4 */ + UCharAge Property = 0x4000 + /** First constant for string Unicode properties. @stable ICU 2.4 */ + UCharStringStart = UCharAge + /** String property Bidi_Mirroring_Glyph. + Corresponds to u_charMirror. @stable ICU 2.4 */ + UCharBidiMirroringGlyph Property = 0x4001 + /** String property Case_Folding. + Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */ + UCharCaseFolding Property = 0x4002 + /** String property Lowercase_Mapping. + Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */ + UCharLowercaseMapping Property = 0x4004 + /** String property Name. + Corresponds to u_charName. @stable ICU 2.4 */ + UCharName Property = 0x4005 + /** String property Simple_Case_Folding. + Corresponds to u_foldCase. @stable ICU 2.4 */ + UCharSimpleCaseFolding Property = 0x4006 + /** String property Simple_Lowercase_Mapping. + Corresponds to u_tolower. @stable ICU 2.4 */ + UCharSimpleLowercaseMapping Property = 0x4007 + /** String property Simple_Titlecase_Mapping. + Corresponds to u_totitle. @stable ICU 2.4 */ + UcharSimpleTitlecaseMapping Property = 0x4008 + /** String property Simple_Uppercase_Mapping. + Corresponds to u_toupper. @stable ICU 2.4 */ + UCharSimpleUppercaseMapping Property = 0x4009 + /** String property Titlecase_Mapping. + Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */ + UCharTitlecaseMapping Property = 0x400A + /** String property Uppercase_Mapping. + Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */ + UCharUppercaseMapping Property = 0x400C + /** String property Bidi_Paired_Bracket (new in Unicode 6.3). + Corresponds to u_getBidiPairedBracket. @stable ICU 52 */ + UCharBidiPairedBracket Property = 0x400D + + /** Miscellaneous property Script_Extensions (new in Unicode 6.0). + Some characters are commonly used in multiple scripts. + For more information, see UAX #24: http://www.unicode.org/reports/tr24/. + Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h. + @stable ICU 4.6 */ + UCharScriptExtensions Property = 0x7000 + /** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */ + UCharOtherPropertyStart = UCharScriptExtensions + + /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */ + UCharInvalidCode Property = -1 +) + +const ( + uCharBinaryLimit = 65 + uCharIntLimit = 0x1019 + uCharMaskLimit = 0x2001 + uCharStringLimit = 0x400E +) + +/* + * Properties in vector word 1 + * Each bit encodes one binary property. 
+ * The following constants represent the bit number, use 1<= 0 { + set.AddRuneRange(startHasProperty, c-1) + startHasProperty = -1 + } + } + } + if startHasProperty >= 0 { + set.AddRuneRange(startHasProperty, uset.MaxValue) + } + + inclusionsForProperty[prop] = set + return set, nil +} + +func getInclusionsForIntProperty(prop Property) (*uset.UnicodeSet, error) { + if inc, ok := inclusionsForProperty[prop]; ok { + return inc, nil + } + + src := prop.source() + incl, err := getInclusionsForSource(src) + if err != nil { + return nil, err + } + + intPropIncl := uset.New() + intPropIncl.AddRune(0) + + numRanges := incl.RangeCount() + prevValue := int32(0) + + for i := 0; i < numRanges; i++ { + rangeEnd := incl.RangeEnd(i) + for c := incl.RangeStart(i); c <= rangeEnd; c++ { + value := getIntPropertyValue(c, prop) + if value != prevValue { + intPropIncl.AddRune(c) + prevValue = value + } + } + } + + inclusionsForProperty[prop] = intPropIncl + return intPropIncl, nil +} + +func ApplyIntPropertyValue(u *uset.UnicodeSet, prop Property, value int32) error { + switch { + case prop == UCharGeneralCategoryMask: + inclusions, err := getInclusionsForProperty(prop) + if err != nil { + return err + } + u.ApplyFilter(inclusions, func(ch rune) bool { + return (uMask(uchar.CharType(ch)) & uint32(value)) != 0 + }) + case prop == UCharScriptExtensions: + inclusions, err := getInclusionsForProperty(prop) + if err != nil { + return err + } + u.ApplyFilter(inclusions, func(ch rune) bool { + return uscriptHasScript(ch, code(value)) + }) + case 0 <= prop && prop < uCharBinaryLimit: + if value == 0 || value == 1 { + set, err := getInclusionsForBinaryProperty(prop) + if err != nil { + return err + } + u.CopyFrom(set) + if value == 0 { + u.Complement() + } + } else { + u.Clear() + } + + case UCharIntStart <= prop && prop < uCharIntLimit: + inclusions, err := getInclusionsForProperty(prop) + if err != nil { + return err + } + u.ApplyFilter(inclusions, func(ch rune) bool { + return getIntPropertyValue(ch, prop) == value + }) + default: + return errors.ErrUnsupported + } + return nil +} + +func mungeCharName(charname string) string { + out := make([]byte, 0, len(charname)) + for _, ch := range []byte(charname) { + j := len(out) + if ch == ' ' && (j == 0 || out[j-1] == ' ') { + continue + } + out = append(out, ch) + } + return string(out) +} + +func ApplyPropertyPattern(u *uset.UnicodeSet, pat string) error { + if len(pat) < 5 { + return errors.ErrIllegalArgument + } + + var posix, isName, invert bool + + if isPOSIXOpen(pat) { + posix = true + pat = pattern.SkipWhitespace(pat[2:]) + if len(pat) > 0 && pat[0] == '^' { + pat = pat[1:] + invert = true + } + } else if isPerlOpen(pat) || isNameOpen(pat) { + c := pat[1] + invert = c == 'P' + isName = c == 'N' + pat = pattern.SkipWhitespace(pat[2:]) + if len(pat) == 0 || pat[0] != '{' { + return errors.ErrIllegalArgument + } + pat = pat[1:] + } else { + return errors.ErrIllegalArgument + } + + var closePos int + if posix { + closePos = strings.Index(pat, ":]") + } else { + closePos = strings.IndexByte(pat, '}') + } + if closePos < 0 { + return errors.ErrIllegalArgument + } + + equals := strings.IndexByte(pat, '=') + var propName, valueName string + if equals >= 0 && equals < closePos && !isName { + propName = pat[:equals] + valueName = pat[equals+1 : closePos] + } else { + propName = pat[:closePos] + if isName { + valueName = propName + propName = "na" + } + } + + if err := ApplyPropertyAlias(u, propName, valueName); err != nil { + return err + } + if invert { + u.Complement() + 
} + return nil +} + +func isPOSIXOpen(pattern string) bool { + return pattern[0] == '[' && pattern[1] == ':' +} + +func isNameOpen(pattern string) bool { + return pattern[0] == '\\' && pattern[1] == 'N' +} + +func isPerlOpen(pattern string) bool { + return pattern[0] == '\\' && (pattern[1] == 'p' || pattern[1] == 'P') +} + +func ApplyPropertyAlias(u *uset.UnicodeSet, prop, value string) error { + var p Property + var v int32 + var invert bool + + if len(value) > 0 { + p = getPropertyEnum(prop) + if p == -1 { + return errors.ErrIllegalArgument + } + if p == UCharGeneralCategory { + p = UCharGeneralCategoryMask + } + + if (p >= UCharBinaryStart && p < uCharBinaryLimit) || + (p >= UCharIntStart && p < uCharIntLimit) || + (p >= UCharMaskStart && p < uCharMaskLimit) { + v = getPropertyValueEnum(p, value) + if v == -1 { + // Handle numeric CCC + if p == UCharCanonicalCombiningClass || + p == UCharTrailCanonicalCombiningClass || + p == UCharLeadCanonicalCombiningClass { + val, err := strconv.ParseUint(value, 10, 8) + if err != nil { + return errors.ErrIllegalArgument + } + v = int32(val) + } else { + return errors.ErrIllegalArgument + } + } + } else { + switch p { + case UCharNumericValue: + val, err := strconv.ParseFloat(value, 64) + if err != nil { + return errors.ErrIllegalArgument + } + incl, err := getInclusionsForProperty(p) + if err != nil { + return err + } + u.ApplyFilter(incl, func(ch rune) bool { + return uchar.NumericValue(ch) == val + }) + return nil + case UCharName: + // Must munge name, since u_charFromName() does not do + // 'loose' matching. + charName := mungeCharName(value) + ch := unames.CharForName(unames.ExtendedCharName, charName) + if ch < 0 { + return errors.ErrIllegalArgument + } + u.Clear() + u.AddRune(ch) + return nil + case UCharAge: + // Must munge name, since u_versionFromString() does not do + // 'loose' matching. + charName := mungeCharName(value) + version := uchar.VersionFromString(charName) + incl, err := getInclusionsForProperty(p) + if err != nil { + return err + } + u.ApplyFilter(incl, func(ch rune) bool { + return uchar.CharAge(ch) == version + }) + return nil + case UCharScriptExtensions: + v = getPropertyValueEnum(UCharScript, value) + if v == -1 { + return errors.ErrIllegalArgument + } + default: + // p is a non-binary, non-enumerated property that we + // don't support (yet). + return errors.ErrIllegalArgument + } + } + } else { + // value is empty. Interpret as General Category, Script, or + // Binary property. 
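+		// Illustrative examples of how an empty value is resolved: "Lu" is
+		// treated as a General_Category mask, "Greek" as a Script, and
+		// "Alphabetic" as a binary property.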
+ p = UCharGeneralCategoryMask + v = getPropertyValueEnum(p, prop) + if v == -1 { + p = UCharScript + v = getPropertyValueEnum(p, prop) + if v == -1 { + p = getPropertyEnum(prop) + if p >= UCharBinaryStart && p < uCharBinaryLimit { + v = 1 + } else if 0 == comparePropertyNames("ANY", prop) { + u.Clear() + u.AddRuneRange(uset.MinValue, uset.MaxValue) + return nil + } else if 0 == comparePropertyNames("ASCII", prop) { + u.Clear() + u.AddRuneRange(0, 0x7F) + return nil + } else if 0 == comparePropertyNames("Assigned", prop) { + // [:Assigned:]=[:^Cn:] + p = UCharGeneralCategoryMask + v = int32(uchar.GcCnMask) + invert = true + } else { + return errors.ErrIllegalArgument + } + } + } + } + + err := ApplyIntPropertyValue(u, p, v) + if err != nil { + return err + } + if invert { + u.Complement() + } + return nil +} + +func AddULayoutPropertyStarts(src propertySource, u *uset.UnicodeSet) { + var trie *utrie.UcpTrie + switch src { + case srcInpc: + trie = ulayout.InpcTrie() + case srcInsc: + trie = ulayout.InscTrie() + case srcVo: + trie = ulayout.VoTrie() + default: + panic("unreachable") + } + + // Add the start code point of each same-value range of the trie. + var start, end rune + for { + end, _ = trie.GetRange(start, utrie.UcpMapRangeNormal, 0, nil) + if end < 0 { + break + } + u.AddRune(start) + start = end + 1 + } +} + +func AddCategory(u *uset.UnicodeSet, mask uint32) error { + set := uset.New() + err := ApplyIntPropertyValue(set, UCharGeneralCategoryMask, int32(mask)) + if err != nil { + return err + } + u.AddAll(set) + return nil +} + +func NewUnicodeSetFomPattern(pattern string, flags uset.USet) (*uset.UnicodeSet, error) { + u := uset.New() + if err := ApplyPropertyPattern(u, pattern); err != nil { + return nil, err + } + if flags&uset.CaseInsensitive != 0 { + u.CloseOver(uset.CaseInsensitive) + } + return u, nil +} + +func MustNewUnicodeSetFomPattern(pattern string, flags uset.USet) *uset.UnicodeSet { + u, err := NewUnicodeSetFomPattern(pattern, flags) + if err != nil { + panic(err) + } + return u +} diff --git a/go/mysql/icuregex/internal/uprops/uprops.go b/go/mysql/icuregex/internal/uprops/uprops.go new file mode 100644 index 00000000000..ddf0989b5d8 --- /dev/null +++ b/go/mysql/icuregex/internal/uprops/uprops.go @@ -0,0 +1,269 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package uprops + +import ( + "fmt" + + "vitess.io/vitess/go/mysql/icuregex/internal/bytestrie" + "vitess.io/vitess/go/mysql/icuregex/internal/icudata" + "vitess.io/vitess/go/mysql/icuregex/internal/uchar" + "vitess.io/vitess/go/mysql/icuregex/internal/udata" +) + +var pnames struct { + valueMaps []uint32 + byteTrie []uint8 +} + +const ( + ixValueMapsOffset = 0 + ixByteTriesOffset = 1 + ixNameGroupsOffset = 2 + ixReserved3Offset = 3 +) + +func readData(bytes *udata.Bytes) error { + err := bytes.ReadHeader(func(info *udata.DataInfo) bool { + return info.DataFormat[0] == 0x70 && + info.DataFormat[1] == 0x6e && + info.DataFormat[2] == 0x61 && + info.DataFormat[3] == 0x6d && + info.FormatVersion[0] == 2 + }) + if err != nil { + return err + } + + count := bytes.Int32() / 4 + if count < 8 { + return fmt.Errorf("indexes[0] too small in ucase.icu") + } + + indexes := make([]int32, count) + indexes[0] = count * 4 + + for i := int32(1); i < count; i++ { + indexes[i] = bytes.Int32() + } + + offset := indexes[ixValueMapsOffset] + nextOffset := indexes[ixByteTriesOffset] + numInts := (nextOffset - offset) / 4 + + pnames.valueMaps = bytes.Uint32Slice(numInts) + + offset = nextOffset + nextOffset = indexes[ixNameGroupsOffset] + numBytes := nextOffset - offset + + pnames.byteTrie = bytes.Uint8Slice(numBytes) + return nil +} + +func init() { + b := udata.NewBytes(icudata.PNames) + if err := readData(b); err != nil { + panic(err) + } +} + +func (prop Property) source() propertySource { + if prop < UCharBinaryStart { + return srcNone /* undefined */ + } else if prop < uCharBinaryLimit { + bprop := binProps[prop] + if bprop.mask != 0 { + return srcPropsvec + } + return bprop.column + } else if prop < UCharIntStart { + return srcNone /* undefined */ + } else if prop < uCharIntLimit { + iprop := intProps[prop-UCharIntStart] + if iprop.mask != 0 { + return srcPropsvec + } + return iprop.column + } else if prop < UCharStringStart { + switch prop { + case UCharGeneralCategoryMask, + UCharNumericValue: + return srcChar + + default: + return srcNone + } + } else if prop < uCharStringLimit { + switch prop { + case UCharAge: + return srcPropsvec + + case UCharBidiMirroringGlyph: + return srcBidi + + case UCharCaseFolding, + UCharLowercaseMapping, + UCharSimpleCaseFolding, + UCharSimpleLowercaseMapping, + UcharSimpleTitlecaseMapping, + UCharSimpleUppercaseMapping, + UCharTitlecaseMapping, + UCharUppercaseMapping: + return srcCase + + /* UCHAR_ISO_COMMENT, UCHAR_UNICODE_1_NAME (deprecated) */ + case UCharName: + return srcNames + + default: + return srcNone + } + } else { + switch prop { + case UCharScriptExtensions: + return srcPropsvec + default: + return srcNone /* undefined */ + } + } +} + +func getPropertyEnum(alias string) Property { + return Property(getPropertyOrValueEnum(0, alias)) +} + +func getPropertyValueEnum(prop Property, alias string) int32 { + valueMapIdx := findProperty(prop) + if valueMapIdx == 0 { + return -1 + } + + valueMapIdx = int32(pnames.valueMaps[valueMapIdx+1]) + if valueMapIdx == 0 { + return -1 + } + // valueMapIndex is the start of the property's valueMap, + // where the first word is the BytesTrie offset. 
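+	//
+	// Illustrative usage, assuming standard Unicode value aliases:
+	//
+	//	getPropertyValueEnum(UCharScript, "Greek")           // -> the Script code for Greek
+	//	getPropertyValueEnum(UCharGeneralCategoryMask, "Lu") // -> the General_Category mask for Lu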
+ return getPropertyOrValueEnum(int32(pnames.valueMaps[valueMapIdx]), alias) +} + +func findProperty(prop Property) int32 { + var i = int32(1) + for numRanges := int32(pnames.valueMaps[0]); numRanges > 0; numRanges-- { + start := int32(pnames.valueMaps[i]) + limit := int32(pnames.valueMaps[i+1]) + i += 2 + if int32(prop) < start { + break + } + if int32(prop) < limit { + return i + (int32(prop)-start)*2 + } + i += (limit - start) * 2 + } + return 0 +} + +func getPropertyOrValueEnum(offset int32, alias string) int32 { + trie := bytestrie.New(pnames.byteTrie[offset:]) + if trie.ContainsName(alias) { + return trie.GetValue() + } + return -1 +} + +func comparePropertyNames(name1, name2 string) int { + next := func(s string) (byte, string) { + for len(s) > 0 && (s[0] == 0x2d || s[0] == 0x5f || s[0] == 0x20 || (0x09 <= s[0] && s[0] <= 0x0d)) { + s = s[1:] + } + if len(s) == 0 { + return 0, "" + } + c := s[0] + s = s[1:] + if 'A' <= c && c <= 'Z' { + c += 'a' - 'A' + } + return c, s + } + + var r1, r2 byte + for { + r1, name1 = next(name1) + r2, name2 = next(name2) + + if r1 == 0 && r2 == 0 { + return 0 + } + + /* Compare the lowercased characters */ + if r1 != r2 { + return int(r1) - int(r2) + } + } +} + +func getIntPropertyValue(c rune, which Property) int32 { + if which < UCharIntStart { + if UCharBinaryStart <= which && which < uCharBinaryLimit { + prop := binProps[which] + if prop.contains == nil { + return 0 + } + if prop.contains(prop, c, which) { + return 1 + } + return 0 + } + } else if which < uCharIntLimit { + iprop := intProps[which-UCharIntStart] + return iprop.getValue(iprop, c, which) + } else if which == UCharGeneralCategoryMask { + return int32(uMask(uchar.CharType(c))) + } + return 0 // undefined +} + +func mergeScriptCodeOrIndex(scriptX uint32) uint32 { + return ((scriptX & scriptHighMask) >> scriptHighShift) | + (scriptX & scriptLowMask) +} + +func script(c rune) int32 { + if c > 0x10ffff { + return -1 + } + scriptX := uchar.GetUnicodeProperties(c, 0) & scriptXMask + codeOrIndex := mergeScriptCodeOrIndex(scriptX) + + if scriptX < scriptXWithCommon { + return int32(codeOrIndex) + } else if scriptX < scriptXWithInherited { + return 0 + } else if scriptX < scriptXWithOther { + return 1 + } else { + return int32(uchar.ScriptExtension(codeOrIndex)) + } +} diff --git a/go/mysql/icuregex/internal/uprops/uprops_binary.go b/go/mysql/icuregex/internal/uprops/uprops_binary.go new file mode 100644 index 00000000000..855da92b3b6 --- /dev/null +++ b/go/mysql/icuregex/internal/uprops/uprops_binary.go @@ -0,0 +1,239 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package uprops + +import ( + "golang.org/x/exp/constraints" + "golang.org/x/exp/slices" + + "vitess.io/vitess/go/mysql/icuregex/internal/normalizer" + "vitess.io/vitess/go/mysql/icuregex/internal/ubidi" + "vitess.io/vitess/go/mysql/icuregex/internal/ucase" + "vitess.io/vitess/go/mysql/icuregex/internal/uchar" +) + +type binaryProperty struct { + column propertySource + mask uint32 + contains func(prop *binaryProperty, c rune, which Property) bool +} + +func uMask[T constraints.Integer](x T) uint32 { + return 1 << x +} + +func defaultContains(prop *binaryProperty, c rune, _ Property) bool { + return (uchar.GetUnicodeProperties(c, int(prop.column)) & prop.mask) != 0 +} + +var binProps = [uCharBinaryLimit]*binaryProperty{ + /* + * column and mask values for binary properties from u_getUnicodeProperties(). + * Must be in order of corresponding UProperty, + * and there must be exactly one entry per binary UProperty. + * + * Properties with mask==0 are handled in code. + * For them, column is the UPropertySource value. + * + * See also https://unicode-org.github.io/icu/userguide/strings/properties.html + */ + {1, uMask(pAlphabetic), defaultContains}, + {1, uMask(pASCIIHexDigit), defaultContains}, + {srcBidi, 0, isBidiControl}, + {srcBidi, 0, isMirrored}, + {1, uMask(pDash), defaultContains}, + {1, uMask(pDefaultIgnorableCodePoint), defaultContains}, + {1, uMask(pDeprecated), defaultContains}, + {1, uMask(pDiacritic), defaultContains}, + {1, uMask(pExtender), defaultContains}, + {srcNfc, 0, hasFullCompositionExclusion}, + {1, uMask(pGraphemeBase), defaultContains}, + {1, uMask(pGraphemeExtend), defaultContains}, + {1, uMask(pGraphemeLink), defaultContains}, + {1, uMask(pHexDigit), defaultContains}, + {1, uMask(pHyphen), defaultContains}, + {1, uMask(pIDContinue), defaultContains}, + {1, uMask(pIDStart), defaultContains}, + {1, uMask(pIdeographic), defaultContains}, + {1, uMask(pIdsBinaryOperator), defaultContains}, + {1, uMask(pIdsTrinaryOperator), defaultContains}, + {srcBidi, 0, isJoinControl}, + {1, uMask(pLogicalOrderException), defaultContains}, + {srcCase, 0, caseBinaryPropertyContains}, // UCHAR_LOWERCASE + {1, uMask(pMath), defaultContains}, + {1, uMask(pNoncharacterCodePoint), defaultContains}, + {1, uMask(pQuotationMark), defaultContains}, + {1, uMask(pRadical), defaultContains}, + {srcCase, 0, caseBinaryPropertyContains}, // UCHAR_SOFT_DOTTED + {1, uMask(pTerminalPunctuation), defaultContains}, + {1, uMask(pUnifiedIdeograph), defaultContains}, + {srcCase, 0, caseBinaryPropertyContains}, // UCHAR_UPPERCASE + {1, uMask(pWhiteSpace), defaultContains}, + {1, uMask(pXidContinue), defaultContains}, + {1, uMask(pXidStart), defaultContains}, + {srcCase, 0, caseBinaryPropertyContains}, // UCHAR_CASE_SENSITIVE + {1, uMask(pSTerm), defaultContains}, + {1, uMask(pVariationSelector), defaultContains}, + {srcNfc, 0, isNormInert}, // UCHAR_NFD_INERT + {srcNfkc, 0, isNormInert}, // UCHAR_NFKD_INERT + {srcNfc, 0, isNormInert}, // UCHAR_NFC_INERT + {srcNfkc, 0, isNormInert}, // UCHAR_NFKC_INERT + {srcNfcCanonIter, 0, nil}, // Segment_Starter is currently unsupported + {1, uMask(pPatternSyntax), defaultContains}, + {1, uMask(pPatternWhiteSpace), defaultContains}, + {srcCharAndPropsvec, 0, isPOSIXAlnum}, + {srcChar, 0, isPOSIXBlank}, + {srcChar, 0, isPOSIXGraph}, + {srcChar, 0, isPOSIXPrint}, + {srcChar, 0, isPOSIXXdigit}, + {srcCase, 0, caseBinaryPropertyContains}, // UCHAR_CASED + {srcCase, 0, caseBinaryPropertyContains}, // UCHAR_CASE_IGNORABLE + {srcCase, 0, caseBinaryPropertyContains}, // 
UCHAR_CHANGES_WHEN_LOWERCASED + {srcCase, 0, caseBinaryPropertyContains}, // UCHAR_CHANGES_WHEN_UPPERCASED + {srcCase, 0, caseBinaryPropertyContains}, // UCHAR_CHANGES_WHEN_TITLECASED + {srcCaseAndNorm, 0, changesWhenCasefolded}, + {srcCase, 0, caseBinaryPropertyContains}, // UCHAR_CHANGES_WHEN_CASEMAPPED + {srcNfkcCf, 0, nil}, // Changes_When_NFKC_Casefolded is currently unsupported + {2, uMask(p2Emoji), defaultContains}, + {2, uMask(p2EmojiPresentation), defaultContains}, + {2, uMask(p2EmojiModifier), defaultContains}, + {2, uMask(p2EmojiModifierBase), defaultContains}, + {2, uMask(p2EmojiComponent), defaultContains}, + {2, 0, isRegionalIndicator}, + {1, uMask(pPrependedConcatenationMark), defaultContains}, + {2, uMask(p2ExtendedPictographic), defaultContains}, +} + +func isBidiControl(_ *binaryProperty, c rune, _ Property) bool { + return ubidi.IsBidiControl(c) +} + +func isMirrored(_ *binaryProperty, c rune, _ Property) bool { + return ubidi.IsMirrored(c) +} + +func isRegionalIndicator(_ *binaryProperty, c rune, _ Property) bool { + return 0x1F1E6 <= c && c <= 0x1F1FF +} + +func changesWhenCasefolded(_ *binaryProperty, c rune, _ Property) bool { + if c < 0 { + return false + } + + nfd := normalizer.Nfc().Decompose(c) + if nfd == nil { + nfd = []rune{c} + } + folded := ucase.FoldRunes(nfd) + return !slices.Equal(nfd, folded) +} + +func isPOSIXXdigit(_ *binaryProperty, c rune, _ Property) bool { + return uchar.IsXDigit(c) +} + +func isPOSIXPrint(_ *binaryProperty, c rune, _ Property) bool { + return uchar.IsPOSIXPrint(c) +} + +func isPOSIXGraph(_ *binaryProperty, c rune, _ Property) bool { + return uchar.IsGraphPOSIX(c) +} + +func isPOSIXBlank(_ *binaryProperty, c rune, _ Property) bool { + return uchar.IsBlank(c) +} + +func isPOSIXAlnum(_ *binaryProperty, c rune, _ Property) bool { + return (uchar.GetUnicodeProperties(c, 1)&uMask(pAlphabetic)) != 0 || uchar.IsDigit(c) +} + +func isJoinControl(_ *binaryProperty, c rune, _ Property) bool { + return ubidi.IsJoinControl(c) +} + +func hasFullCompositionExclusion(_ *binaryProperty, c rune, _ Property) bool { + impl := normalizer.Nfc() + return impl.IsCompNo(c) +} + +func caseBinaryPropertyContains(_ *binaryProperty, c rune, which Property) bool { + return HasBinaryPropertyUcase(c, which) +} + +func HasBinaryPropertyUcase(c rune, which Property) bool { + /* case mapping properties */ + switch which { + case UCharLowercase: + return ucase.Lower == ucase.GetType(c) + case UCharUppercase: + return ucase.Upper == ucase.GetType(c) + case UCharSoftDotted: + return ucase.IsSoftDotted(c) + case UCharCaseSensitive: + return ucase.IsCaseSensitive(c) + case UCharCased: + return ucase.None != ucase.GetType(c) + case UCharCaseIgnorable: + return (ucase.GetTypeOrIgnorable(c) >> 2) != 0 + /* + * Note: The following Changes_When_Xyz are defined as testing whether + * the NFD form of the input changes when Xyz-case-mapped. + * However, this simpler implementation of these properties, + * ignoring NFD, passes the tests. + * The implementation needs to be changed if the tests start failing. + * When that happens, optimizations should be used to work with the + * per-single-code point ucase_toFullXyz() functions unless + * the NFD form has more than one code point, + * and the property starts set needs to be the union of the + * start sets for normalization and case mappings. 
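+	 *
+	 * Example (editorial, for illustration only): 'A' (U+0041) has
+	 * Changes_When_Lowercased=Yes because it maps to 'a', while 'a' itself
+	 * is unchanged by lowercasing and therefore reports false here.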
+ */ + case UCharChangesWhenLowercased: + return ucase.ToFullLower(c) >= 0 + case UCharChangesWhenUppercased: + return ucase.ToFullUpper(c) >= 0 + case UCharChangesWhenTitlecased: + return ucase.ToFullTitle(c) >= 0 + /* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */ + case UCharChangesWhenCasemapped: + return ucase.ToFullLower(c) >= 0 || ucase.ToFullUpper(c) >= 0 || ucase.ToFullTitle(c) >= 0 + default: + return false + } +} + +func isNormInert(_ *binaryProperty, c rune, which Property) bool { + mode := normalizer.Mode(int32(which) - int32(UCharNfdInert) + int32(normalizer.NormNfd)) + return normalizer.IsInert(c, mode) +} + +func HasBinaryProperty(c rune, which Property) bool { + if which < UCharBinaryStart || uCharBinaryLimit <= which { + return false + } + prop := binProps[which] + if prop.contains == nil { + return false + } + return prop.contains(prop, c, which) +} diff --git a/go/mysql/icuregex/internal/uprops/uprops_int.go b/go/mysql/icuregex/internal/uprops/uprops_int.go new file mode 100644 index 00000000000..3e62d31184f --- /dev/null +++ b/go/mysql/icuregex/internal/uprops/uprops_int.go @@ -0,0 +1,265 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package uprops + +import ( + "vitess.io/vitess/go/mysql/icuregex/internal/normalizer" + "vitess.io/vitess/go/mysql/icuregex/internal/ubidi" + "vitess.io/vitess/go/mysql/icuregex/internal/uchar" + "vitess.io/vitess/go/mysql/icuregex/internal/ulayout" +) + +type intPropertyGetValue func(prop *intProperty, c rune, which Property) int32 + +type intProperty struct { + column propertySource + mask uint32 + shift int32 + getValue intPropertyGetValue +} + +const ( + blockMask = 0x0001ff00 + blockShift = 8 + + eaMask = 0x000e0000 + eaShift = 17 + + lbMask = 0x03f00000 + lbShift = 20 + + sbMask = 0x000f8000 + sbShift = 15 + + wbMask = 0x00007c00 + wbShift = 10 + + gcbMask = 0x000003e0 + gcbShift = 5 + + dtMask = 0x0000001f +) + +type numericType int32 + +/** + * Numeric Type constants. + * + * @see UCHAR_NUMERIC_TYPE + * @stable ICU 2.2 + */ +const ( + /* + * Note: UNumericType constants are parsed by preparseucd.py. + * It matches lines like + * U_NT_ + */ + + ntNone numericType = iota /*[None]*/ + ntDecimal /*[de]*/ + ntDigit /*[di]*/ + ntNumeric /*[nu]*/ + /** + * One more than the highest normal UNumericType value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + ntCount +) + +/** + * Hangul Syllable Type constants. + * + * @see UCHAR_HANGUL_SYLLABLE_TYPE + * @stable ICU 2.6 + */ + +type hangunSyllableType int32 + +const ( + /* + * Note: UHangulSyllableType constants are parsed by preparseucd.py. 
+ * It matches lines like + * U_HST_ + */ + + hstNotApplicable hangunSyllableType = iota /*[NA]*/ + hstLeadingJamo /*[L]*/ + hstVowelJamo /*[V]*/ + hstTrailingJamo /*[T]*/ + hstLvSyllable /*[LV]*/ + hstLvtSyllable /*[LVT]*/ + /** + * One more than the highest normal UHangulSyllableType value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + hstCount +) + +var intProps = [uCharIntLimit - UCharIntStart]*intProperty{ + /* + * column, mask and shift values for int-value properties from u_getUnicodeProperties(). + * Must be in order of corresponding UProperty, + * and there must be exactly one entry per int UProperty. + * + * Properties with mask==0 are handled in code. + * For them, column is the UPropertySource value. + */ + {srcBidi, 0, 0, getBiDiClass}, + {0, blockMask, blockShift, defaultGetValue}, + {srcNfc, 0, 0xff, getCombiningClass}, + {2, dtMask, 0, defaultGetValue}, + {0, eaMask, eaShift, defaultGetValue}, + {srcChar, 0, int32(uchar.CharCategoryCount - 1), getGeneralCategory}, + {srcBidi, 0, 0, getJoiningGroup}, + {srcBidi, 0, 0, getJoiningType}, + {2, lbMask, lbShift, defaultGetValue}, + {srcChar, 0, int32(ntCount - 1), getNumericType}, + {srcPropsvec, 0, 0, getScript}, + {srcPropsvec, 0, int32(hstCount - 1), getHangulSyllableType}, + // UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" + {srcNfc, 0, int32(normalizer.Yes), getNormQuickCheck}, + // UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" + {srcNfkc, 0, int32(normalizer.Yes), getNormQuickCheck}, + // UCHAR_NFC_QUICK_CHECK: max=2=MAYBE + {srcNfc, 0, int32(normalizer.Maybe), getNormQuickCheck}, + // UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE + {srcNfkc, 0, int32(normalizer.Maybe), getNormQuickCheck}, + {srcNfc, 0, 0xff, getLeadCombiningClass}, + {srcNfc, 0, 0xff, getTrailCombiningClass}, + {2, gcbMask, gcbShift, defaultGetValue}, + {2, sbMask, sbShift, defaultGetValue}, + {2, wbMask, wbShift, defaultGetValue}, + {srcBidi, 0, 0, getBiDiPairedBracketType}, + {srcInpc, 0, 0, getInPC}, + {srcInsc, 0, 0, getInSC}, + {srcVo, 0, 0, getVo}, +} + +func getVo(_ *intProperty, c rune, _ Property) int32 { + return int32(ulayout.VoTrie().Get(c)) +} + +func getInSC(_ *intProperty, c rune, _ Property) int32 { + return int32(ulayout.InscTrie().Get(c)) +} + +func getInPC(_ *intProperty, c rune, _ Property) int32 { + return int32(ulayout.InpcTrie().Get(c)) +} + +func getBiDiPairedBracketType(_ *intProperty, c rune, _ Property) int32 { + return int32(ubidi.PairedBracketType(c)) +} + +func getTrailCombiningClass(_ *intProperty, c rune, _ Property) int32 { + return int32(normalizer.Nfc().GetFCD16(c) & 0xff) +} + +func getLeadCombiningClass(_ *intProperty, c rune, _ Property) int32 { + val := int32(normalizer.Nfc().GetFCD16(c) >> 8) + return val +} + +func getNormQuickCheck(_ *intProperty, c rune, which Property) int32 { + return int32(normalizer.QuickCheck(c, normalizer.Mode(int32(which)-int32(UCharNfdQuickCheck)+int32(normalizer.NormNfd)))) +} + +/* + * Map some of the Grapheme Cluster Break values to Hangul Syllable Types. + * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break. 
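+ * For example, U+1100 (HANGUL CHOSEONG KIYEOK) has Grapheme_Cluster_Break=L
+ * and maps to hstLeadingJamo below, while U+AC00 (HANGUL SYLLABLE GA) has
+ * Grapheme_Cluster_Break=LV and maps to hstLvSyllable.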
+ */ +var gcbToHst = []hangunSyllableType{ + hstNotApplicable, /* U_GCB_OTHER */ + hstNotApplicable, /* U_GCB_CONTROL */ + hstNotApplicable, /* U_GCB_CR */ + hstNotApplicable, /* U_GCB_EXTEND */ + hstLeadingJamo, /* U_GCB_L */ + hstNotApplicable, /* U_GCB_LF */ + hstLvSyllable, /* U_GCB_LV */ + hstLvtSyllable, /* U_GCB_LVT */ + hstTrailingJamo, /* U_GCB_T */ + hstVowelJamo, /* U_GCB_V */ + /* + * Omit GCB values beyond what we need for hst. + * The code below checks for the array length. + */ +} + +func getHangulSyllableType(_ *intProperty, c rune, _ Property) int32 { + /* see comments on gcbToHst[] above */ + gcb := (int32(uchar.GetUnicodeProperties(c, 2)) & gcbMask) >> gcbShift + + if gcb < int32(len(gcbToHst)) { + return int32(gcbToHst[gcb]) + } + return int32(hstNotApplicable) +} + +func getScript(_ *intProperty, c rune, _ Property) int32 { + return script(c) +} + +func getNumericType(_ *intProperty, c rune, _ Property) int32 { + ntv := uchar.NumericTypeValue(c) + return int32(ntvGetType(ntv)) +} + +func getJoiningType(_ *intProperty, c rune, _ Property) int32 { + return int32(ubidi.JoinType(c)) +} + +func getJoiningGroup(_ *intProperty, c rune, _ Property) int32 { + return int32(ubidi.JoinGroup(c)) +} + +func getGeneralCategory(_ *intProperty, c rune, _ Property) int32 { + return int32(uchar.CharType(c)) +} + +func getCombiningClass(_ *intProperty, c rune, _ Property) int32 { + return int32(normalizer.Nfc().CombiningClass(c)) +} + +func defaultGetValue(prop *intProperty, c rune, _ Property) int32 { + return int32(uchar.GetUnicodeProperties(c, int(prop.column))&prop.mask) >> prop.shift +} + +func getBiDiClass(_ *intProperty, c rune, _ Property) int32 { + return int32(ubidi.Class(c)) +} + +func ntvGetType(ntv uint16) numericType { + switch { + case ntv == uchar.UPropsNtvNone: + return ntNone + case ntv < uchar.UPropsNtvDigitStart: + return ntDecimal + case ntv < uchar.UPropsNtvNumericStart: + return ntDigit + default: + return ntNumeric + } +} diff --git a/go/mysql/icuregex/internal/uprops/uscript.go b/go/mysql/icuregex/internal/uprops/uscript.go new file mode 100644 index 00000000000..8a4423849df --- /dev/null +++ b/go/mysql/icuregex/internal/uprops/uscript.go @@ -0,0 +1,505 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package uprops + +import "vitess.io/vitess/go/mysql/icuregex/internal/uchar" + +/** + * Constants for ISO 15924 script codes. + * + * The current set of script code constants supports at least all scripts + * that are encoded in the version of Unicode which ICU currently supports. + * The names of the constants are usually derived from the + * Unicode script property value aliases. 
+ * See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) + * and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt . + * + * In addition, constants for many ISO 15924 script codes + * are included, for use with language tags, CLDR data, and similar. + * Some of those codes are not used in the Unicode Character Database (UCD). + * For example, there are no characters that have a UCD script property value of + * Hans or Hant. All Han ideographs have the Hani script property value in Unicode. + * + * Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR. + * + * Starting with ICU 55, script codes are only added when their scripts + * have been or will certainly be encoded in Unicode, + * and have been assigned Unicode script property value aliases, + * to ensure that their script names are stable and match the names of the constants. + * Script codes like Latf and Aran that are not subject to separate encoding + * may be added at any time. + * + * @stable ICU 2.2 + */ +type code int32 + +/* + * Note: UScriptCode constants and their ISO script code comments + * are parsed by preparseucd.py. + * It matches lines like + * USCRIPT_ = , / * * / + */ + +const ( + /** @stable ICU 2.2 */ + invalidCode code = -1 + /** @stable ICU 2.2 */ + common code = 0 /* Zyyy */ + /** @stable ICU 2.2 */ + inherited code = 1 /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */ + /** @stable ICU 2.2 */ + arabic code = 2 /* Arab */ + /** @stable ICU 2.2 */ + armenian code = 3 /* Armn */ + /** @stable ICU 2.2 */ + bengali code = 4 /* Beng */ + /** @stable ICU 2.2 */ + bopomofo code = 5 /* Bopo */ + /** @stable ICU 2.2 */ + cherokee code = 6 /* Cher */ + /** @stable ICU 2.2 */ + coptic code = 7 /* Copt */ + /** @stable ICU 2.2 */ + cyrillic code = 8 /* Cyrl */ + /** @stable ICU 2.2 */ + deseret code = 9 /* Dsrt */ + /** @stable ICU 2.2 */ + devanagari code = 10 /* Deva */ + /** @stable ICU 2.2 */ + ethiopic code = 11 /* Ethi */ + /** @stable ICU 2.2 */ + georgian code = 12 /* Geor */ + /** @stable ICU 2.2 */ + gothic code = 13 /* Goth */ + /** @stable ICU 2.2 */ + greek code = 14 /* Grek */ + /** @stable ICU 2.2 */ + gujarati code = 15 /* Gujr */ + /** @stable ICU 2.2 */ + gurmukhi code = 16 /* Guru */ + /** @stable ICU 2.2 */ + han code = 17 /* Hani */ + /** @stable ICU 2.2 */ + hangul code = 18 /* Hang */ + /** @stable ICU 2.2 */ + hebrew code = 19 /* Hebr */ + /** @stable ICU 2.2 */ + hiragana code = 20 /* Hira */ + /** @stable ICU 2.2 */ + kannada code = 21 /* Knda */ + /** @stable ICU 2.2 */ + katakana code = 22 /* Kana */ + /** @stable ICU 2.2 */ + khmer code = 23 /* Khmr */ + /** @stable ICU 2.2 */ + lao code = 24 /* Laoo */ + /** @stable ICU 2.2 */ + latin code = 25 /* Latn */ + /** @stable ICU 2.2 */ + malayalam code = 26 /* Mlym */ + /** @stable ICU 2.2 */ + mongolian code = 27 /* Mong */ + /** @stable ICU 2.2 */ + myanmar code = 28 /* Mymr */ + /** @stable ICU 2.2 */ + ogham code = 29 /* Ogam */ + /** @stable ICU 2.2 */ + oldItalic code = 30 /* Ital */ + /** @stable ICU 2.2 */ + oriya code = 31 /* Orya */ + /** @stable ICU 2.2 */ + runic code = 32 /* Runr */ + /** @stable ICU 2.2 */ + sinhala code = 33 /* Sinh */ + /** @stable ICU 2.2 */ + syriac code = 34 /* Syrc */ + /** @stable ICU 2.2 */ + tamil code = 35 /* Taml */ + /** @stable ICU 2.2 */ + telugu code = 36 /* Telu */ + /** @stable ICU 2.2 */ + thaana code = 37 /* Thaa */ + /** @stable ICU 2.2 */ + thai code = 38 /* Thai */ + /** @stable ICU 2.2 
*/ + tibetan code = 39 /* Tibt */ + /** Canadian_Aboriginal script. @stable ICU 2.6 */ + canadianAboriginal code = 40 /* Cans */ + /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */ + ucas code = canadianAboriginal + /** @stable ICU 2.2 */ + yi code = 41 /* Yiii */ + /* New scripts in Unicode 3.2 */ + /** @stable ICU 2.2 */ + tagalog code = 42 /* Tglg */ + /** @stable ICU 2.2 */ + hanunoo code = 43 /* Hano */ + /** @stable ICU 2.2 */ + buhid code = 44 /* Buhd */ + /** @stable ICU 2.2 */ + tagbanwa code = 45 /* Tagb */ + + /* New scripts in Unicode 4 */ + /** @stable ICU 2.6 */ + braille code = 46 /* Brai */ + /** @stable ICU 2.6 */ + cypriot code = 47 /* Cprt */ + /** @stable ICU 2.6 */ + limbu code = 48 /* Limb */ + /** @stable ICU 2.6 */ + linearB code = 49 /* Linb */ + /** @stable ICU 2.6 */ + osmanya code = 50 /* Osma */ + /** @stable ICU 2.6 */ + shavian code = 51 /* Shaw */ + /** @stable ICU 2.6 */ + taiLe code = 52 /* Tale */ + /** @stable ICU 2.6 */ + ugaratic code = 53 /* Ugar */ + + /** New script code in Unicode 4.0.1 @stable ICU 3.0 */ + katakanaOrHiragana = 54 /*Hrkt */ + + /* New scripts in Unicode 4.1 */ + /** @stable ICU 3.4 */ + buginese code = 55 /* Bugi */ + /** @stable ICU 3.4 */ + glagolitic code = 56 /* Glag */ + /** @stable ICU 3.4 */ + kharoshthi code = 57 /* Khar */ + /** @stable ICU 3.4 */ + sylotiNagri code = 58 /* Sylo */ + /** @stable ICU 3.4 */ + newTaiLue code = 59 /* Talu */ + /** @stable ICU 3.4 */ + tifinagh code = 60 /* Tfng */ + /** @stable ICU 3.4 */ + oldPersian code = 61 /* Xpeo */ + + /* New script codes from Unicode and ISO 15924 */ + /** @stable ICU 3.6 */ + balinese code = 62 /* Bali */ + /** @stable ICU 3.6 */ + batak code = 63 /* Batk */ + /** @stable ICU 3.6 */ + blissymbols code = 64 /* Blis */ + /** @stable ICU 3.6 */ + brahmi code = 65 /* Brah */ + /** @stable ICU 3.6 */ + cham code = 66 /* Cham */ + /** @stable ICU 3.6 */ + cirth code = 67 /* Cirt */ + /** @stable ICU 3.6 */ + oldChurchSlavonicCyrillic code = 68 /* Cyrs */ + /** @stable ICU 3.6 */ + demoticEgyptian code = 69 /* Egyd */ + /** @stable ICU 3.6 */ + hieraticEgyptian code = 70 /* Egyh */ + /** @stable ICU 3.6 */ + egyptianHieroglyphs code = 71 /* Egyp */ + /** @stable ICU 3.6 */ + khutsuri code = 72 /* Geok */ + /** @stable ICU 3.6 */ + simplfiedHan code = 73 /* Hans */ + /** @stable ICU 3.6 */ + traditionalHan code = 74 /* Hant */ + /** @stable ICU 3.6 */ + pahawhHmong code = 75 /* Hmng */ + /** @stable ICU 3.6 */ + oldHungarian code = 76 /* Hung */ + /** @stable ICU 3.6 */ + harappanIndus code = 77 /* Inds */ + /** @stable ICU 3.6 */ + javanese code = 78 /* Java */ + /** @stable ICU 3.6 */ + kayahLi code = 79 /* Kali */ + /** @stable ICU 3.6 */ + latinFraktur code = 80 /* Latf */ + /** @stable ICU 3.6 */ + latinGaelic code = 81 /* Latg */ + /** @stable ICU 3.6 */ + lepcha code = 82 /* Lepc */ + /** @stable ICU 3.6 */ + linearA code = 83 /* Lina */ + /** @stable ICU 4.6 */ + mandaic code = 84 /* Mand */ + /** @stable ICU 3.6 */ + mandaean code = mandaic + /** @stable ICU 3.6 */ + mayanHieroglyphs code = 85 /* Maya */ + /** @stable ICU 4.6 */ + meroiticHieroglyphs code = 86 /* Mero */ + /** @stable ICU 3.6 */ + meroitic code = meroiticHieroglyphs + /** @stable ICU 3.6 */ + nko code = 87 /* Nkoo */ + /** @stable ICU 3.6 */ + orkhon code = 88 /* Orkh */ + /** @stable ICU 3.6 */ + oldPermic code = 89 /* Perm */ + /** @stable ICU 3.6 */ + phagsPa code = 90 /* Phag */ + /** @stable ICU 3.6 */ + phoenician code = 91 /* Phnx */ + /** @stable ICU 52 */ + miao code = 92 /* Plrd 
*/ + /** @stable ICU 3.6 */ + phoneticPollard code = miao + /** @stable ICU 3.6 */ + rongoRongo code = 93 /* Roro */ + /** @stable ICU 3.6 */ + sarati code = 94 /* Sara */ + /** @stable ICU 3.6 */ + extrangeloSyriac code = 95 /* Syre */ + /** @stable ICU 3.6 */ + westernSyriac code = 96 /* Syrj */ + /** @stable ICU 3.6 */ + easternSyriac code = 97 /* Syrn */ + /** @stable ICU 3.6 */ + tengwar code = 98 /* Teng */ + /** @stable ICU 3.6 */ + vai code = 99 /* Vaii */ + /** @stable ICU 3.6 */ + visibleSpeech code = 100 /* Visp */ + /** @stable ICU 3.6 */ + cuneiform code = 101 /* Xsux */ + /** @stable ICU 3.6 */ + unwrittenLanguages code = 102 /* Zxxx */ + /** @stable ICU 3.6 */ + unknown code = 103 /* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */ + + /** @stable ICU 3.8 */ + carian code = 104 /* Cari */ + /** @stable ICU 3.8 */ + japanese code = 105 /* Jpan */ + /** @stable ICU 3.8 */ + lanna code = 106 /* Lana */ + /** @stable ICU 3.8 */ + lycian code = 107 /* Lyci */ + /** @stable ICU 3.8 */ + lydian code = 108 /* Lydi */ + /** @stable ICU 3.8 */ + olChiki code = 109 /* Olck */ + /** @stable ICU 3.8 */ + rejang code = 110 /* Rjng */ + /** @stable ICU 3.8 */ + saurashtra code = 111 /* Saur */ + /** Sutton SignWriting @stable ICU 3.8 */ + signWriting code = 112 /* Sgnw */ + /** @stable ICU 3.8 */ + sundanese code = 113 /* Sund */ + /** @stable ICU 3.8 */ + moon code = 114 /* Moon */ + /** @stable ICU 3.8 */ + meiteiMayek code = 115 /* Mtei */ + + /** @stable ICU 4.0 */ + imperialAramaic code = 116 /* Armi */ + /** @stable ICU 4.0 */ + avestan code = 117 /* Avst */ + /** @stable ICU 4.0 */ + chakma code = 118 /* Cakm */ + /** @stable ICU 4.0 */ + korean code = 119 /* Kore */ + /** @stable ICU 4.0 */ + kaithi code = 120 /* Kthi */ + /** @stable ICU 4.0 */ + manichaean code = 121 /* Mani */ + /** @stable ICU 4.0 */ + inscriptionalPahlavi code = 122 /* Phli */ + /** @stable ICU 4.0 */ + psalterPahlavi code = 123 /* Phlp */ + /** @stable ICU 4.0 */ + bookPahlavi code = 124 /* Phlv */ + /** @stable ICU 4.0 */ + inscriptionalParthian code = 125 /* Prti */ + /** @stable ICU 4.0 */ + samaritan code = 126 /* Samr */ + /** @stable ICU 4.0 */ + taiViet code = 127 /* Tavt */ + /** @stable ICU 4.0 */ + mathematicalNotation code = 128 /* Zmth */ + /** @stable ICU 4.0 */ + symbols code = 129 /* Zsym */ + + /** @stable ICU 4.4 */ + bamum code = 130 /* Bamu */ + /** @stable ICU 4.4 */ + lisu code = 131 /* Lisu */ + /** @stable ICU 4.4 */ + nakhiGeba code = 132 /* Nkgb */ + /** @stable ICU 4.4 */ + oldSouthArabian code = 133 /* Sarb */ + + /** @stable ICU 4.6 */ + bassaVah code = 134 /* Bass */ + /** @stable ICU 54 */ + duployan code = 135 /* Dupl */ + /** @stable ICU 4.6 */ + elbasan code = 136 /* Elba */ + /** @stable ICU 4.6 */ + grantha code = 137 /* Gran */ + /** @stable ICU 4.6 */ + kpelle code = 138 /* Kpel */ + /** @stable ICU 4.6 */ + loma code = 139 /* Loma */ + /** Mende Kikakui @stable ICU 4.6 */ + mende code = 140 /* Mend */ + /** @stable ICU 4.6 */ + meroiticCursive code = 141 /* Merc */ + /** @stable ICU 4.6 */ + oldNorthArabian code = 142 /* Narb */ + /** @stable ICU 4.6 */ + nabataean code = 143 /* Nbat */ + /** @stable ICU 4.6 */ + palmyrene code = 144 /* Palm */ + /** @stable ICU 54 */ + khudawadi code = 145 /* Sind */ + /** @stable ICU 4.6 */ + sindhi code = khudawadi + /** @stable ICU 4.6 */ + warangCiti code = 146 /* Wara */ + + /** @stable ICU 4.8 */ + afaka code = 147 /* Afak */ + /** @stable ICU 4.8 */ + jurchen code = 148 /* Jurc */ + /** @stable ICU 4.8 
*/ + mro code = 149 /* Mroo */ + /** @stable ICU 4.8 */ + nushu code = 150 /* Nshu */ + /** @stable ICU 4.8 */ + sharada code = 151 /* Shrd */ + /** @stable ICU 4.8 */ + soraSompeng code = 152 /* Sora */ + /** @stable ICU 4.8 */ + takri code = 153 /* Takr */ + /** @stable ICU 4.8 */ + tangut code = 154 /* Tang */ + /** @stable ICU 4.8 */ + woleai code = 155 /* Wole */ + + /** @stable ICU 49 */ + anatolianHieroglyphs code = 156 /* Hluw */ + /** @stable ICU 49 */ + khojki code = 157 /* Khoj */ + /** @stable ICU 49 */ + tirhuta code = 158 /* Tirh */ + + /** @stable ICU 52 */ + caucasianAlbanian code = 159 /* Aghb */ + /** @stable ICU 52 */ + mahajani code = 160 /* Mahj */ + + /** @stable ICU 54 */ + ahom code = 161 /* Ahom */ + /** @stable ICU 54 */ + hatran code = 162 /* Hatr */ + /** @stable ICU 54 */ + modi code = 163 /* Modi */ + /** @stable ICU 54 */ + multani code = 164 /* Mult */ + /** @stable ICU 54 */ + pauCinHau code = 165 /* Pauc */ + /** @stable ICU 54 */ + siddham code = 166 /* Sidd */ + + /** @stable ICU 58 */ + adlam code = 167 /* Adlm */ + /** @stable ICU 58 */ + bhaiksuki code = 168 /* Bhks */ + /** @stable ICU 58 */ + marchen code = 169 /* Marc */ + /** @stable ICU 58 */ + newa code = 170 /* Newa */ + /** @stable ICU 58 */ + osage code = 171 /* Osge */ + + /** @stable ICU 58 */ + hanWithBopomofo code = 172 /* Hanb */ + /** @stable ICU 58 */ + jamo code = 173 /* Jamo */ + /** @stable ICU 58 */ + symbolsEmoji code = 174 /* Zsye */ + + /** @stable ICU 60 */ + masaramGondi code = 175 /* Gonm */ + /** @stable ICU 60 */ + soyombo code = 176 /* Soyo */ + /** @stable ICU 60 */ + zanabazarSquare code = 177 /* Zanb */ + + /** @stable ICU 62 */ + dogra code = 178 /* Dogr */ + /** @stable ICU 62 */ + gunjalaGondi code = 179 /* Gong */ + /** @stable ICU 62 */ + makasar code = 180 /* Maka */ + /** @stable ICU 62 */ + medefaidrin code = 181 /* Medf */ + /** @stable ICU 62 */ + hanifiRohingya code = 182 /* Rohg */ + /** @stable ICU 62 */ + sogdian code = 183 /* Sogd */ + /** @stable ICU 62 */ + oldSogdian code = 184 /* Sogo */ + + /** @stable ICU 64 */ + elymaic code = 185 /* Elym */ + /** @stable ICU 64 */ + nyiakengPuachueHmong code = 186 /* Hmnp */ + /** @stable ICU 64 */ + nandinagari code = 187 /* Nand */ + /** @stable ICU 64 */ + wancho code = 188 /* Wcho */ + + /** @stable ICU 66 */ + chorasmian code = 189 /* Chrs */ + /** @stable ICU 66 */ + divesAkuru code = 190 /* Diak */ + /** @stable ICU 66 */ + khitanSmallScript code = 191 /* Kits */ + /** @stable ICU 66 */ + yezedi code = 192 /* Yezi */ +) + +func uscriptHasScript(c rune, sc code) bool { + scriptX := uchar.GetUnicodeProperties(c, 0) & scriptXMask + codeOrIndex := mergeScriptCodeOrIndex(scriptX) + if scriptX < scriptXWithCommon { + return sc == code(codeOrIndex) + } + + scx := uchar.ScriptExtensions(codeOrIndex) + if scriptX >= scriptXWithOther { + scx = uchar.ScriptExtensions(uint32(scx[1])) + } + sc32 := uint32(sc) + if sc32 > 0x7fff { + /* Guard against bogus input that would make us go past the Script_Extensions terminator. */ + return false + } + for sc32 > uint32(scx[0]) { + scx = scx[1:] + } + return sc32 == uint32(scx[0]&0x7fff) +} diff --git a/go/mysql/icuregex/internal/uset/close.go b/go/mysql/icuregex/internal/uset/close.go new file mode 100644 index 00000000000..bd3f9f0f7e3 --- /dev/null +++ b/go/mysql/icuregex/internal/uset/close.go @@ -0,0 +1,96 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. 
+ +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package uset + +import "vitess.io/vitess/go/mysql/icuregex/internal/ucase" + +type USet uint32 + +const ( + /** + * Ignore white space within patterns unless quoted or escaped. + * @stable ICU 2.4 + */ + IgnoreSpace USet = 1 + + /** + * Enable case insensitive matching. E.g., "[ab]" with this flag + * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will + * match all except 'a', 'A', 'b', and 'B'. This performs a full + * closure over case mappings, e.g. U+017F for s. + * + * The resulting set is a superset of the input for the code points but + * not for the strings. + * It performs a case mapping closure of the code points and adds + * full case folding strings for the code points, and reduces strings of + * the original set to their full case folding equivalents. + * + * This is designed for case-insensitive matches, for example + * in regular expressions. The full code point case closure allows checking of + * an input character directly against the closure set. + * Strings are matched by comparing the case-folded form from the closure + * set with an incremental case folding of the string in question. + * + * The closure set will also contain single code points if the original + * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.). + * This is not necessary (that is, redundant) for the above matching method + * but results in the same closure sets regardless of whether the original + * set contained the code point or a string. + * + * @stable ICU 2.4 + */ + CaseInsensitive USet = 2 + + /** + * Enable case insensitive matching. E.g., "[ab]" with this flag + * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will + * match all except 'a', 'A', 'b', and 'B'. This adds the lower-, + * title-, and uppercase mappings as well as the case folding + * of each existing element in the set. + * @stable ICU 3.2 + */ + AddCaseMappings USet = 4 +) + +func (u *UnicodeSet) CloseOver(attribute USet) { + if attribute&AddCaseMappings != 0 { + panic("USET_ADD_CASE_MAPPINGS is unsupported") + } + if (attribute & CaseInsensitive) == 0 { + return + } + + foldSet := u.Clone() + n := u.RangeCount() + + for i := 0; i < n; i++ { + start := u.RangeStart(i) + end := u.RangeEnd(i) + + // full case closure + for cp := start; cp <= end; cp++ { + ucase.AddCaseClosure(cp, foldSet) + } + } + + *u = *foldSet +} diff --git a/go/mysql/icuregex/internal/uset/frozen.go b/go/mysql/icuregex/internal/uset/frozen.go new file mode 100644 index 00000000000..2703a4f6975 --- /dev/null +++ b/go/mysql/icuregex/internal/uset/frozen.go @@ -0,0 +1,339 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. 
+License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package uset + +type frozen struct { + // One byte 0 or 1 per Latin-1 character. + latin1Contains [0x100]byte + + // true if contains(U+FFFD) + containsFFFD bool + + /* + * One bit per code point from U+0000..U+07FF. + * The bits are organized vertically; consecutive code points + * correspond to the same bit positions in consecutive table words. + * With code point parts + * lead=c{10..6} + * trail=c{5..0} + * it is set.contains(c)==(table7FF[trail] bit lead) + * + * Bits for 0..7F (non-shortest forms) are set to the result of contains(FFFD) + * for faster validity checking at runtime. + */ + table7FF [64]uint32 + + /* + * One bit per 64 BMP code points. + * The bits are organized vertically; consecutive 64-code point blocks + * correspond to the same bit position in consecutive table words. + * With code point parts + * lead=c{15..12} + * t1=c{11..6} + * test bits (lead+16) and lead in bmpBlockBits[t1]. + * If the upper bit is 0, then the lower bit indicates if contains(c) + * for all code points in the 64-block. + * If the upper bit is 1, then the block is mixed and set.contains(c) + * must be called. + * + * Bits for 0..7FF (non-shortest forms) and D800..DFFF are set to + * the result of contains(FFFD) for faster validity checking at runtime. + */ + bmpBlockBits [64]uint32 + + /* + * Inversion list indexes for restricted binary searches in + * findCodePoint(), from + * findCodePoint(U+0800, U+1000, U+2000, .., U+F000, U+10000). + * U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are + * always looked up in the bit tables. + * The last pair of indexes is for finding supplementary code points. + */ + list4kStarts [18]int32 +} + +func freeze(list []rune) *frozen { + f := &frozen{} + + listEnd := int32(len(list) - 1) + + f.list4kStarts[0] = f.findCodePoint(list, 0x800, 0, listEnd) + for i := 1; i <= 0x10; i++ { + f.list4kStarts[i] = f.findCodePoint(list, rune(i)<<12, f.list4kStarts[i-1], listEnd) + } + f.list4kStarts[0x11] = listEnd + f.containsFFFD = f.containsSlow(list, 0xfffd, f.list4kStarts[0xf], f.list4kStarts[0x10]) + + f.initBits(list) + f.overrideIllegal() + + return f +} + +func (f *frozen) containsSlow(list []rune, c rune, lo, hi int32) bool { + return (f.findCodePoint(list, c, lo, hi) & 1) != 0 +} + +func (f *frozen) findCodePoint(list []rune, c rune, lo, hi int32) int32 { + /* Examples: + findCodePoint(c) + set list[] c=0 1 3 4 7 8 + === ============== =========== + [] [110000] 0 0 0 0 0 0 + [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2 + [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2 + [:Any:] [0, 110000] 1 1 1 1 1 1 + */ + + // Return the smallest i such that c < list[i]. Assume + // list[len - 1] == HIGH and that c is legal (0..HIGH-1). + if c < list[lo] { + return lo + } + // High runner test. c is often after the last range, so an + // initial check for this condition pays off. 
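+	// Illustrative example (editorial): with list = {0x41, 0x5B, 0x110000},
+	// i.e. the set [A-Z], this search returns 1 for 'Z' (an odd index, so the
+	// code point is inside a range) and 2 for 'a' (even, so outside), given
+	// lo=0 and hi pointing at the final 0x110000 sentinel.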
+ if lo >= hi || c >= list[hi-1] { + return hi + } + // invariant: c >= list[lo] + // invariant: c < list[hi] + for { + i := (lo + hi) >> 1 + if i == lo { + break // Found! + } else if c < list[i] { + hi = i + } else { + lo = i + } + } + return hi +} + +func (f *frozen) set32x64bits(table *[64]uint32, start, limit int32) { + lead := start >> 6 // Named for UTF-8 2-byte lead byte with upper 5 bits. + trail := start & 0x3f // Named for UTF-8 2-byte trail byte with lower 6 bits. + + // Set one bit indicating an all-one block. + bits := uint32(1) << lead + if (start + 1) == limit { // Single-character shortcut. + table[trail] |= bits + return + } + + limitLead := limit >> 6 + limitTrail := limit & 0x3f + + if lead == limitLead { + // Partial vertical bit column. + for trail < limitTrail { + table[trail] |= bits + trail++ + } + } else { + // Partial vertical bit column, + // followed by a bit rectangle, + // followed by another partial vertical bit column. + if trail > 0 { + for { + table[trail] |= bits + trail++ + if trail >= 64 { + break + } + } + lead++ + } + if lead < limitLead { + bits = ^((uint32(1) << lead) - 1) + if limitLead < 0x20 { + bits &= (uint32(1) << limitLead) - 1 + } + for trail = 0; trail < 64; trail++ { + table[trail] |= bits + } + } + // limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0. + // In that case, bits=1<= 0x100 { + break + } + for { + f.latin1Contains[start] = 1 + start++ + if start >= limit || start >= 0x100 { + break + } + } + if limit > 0x100 { + break + } + } + + // Find the first range overlapping with (or after) 80..FF again, + // to include them in table7FF as well. + listIndex = 0 + for { + start = list[listIndex] + listIndex++ + if listIndex < len(list) { + limit = list[listIndex] + listIndex++ + } else { + limit = 0x110000 + } + if limit > 0x80 { + if start < 0x80 { + start = 0x80 + } + break + } + } + + // Set table7FF[]. + for start < 0x800 { + var end rune + if limit <= 0x800 { + end = limit + } else { + end = 0x800 + } + f.set32x64bits(&f.table7FF, start, end) + if limit > 0x800 { + start = 0x800 + break + } + + start = list[listIndex] + listIndex++ + if listIndex < len(list) { + limit = list[listIndex] + listIndex++ + } else { + limit = 0x110000 + } + } + + // Set bmpBlockBits[]. + minStart := rune(0x800) + for start < 0x10000 { + if limit > 0x10000 { + limit = 0x10000 + } + + if start < minStart { + start = minStart + } + if start < limit { // Else: Another range entirely in a known mixed-value block. + if (start & 0x3f) != 0 { + // Mixed-value block of 64 code points. + start >>= 6 + f.bmpBlockBits[start&0x3f] |= 0x10001 << (start >> 6) + start = (start + 1) << 6 // Round up to the next block boundary. + minStart = start // Ignore further ranges in this block. + } + if start < limit { + if start < (limit &^ 0x3f) { + // Multiple all-ones blocks of 64 code points each. + f.set32x64bits(&f.bmpBlockBits, start>>6, limit>>6) + } + + if (limit & 0x3f) != 0 { + // Mixed-value block of 64 code points. + limit >>= 6 + f.bmpBlockBits[limit&0x3f] |= 0x10001 << (limit >> 6) + limit = (limit + 1) << 6 // Round up to the next block boundary. + minStart = limit // Ignore further ranges in this block. 
+ } + } + } + + if limit == 0x10000 { + break + } + + start = list[listIndex] + listIndex++ + if listIndex < len(list) { + limit = list[listIndex] + listIndex++ + } else { + limit = 0x110000 + } + } +} diff --git a/go/mysql/icuregex/internal/uset/pattern.go b/go/mysql/icuregex/internal/uset/pattern.go new file mode 100644 index 00000000000..20b44da9c6d --- /dev/null +++ b/go/mysql/icuregex/internal/uset/pattern.go @@ -0,0 +1,107 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package uset + +import ( + "strings" + + "vitess.io/vitess/go/mysql/icuregex/internal/pattern" +) + +func (u *UnicodeSet) String() string { + var buf strings.Builder + u.ToPattern(&buf, true) + return buf.String() +} + +func (u *UnicodeSet) ToPattern(w *strings.Builder, escapeUnprintable bool) { + w.WriteByte('[') + + // // Check against the predefined categories. We implicitly build + // // up ALL category sets the first time toPattern() is called. + // for (int8_t cat=0; cat 1 && u.RangeStart(0) == MinValue && u.RangeEnd(count-1) == MaxValue { + + // Emit the inverse + w.WriteByte('^') + + for i := 1; i < count; i++ { + start := u.RangeEnd(i-1) + 1 + end := u.RangeStart(i) - 1 + u.appendToPattern(w, start, escapeUnprintable) + if start != end { + if (start + 1) != end { + w.WriteByte('-') + } + u.appendToPattern(w, end, escapeUnprintable) + } + } + } else { + // Default; emit the ranges as pairs + for i := 0; i < count; i++ { + start := u.RangeStart(i) + end := u.RangeEnd(i) + u.appendToPattern(w, start, escapeUnprintable) + if start != end { + if (start + 1) != end { + w.WriteByte('-') + } + u.appendToPattern(w, end, escapeUnprintable) + } + } + } + + w.WriteByte(']') +} + +func (u *UnicodeSet) appendToPattern(w *strings.Builder, c rune, escapeUnprintable bool) { + if escapeUnprintable && pattern.IsUnprintable(c) { + // Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything + // unprintable + pattern.EscapeUnprintable(w, c) + return + } + + // Okay to let ':' pass through + switch c { + case '[', ']', '-', '^', '&', '\\', '{', '}', ':', '$': + w.WriteByte('\\') + default: + // Escape whitespace + if pattern.IsWhitespace(c) { + w.WriteByte('\\') + } + } + w.WriteRune(c) +} diff --git a/go/mysql/icuregex/internal/uset/unicode_set.go b/go/mysql/icuregex/internal/uset/unicode_set.go new file mode 100644 index 00000000000..3dba317eab2 --- /dev/null +++ b/go/mysql/icuregex/internal/uset/unicode_set.go @@ -0,0 +1,694 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. 
+License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package uset + +import ( + "fmt" + + "golang.org/x/exp/slices" +) + +// HIGH_VALUE > all valid values. 110000 for codepoints +const unicodeSetHigh = 0x0110000 + +// LOW <= all valid values. ZERO for codepoints +const unicodeSetLow = 0x000000 + +const ( + /** + * Minimum value that can be stored in a UnicodeSet. + * @stable ICU 2.4 + */ + MinValue = 0 + + /** + * Maximum value that can be stored in a UnicodeSet. + * @stable ICU 2.4 + */ + MaxValue = 0x10ffff +) + +type UnicodeSet struct { + list []rune + buffer []rune + frozen *frozen +} + +func New() *UnicodeSet { + buf := make([]rune, 1, 25) + buf[0] = unicodeSetHigh + return &UnicodeSet{list: buf} +} + +func FromRunes(list []rune) *UnicodeSet { + return &UnicodeSet{list: list} +} + +func (u *UnicodeSet) ensureBufferCapacity(c int) { + if cap(u.buffer) < c { + u.buffer = make([]rune, c) + return + } + u.buffer = u.buffer[:cap(u.buffer)] +} + +func (u *UnicodeSet) addbuffer(other []rune, polarity int8) { + if u.frozen != nil { + panic("UnicodeSet is frozen") + } + u.ensureBufferCapacity(len(u.list) + len(other)) + + i := 1 + j := 1 + k := 0 + + a := u.list[0] + b := other[0] + + for { + switch polarity { + case 0: + if a < b { + if k > 0 && a <= u.buffer[k-1] { + k-- + a = max(u.list[i], u.buffer[k]) + } else { + u.buffer[k] = a + k++ + a = u.list[i] + } + i++ + polarity ^= 1 + } else if b < a { + if k > 0 && b <= u.buffer[k-1] { + k-- + b = max(other[j], u.buffer[k]) + } else { + u.buffer[k] = b + k++ + b = other[j] + } + j++ + polarity ^= 2 + } else { + if a == unicodeSetHigh { + goto loopEnd + } + if k > 0 && a <= u.buffer[k-1] { + k-- + a = max(u.list[i], u.buffer[k]) + } else { + u.buffer[k] = a + k++ + a = u.list[i] + } + i++ + polarity ^= 1 + b = other[j] + j++ + polarity ^= 2 + } + case 3: + if b <= a { + if a == unicodeSetHigh { + goto loopEnd + } + u.buffer[k] = a + k++ + } else { + if b == unicodeSetHigh { + goto loopEnd + } + u.buffer[k] = b + k++ + } + a = u.list[i] + i++ + polarity ^= 1 + b = other[j] + j++ + polarity ^= 2 + case 1: + if a < b { + u.buffer[k] = a + k++ + a = u.list[i] + i++ + polarity ^= 1 + } else if b < a { + b = other[j] + j++ + polarity ^= 2 + } else { + if a == unicodeSetHigh { + goto loopEnd + } + a = u.list[i] + i++ + polarity ^= 1 + b = other[j] + j++ + polarity ^= 2 + } + case 2: + if b < a { + u.buffer[k] = b + k++ + b = other[j] + j++ + polarity ^= 2 + } else if a < b { + a = u.list[i] + i++ + polarity ^= 1 + } else { + if a == unicodeSetHigh { + goto loopEnd + } + a = u.list[i] + i++ + polarity ^= 1 + b = other[j] + j++ + polarity ^= 2 + } + } + } + +loopEnd: + u.buffer[k] = unicodeSetHigh + k++ + + u.list, u.buffer = u.buffer[:k], u.list +} + +func max(a, b rune) rune { + if a > b { + return a + } + return b +} + +func pinCodePoint(c *rune) rune { + if *c < unicodeSetLow { + *c = unicodeSetLow + } else if *c > (unicodeSetHigh - 1) { + *c = unicodeSetHigh - 1 + } + return *c +} + 
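+// Editorial sketch (not part of the ICU-derived sources): the set is stored
+// as an inversion list — ascending [start, limit) boundaries terminated by
+// unicodeSetHigh — so the set {'a'..'c', 'x'} is represented as
+// []rune{'a', 'd', 'x', 'y', unicodeSetHigh}. A minimal usage example,
+// assuming only the methods defined in this file:
+//
+//	s := New()
+//	s.AddRuneRange('a', 'c')
+//	s.AddRune('x')
+//	_ = s.ContainsRune('b') // true
+//	_ = s.ContainsRune('d') // false
+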
+func (u *UnicodeSet) AddRune(c rune) { + if u.frozen != nil { + panic("UnicodeSet is frozen") + } + + // find smallest i such that c < list[i] + // if odd, then it is IN the set + // if even, then it is OUT of the set + i := u.findCodePoint(pinCodePoint(&c)) + + // already in set? + if (i & 1) != 0 { + return + } + + // HIGH is 0x110000 + // assert(list[len-1] == HIGH); + + // empty = [HIGH] + // [start_0, limit_0, start_1, limit_1, HIGH] + + // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH] + // ^ + // list[i] + + // i == 0 means c is before the first range + if c == u.list[i]-1 { + // c is before start of next range + u.list[i] = c + // if we touched the HIGH mark, then add a new one + if c == (unicodeSetHigh - 1) { + u.list = append(u.list, unicodeSetHigh) + } + if i > 0 && c == u.list[i-1] { + // collapse adjacent ranges + + // [..., start_k-1, c, c, limit_k, ..., HIGH] + // ^ + // list[i] + for k := i - 1; k < len(u.list)-2; k++ { + u.list[k] = u.list[k+2] + } + u.list = u.list[:len(u.list)-2] + } + } else if i > 0 && c == u.list[i-1] { + // c is after end of prior range + u.list[i-1]++ + // no need to check for collapse here + } else { + // At this point we know the new char is not adjacent to + // any existing ranges, and it is not 10FFFF. + + // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH] + // ^ + // list[i] + + // [..., start_k-1, limit_k-1, c, c+1, start_k, limit_k, ..., HIGH] + // ^ + // list[i] + u.list = slices.Insert(u.list, i, c, c+1) + } +} + +func (u *UnicodeSet) AddRuneRange(start, end rune) { + if pinCodePoint(&start) < pinCodePoint(&end) { + limit := end + 1 + // Fast path for adding a new range after the last one. + // Odd list length: [..., lastStart, lastLimit, HIGH] + if (len(u.list) & 1) != 0 { + // If the list is empty, set lastLimit low enough to not be adjacent to 0. + var lastLimit rune + if len(u.list) == 1 { + lastLimit = -2 + } else { + lastLimit = u.list[len(u.list)-2] + } + if lastLimit <= start { + if lastLimit == start { + // Extend the last range. + u.list[len(u.list)-2] = limit + if limit == unicodeSetHigh { + u.list = u.list[:len(u.list)-1] + } + } else { + u.list[len(u.list)-1] = start + if limit < unicodeSetHigh { + u.list = append(u.list, limit) + u.list = append(u.list, unicodeSetHigh) + } else { // limit == UNICODESET_HIGH + u.list = append(u.list, unicodeSetHigh) + } + } + return + } + } + // This is slow. Could be much faster using findCodePoint(start) + // and modifying the list, dealing with adjacent & overlapping ranges. 
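+		// For example (editorial note), adding U+0080..U+00FF to a set that
+		// already contains U+0100..U+01FF cannot use the append fast path
+		// above, so it falls through to the general merge in addbuffer, which
+		// coalesces the two into the single range U+0080..U+01FF.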
+ addRange := [3]rune{start, limit, unicodeSetHigh} + u.addbuffer(addRange[:], 0) + } else if start == end { + u.AddRune(start) + } +} + +func (u *UnicodeSet) AddAll(u2 *UnicodeSet) { + if len(u2.list) > 0 { + u.addbuffer(u2.list, 0) + } +} + +func (u *UnicodeSet) Complement() { + if u.frozen != nil { + panic("UnicodeSet is frozen") + } + if u.list[0] == unicodeSetLow { + copy(u.list, u.list[1:]) + u.list = u.list[:len(u.list)-1] + } else { + u.list = slices.Insert(u.list, 0, unicodeSetLow) + } +} + +func (u *UnicodeSet) RemoveRuneRange(start, end rune) { + if pinCodePoint(&start) < pinCodePoint(&end) { + r := [3]rune{start, end + 1, unicodeSetHigh} + u.retain(r[:], 2) + } +} + +func (u *UnicodeSet) RemoveAll(c *UnicodeSet) { + u.retain(c.list, 2) +} + +func (u *UnicodeSet) RetainAll(c *UnicodeSet) { + u.retain(c.list, 0) +} + +func (u *UnicodeSet) retain(other []rune, polarity int8) { + if u.frozen != nil { + panic("UnicodeSet is frozen") + } + + u.ensureBufferCapacity(len(u.list) + len(other)) + + i := 1 + j := 1 + k := 0 + + a := u.list[0] + b := other[0] + + // change from xor is that we have to check overlapping pairs + // polarity bit 1 means a is second, bit 2 means b is. + for { + switch polarity { + case 0: // both first; drop the smaller + if a < b { // drop a + a = u.list[i] + i++ + polarity ^= 1 + } else if b < a { // drop b + b = other[j] + j++ + polarity ^= 2 + } else { // a == b, take one, drop other + if a == unicodeSetHigh { + goto loop_end + } + u.buffer[k] = a + k++ + a = u.list[i] + i++ + polarity ^= 1 + b = other[j] + j++ + polarity ^= 2 + } + case 3: // both second; take lower if unequal + if a < b { // take a + u.buffer[k] = a + k++ + a = u.list[i] + i++ + polarity ^= 1 + } else if b < a { // take b + u.buffer[k] = b + k++ + b = other[j] + j++ + polarity ^= 2 + } else { // a == b, take one, drop other + if a == unicodeSetHigh { + goto loop_end + } + u.buffer[k] = a + k++ + a = u.list[i] + i++ + polarity ^= 1 + b = other[j] + j++ + polarity ^= 2 + } + case 1: // a second, b first; + if a < b { // NO OVERLAP, drop a + a = u.list[i] + i++ + polarity ^= 1 + } else if b < a { // OVERLAP, take b + u.buffer[k] = b + k++ + b = other[j] + j++ + polarity ^= 2 + } else { // a == b, drop both! + if a == unicodeSetHigh { + goto loop_end + } + a = u.list[i] + i++ + polarity ^= 1 + b = other[j] + j++ + polarity ^= 2 + } + case 2: // a first, b second; if a < b, overlap + if b < a { // no overlap, drop b + b = other[j] + j++ + polarity ^= 2 + } else if a < b { // OVERLAP, take a + u.buffer[k] = a + k++ + a = u.list[i] + i++ + polarity ^= 1 + } else { // a == b, drop both! 
+ if a == unicodeSetHigh { + goto loop_end + } + a = u.list[i] + i++ + polarity ^= 1 + b = other[j] + j++ + polarity ^= 2 + } + } + } + +loop_end: + u.buffer[k] = unicodeSetHigh // terminate + k++ + u.list, u.buffer = u.buffer[:k], u.list +} + +func (u *UnicodeSet) Clear() { + if u.frozen != nil { + panic("UnicodeSet is frozen") + } + u.list = u.list[:1] + u.list[0] = unicodeSetHigh +} + +func (u *UnicodeSet) Len() (n int) { + count := u.RangeCount() + for i := 0; i < count; i++ { + n += int(u.RangeEnd(i)) - int(u.RangeStart(i)) + 1 + } + return +} + +func (u *UnicodeSet) RangeCount() int { + return len(u.list) / 2 +} + +func (u *UnicodeSet) RangeStart(idx int) rune { + return u.list[idx*2] +} + +func (u *UnicodeSet) RangeEnd(idx int) rune { + return u.list[idx*2+1] - 1 +} + +func (u *UnicodeSet) RuneAt(idx int) rune { + if idx >= 0 { + // len2 is the largest even integer <= len, that is, it is len + // for even values and len-1 for odd values. With odd values + // the last entry is UNICODESET_HIGH. + len2 := len(u.list) + if (len2 & 0x1) != 0 { + len2-- + } + + var i int + for i < len2 { + start := u.list[i] + count := int(u.list[i+1] - start) + i += 2 + if idx < count { + return start + rune(idx) + } + idx -= count + } + } + return -1 +} + +func (u *UnicodeSet) ContainsRune(c rune) bool { + if f := u.frozen; f != nil { + if c < 0 { + return false + } else if c <= 0xff { + return f.latin1Contains[c] != 0 + } else if c <= 0x7ff { + return (f.table7FF[c&0x3f] & (uint32(1) << (c >> 6))) != 0 + } else if c < 0xd800 || (c >= 0xe000 && c <= 0xffff) { + lead := c >> 12 + twoBits := (f.bmpBlockBits[(c>>6)&0x3f] >> lead) & 0x10001 + if twoBits <= 1 { + // All 64 code points with the same bits 15..6 + // are either in the set or not. + return twoBits != 0 + } + // Look up the code point in its 4k block of code points. + return f.containsSlow(u.list, c, f.list4kStarts[lead], f.list4kStarts[lead+1]) + } else if c <= 0x10ffff { + // surrogate or supplementary code point + return f.containsSlow(u.list, c, f.list4kStarts[0xd], f.list4kStarts[0x11]) + } + // Out-of-range code points get FALSE, consistent with long-standing + // behavior of UnicodeSet::contains(c). + return false + } + + if c >= unicodeSetHigh { + return false + } + i := u.findCodePoint(c) + return (i & 1) != 0 +} + +func (u *UnicodeSet) ContainsRuneRange(from, to rune) bool { + i := u.findCodePoint(from) + return (i&1) != 0 && to < u.list[i] +} + +func (u *UnicodeSet) findCodePoint(c rune) int { + /* Examples: + findCodePoint(c) + set list[] c=0 1 3 4 7 8 + === ============== =========== + [] [110000] 0 0 0 0 0 0 + [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2 + [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2 + [:Any:] [0, 110000] 1 1 1 1 1 1 + */ + + // Return the smallest i such that c < list[i]. Assume + // list[len - 1] == HIGH and that c is legal (0..HIGH-1). + if c < u.list[0] { + return 0 + } + + // High runner test. c is often after the last range, so an + // initial check for this condition pays off. + lo := 0 + hi := len(u.list) - 1 + if lo >= hi || c >= u.list[hi-1] { + return hi + } + + // invariant: c >= list[lo] + // invariant: c < list[hi] + for { + i := (lo + hi) >> 1 + if i == lo { + break // Found! 
+ } else if c < u.list[i] { + hi = i + } else { + lo = i + } + } + return hi +} + +func (u *UnicodeSet) AddString(chars string) { + for _, c := range chars { + u.AddRune(c) + } +} + +type Filter func(ch rune) bool + +func (u *UnicodeSet) ApplyFilter(inclusions *UnicodeSet, filter Filter) { + // Logically, walk through all Unicode characters, noting the start + // and end of each range for which filter.contain(c) is + // true. Add each range to a set. + // + // To improve performance, use an inclusions set which + // encodes information about character ranges that are known + // to have identical properties. + // inclusions contains the first characters of + // same-value ranges for the given property. + + u.Clear() + + startHasProperty := rune(-1) + limitRange := inclusions.RangeCount() + + for j := 0; j < limitRange; j++ { + // get current range + start := inclusions.RangeStart(j) + end := inclusions.RangeEnd(j) + + // for all the code points in the range, process + for ch := start; ch <= end; ch++ { + // only add to this UnicodeSet on inflection points -- + // where the hasProperty value changes to false + if filter(ch) { + if startHasProperty < 0 { + startHasProperty = ch + } + } else if startHasProperty >= 0 { + u.AddRuneRange(startHasProperty, ch-1) + startHasProperty = -1 + } + } + } + if startHasProperty >= 0 { + u.AddRuneRange(startHasProperty, 0x10FFFF) + } +} + +func (u *UnicodeSet) Clone() *UnicodeSet { + return &UnicodeSet{list: slices.Clone(u.list)} +} + +func (u *UnicodeSet) IsEmpty() bool { + return len(u.list) == 1 +} + +func (u *UnicodeSet) CopyFrom(set *UnicodeSet) { + if u.frozen != nil { + panic("UnicodeSet is frozen") + } + u.list = slices.Clone(set.list) +} + +func (u *UnicodeSet) Equals(other *UnicodeSet) bool { + return slices.Equal(u.list, other.list) +} + +func (u *UnicodeSet) Freeze() *UnicodeSet { + u.frozen = freeze(u.list) + return u +} + +func (u *UnicodeSet) FreezeCheck_() error { + if u == nil { + return nil + } + if u.frozen == nil { + return fmt.Errorf("UnicodeSet is not frozen") + } + for r := rune(0); r <= 0x10ffff; r++ { + want := (u.findCodePoint(r) & 1) != 0 + got := u.ContainsRune(r) + if want != got { + return fmt.Errorf("rune '%c' (U+%04X) did not freeze", r, r) + } + } + return nil +} diff --git a/go/mysql/icuregex/internal/uset/unicode_set_test.go b/go/mysql/icuregex/internal/uset/unicode_set_test.go new file mode 100644 index 00000000000..908abd8889d --- /dev/null +++ b/go/mysql/icuregex/internal/uset/unicode_set_test.go @@ -0,0 +1,43 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package uset + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSimpleBelong(t *testing.T) { + ss1 := New() + ss1.AddString("*?+[(){}^$|\\.") + ss2 := New() + ss2.AddString("*?+[(){}^$|\\.") + ss2.Complement() + ss3 := New() + ss3.AddRune('*') + ss3.AddRune('?') + + assert.True(t, ss1.ContainsRune('(')) + assert.False(t, ss2.ContainsRune('(')) + assert.True(t, ss3.ContainsRune('*')) +} diff --git a/go/mysql/icuregex/internal/utf16/helpers.go b/go/mysql/icuregex/internal/utf16/helpers.go new file mode 100644 index 00000000000..bdf53ae731c --- /dev/null +++ b/go/mysql/icuregex/internal/utf16/helpers.go @@ -0,0 +1,65 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utf16 + +import "unicode/utf16" + +func IsLead(c rune) bool { + return (uint32(c) & 0xfffffc00) == 0xd800 +} + +func IsTrail(c rune) bool { + return (uint32(c) & 0xfffffc00) == 0xdc00 +} + +/** + * Is this code point a surrogate (U+d800..U+dfff)? + * @param c 32-bit code point + * @return true or false + * @stable ICU 2.4 + */ +func IsSurrogate(c rune) bool { + return (uint32(c) & 0xfffff800) == 0xd800 +} + +/** + * Assuming c is a surrogate code point (U_IS_SURROGATE(c)), + * is it a lead surrogate? + * @param c 32-bit code point + * @return true or false + * @stable ICU 2.4 + */ +func IsSurrogateLead(c rune) bool { + return (uint32(c) & 0x400) == 0 +} + +func DecodeRune(a, b rune) rune { + return utf16.DecodeRune(a, b) +} + +func NextUnsafe(s []uint16) (rune, []uint16) { + c := rune(s[0]) + if !IsLead(c) { + return c, s[1:] + } + return DecodeRune(c, rune(s[1])), s[2:] +} diff --git a/go/mysql/icuregex/internal/utrie/ucptrie.go b/go/mysql/icuregex/internal/utrie/ucptrie.go new file mode 100644 index 00000000000..74e4eb9b2fa --- /dev/null +++ b/go/mysql/icuregex/internal/utrie/ucptrie.go @@ -0,0 +1,708 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package utrie + +import ( + "errors" + "fmt" + + "vitess.io/vitess/go/mysql/icuregex/internal/udata" +) + +type UcpTrie struct { + index []uint16 + data8 []uint8 + data16 []uint16 + data32 []uint32 + + indexLength, dataLength int32 + /** Start of the last range which ends at U+10FFFF. @internal */ + highStart rune + shifted12HighStart uint16 + + typ ucpTrieType + valueWidth ucpTrieValueWidth + + /** + * Internal index-3 null block offset. + * Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block. + * @internal + */ + index3NullOffset uint16 + /** + * Internal data null block offset, not shifted. + * Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block. + * @internal + */ + dataNullOffset int32 + + nullValue uint32 +} + +/** + * Selectors for the type of a UCPTrie. + * Different trade-offs for size vs. speed. + * + * @see umutablecptrie_buildImmutable + * @see ucptrie_openFromBinary + * @see ucptrie_getType + * @stable ICU 63 + */ +type ucpTrieType int8 + +const ( + /** + * For ucptrie_openFromBinary() to accept any type. + * ucptrie_getType() will return the actual type. + * @stable ICU 63 + */ + typeAny ucpTrieType = iota - 1 + /** + * Fast/simple/larger BMP data structure. Use functions and "fast" macros. + * @stable ICU 63 + */ + typeFast + /** + * Small/slower BMP data structure. Use functions and "small" macros. + * @stable ICU 63 + */ + typeSmall +) + +/** + * Selectors for the number of bits in a UCPTrie data value. + * + * @see umutablecptrie_buildImmutable + * @see ucptrie_openFromBinary + * @see ucptrie_getValueWidth + * @stable ICU 63 + */ +type ucpTrieValueWidth int8 + +const ( + /** + * For ucptrie_openFromBinary() to accept any data value width. + * ucptrie_getValueWidth() will return the actual data value width. + * @stable ICU 63 + */ + valueBitsAny ucpTrieValueWidth = iota - 1 + /** + * The trie stores 16 bits per data value. + * It returns them as unsigned values 0..0xffff=65535. + * @stable ICU 63 + */ + valueBits16 + /** + * The trie stores 32 bits per data value. + * @stable ICU 63 + */ + valueBits32 + /** + * The trie stores 8 bits per data value. + * It returns them as unsigned values 0..0xff=255. + * @stable ICU 63 + */ + valueBits8 +) + +const ucpTrieSig = 0x54726933 +const ucpTrieOESig = 0x33697254 + +/** + * Constants for use with UCPTrieHeader.options. + * @internal + */ +const ( + optionsDataLengthMask = 0xf000 + optionsDataNullOffsetMask = 0xf00 + optionsReservedMask = 0x38 + optionsValueBitsMask = 7 +) + +const ( + /** @internal */ + fastShift = 6 + + /** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */ + fastDataBlockLength = 1 << fastShift + + /** Mask for getting the lower bits for the in-fast-data-block offset. @internal */ + fastDataMask = fastDataBlockLength - 1 + + /** @internal */ + smallMax = 0xfff + + /** + * Offset from dataLength (to be subtracted) for fetching the + * value returned for out-of-range code points and ill-formed UTF-8/16. + * @internal + */ + errorValueNegDataOffset = 1 + /** + * Offset from dataLength (to be subtracted) for fetching the + * value returned for code points highStart..U+10FFFF. + * @internal + */ + highValueNegDataOffset = 2 +) + +// Internal constants. +const ( + /** The length of the BMP index table. 1024=0x400 */ + bmpIndexLength = 0x10000 >> fastShift + + smallLimit = 0x1000 + smallIndexLength = smallLimit >> fastShift + + /** Shift size for getting the index-3 table offset. 
*/ + ucpShift3 = 4 + + /** Shift size for getting the index-2 table offset. */ + ucpShift2 = 5 + ucpShift3 + + /** Shift size for getting the index-1 table offset. */ + ucpShift1 = 5 + ucpShift2 + + /** + * Difference between two shift sizes, + * for getting an index-2 offset from an index-3 offset. 5=9-4 + */ + ucpShift2Min3 = ucpShift2 - ucpShift3 + + /** + * Difference between two shift sizes, + * for getting an index-1 offset from an index-2 offset. 5=14-9 + */ + ucpShift1Min2 = ucpShift1 - ucpShift2 + + /** + * Number of index-1 entries for the BMP. (4) + * This part of the index-1 table is omitted from the serialized form. + */ + ucpOmittedBmpIndex1Length = 0x10000 >> ucpShift1 + + /** Number of entries in an index-2 block. 32=0x20 */ + ucpIndex2BlockLength = 1 << ucpShift1Min2 + + /** Mask for getting the lower bits for the in-index-2-block offset. */ + ucpIndex2Mask = ucpIndex2BlockLength - 1 + + /** Number of code points per index-2 table entry. 512=0x200 */ + ucpCpPerIndex2Entry = 1 << ucpShift2 + + /** Number of entries in an index-3 block. 32=0x20 */ + ucpIndex3BlockLength = 1 << ucpShift2Min3 + + /** Mask for getting the lower bits for the in-index-3-block offset. */ + ucpIndex3Mask = ucpIndex3BlockLength - 1 + + /** Number of entries in a small data block. 16=0x10 */ + ucpSmallDataBlockLength = 1 << ucpShift3 + + /** Mask for getting the lower bits for the in-small-data-block offset. */ + ucpSmallDataMask = ucpSmallDataBlockLength - 1 +) + +func UcpTrieFromBytes(bytes *udata.Bytes) (*UcpTrie, error) { + type ucpHeader struct { + /** "Tri3" in big-endian US-ASCII (0x54726933) */ + signature uint32 + + /** + * Options bit field: + * Bits 15..12: Data length bits 19..16. + * Bits 11..8: Data null block offset bits 19..16. + * Bits 7..6: UCPTrieType + * Bits 5..3: Reserved (0). + * Bits 2..0: UCPTrieValueWidth + */ + options uint16 + + /** Total length of the index tables. */ + indexLength uint16 + + /** Data length bits 15..0. */ + dataLength uint16 + + /** Index-3 null block offset, 0x7fff or 0xffff if none. */ + index3NullOffset uint16 + + /** Data null block offset bits 15..0, 0xfffff if none. */ + dataNullOffset uint16 + + /** + * First code point of the single-value range ending with U+10ffff, + * rounded up and then shifted right by UCPTRIE_SHIFT_2. 
+ */ + shiftedHighStart uint16 + } + + var header ucpHeader + header.signature = bytes.Uint32() + + switch header.signature { + case ucpTrieSig: + case ucpTrieOESig: + return nil, errors.New("unsupported: BigEndian encoding") + default: + return nil, fmt.Errorf("invalid signature for UcpTrie: 0x%08x", header.signature) + } + + header.options = bytes.Uint16() + header.indexLength = bytes.Uint16() + header.dataLength = bytes.Uint16() + header.index3NullOffset = bytes.Uint16() + header.dataNullOffset = bytes.Uint16() + header.shiftedHighStart = bytes.Uint16() + + typeInt := (header.options >> 6) & 3 + valueWidthInt := header.options & optionsValueBitsMask + if typeInt > uint16(typeSmall) || valueWidthInt > uint16(valueBits8) || + (header.options&optionsReservedMask) != 0 { + return nil, errors.New("invalid options for serialized UcpTrie") + } + actualType := ucpTrieType(typeInt) + actualValueWidth := ucpTrieValueWidth(valueWidthInt) + + trie := &UcpTrie{ + indexLength: int32(header.indexLength), + dataLength: int32(((header.options & optionsDataLengthMask) << 4) | header.dataLength), + index3NullOffset: header.index3NullOffset, + dataNullOffset: int32(((header.options & optionsDataNullOffsetMask) << 8) | header.dataNullOffset), + highStart: rune(header.shiftedHighStart) << ucpShift2, + typ: actualType, + valueWidth: actualValueWidth, + } + nullValueOffset := trie.dataNullOffset + if nullValueOffset >= trie.dataLength { + nullValueOffset = trie.dataLength - highValueNegDataOffset + } + + trie.shifted12HighStart = uint16((trie.highStart + 0xfff) >> 12) + trie.index = bytes.Uint16Slice(int32(header.indexLength)) + switch actualValueWidth { + case valueBits16: + trie.data16 = bytes.Uint16Slice(trie.dataLength) + trie.nullValue = uint32(trie.data16[nullValueOffset]) + case valueBits32: + trie.data32 = bytes.Uint32Slice(trie.dataLength) + trie.nullValue = trie.data32[nullValueOffset] + case valueBits8: + trie.data8 = bytes.Uint8Slice(trie.dataLength) + trie.nullValue = uint32(trie.data8[nullValueOffset]) + } + + return trie, nil +} + +func (t *UcpTrie) Get(c rune) uint32 { + var dataIndex int32 + if c <= 0x7f { + // linear ASCII + dataIndex = c + } else { + var fastMax rune + if t.typ == typeFast { + fastMax = 0xffff + } else { + fastMax = smallMax + } + dataIndex = t.cpIndex(fastMax, c) + } + return t.getValue(dataIndex) +} + +func (t *UcpTrie) getValue(dataIndex int32) uint32 { + switch t.valueWidth { + case valueBits16: + return uint32(t.data16[dataIndex]) + case valueBits32: + return t.data32[dataIndex] + case valueBits8: + return uint32(t.data8[dataIndex]) + default: + // Unreachable if the trie is properly initialized. + return 0xffffffff + } +} + +/** Internal trie getter for a code point below the fast limit. Returns the data index. @internal */ +func (t *UcpTrie) fastIndex(c rune) int32 { + return int32(t.index[c>>fastShift]) + (c & fastDataMask) +} + +/** Internal trie getter for a code point at or above the fast limit. Returns the data index. 
@internal */ +func (t *UcpTrie) smallIndex(c rune) int32 { + if c >= t.highStart { + return t.dataLength - highValueNegDataOffset + } + return t.internalSmallIndex(c) +} + +func (t *UcpTrie) internalSmallIndex(c rune) int32 { + i1 := c >> ucpShift1 + if t.typ == typeFast { + i1 += bmpIndexLength - ucpOmittedBmpIndex1Length + } else { + i1 += smallIndexLength + } + i3Block := int32(t.index[int32(t.index[i1])+((c>>ucpShift2)&ucpIndex2Mask)]) + i3 := (c >> ucpShift3) & ucpIndex3Mask + var dataBlock int32 + if (i3Block & 0x8000) == 0 { + // 16-bit indexes + dataBlock = int32(t.index[i3Block+i3]) + } else { + // 18-bit indexes stored in groups of 9 entries per 8 indexes. + i3Block = (i3Block & 0x7fff) + (i3 & ^7) + (i3 >> 3) + i3 &= 7 + dataBlock = int32(t.index[i3Block]) << (2 + (2 * i3)) & 0x30000 + i3Block++ + dataBlock |= int32(t.index[i3Block+i3]) + } + return dataBlock + (c & ucpSmallDataMask) +} + +/** + * Internal trie getter for a code point, with checking that c is in U+0000..10FFFF. + * Returns the data index. + * @internal + */ +func (t *UcpTrie) cpIndex(fastMax, c rune) int32 { + if c <= fastMax { + return t.fastIndex(c) + } + if c <= 0x10ffff { + return t.smallIndex(c) + } + return t.dataLength - errorValueNegDataOffset +} + +/** + * Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates. + * Most users should use UCPMAP_RANGE_NORMAL. + * + * @see ucpmap_getRange + * @see ucptrie_getRange + * @see umutablecptrie_getRange + * @stable ICU 63 + */ +type UcpMapRangeOption int8 + +const ( + /** + * ucpmap_getRange() enumerates all same-value ranges as stored in the map. + * Most users should use this option. + * @stable ICU 63 + */ + UcpMapRangeNormal UcpMapRangeOption = iota + /** + * ucpmap_getRange() enumerates all same-value ranges as stored in the map, + * except that lead surrogates (U+D800..U+DBFF) are treated as having the + * surrogateValue, which is passed to getRange() as a separate parameter. + * The surrogateValue is not transformed via filter(). + * See U_IS_LEAD(c). + * + * Most users should use UCPMAP_RANGE_NORMAL instead. + * + * This option is useful for maps that map surrogate code *units* to + * special values optimized for UTF-16 string processing + * or for special error behavior for unpaired surrogates, + * but those values are not to be associated with the lead surrogate code *points*. + * @stable ICU 63 + */ + UcpMapRangeFixedLeadSurrogates + /** + * ucpmap_getRange() enumerates all same-value ranges as stored in the map, + * except that all surrogates (U+D800..U+DFFF) are treated as having the + * surrogateValue, which is passed to getRange() as a separate parameter. + * The surrogateValue is not transformed via filter(). + * See U_IS_SURROGATE(c). + * + * Most users should use UCPMAP_RANGE_NORMAL instead. + * + * This option is useful for maps that map surrogate code *units* to + * special values optimized for UTF-16 string processing + * or for special error behavior for unpaired surrogates, + * but those values are not to be associated with the lead surrogate code *points*. + * @stable ICU 63 + */ + UcpMapRangeFixedAllSurrogates +) + +/** + * Callback function type: Modifies a map value. + * Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange(). + * The modified value will be returned by the getRange function. + * + * Can be used to ignore some of the value bits, + * make a filter for one of several values, + * return a value index computed from the map value, etc. 
+ * + * @param context an opaque pointer, as passed into the getRange function + * @param value a value from the map + * @return the modified value + * @stable ICU 63 + */ +type UcpMapValueFilter func(value uint32) uint32 + +/** + * GetRange returns the last code point such that all those from start to there have the same value. + * Can be used to efficiently iterate over all same-value ranges in a trie. + * (This is normally faster than iterating over code points and get()ting each value, + * but much slower than a data structure that stores ranges directly.) + * + * If the UCPMapValueFilter function pointer is not NULL, then + * the value to be delivered is passed through that function, and the return value is the end + * of the range where all values are modified to the same actual value. + * The value is unchanged if that function pointer is NULL. + * + * Example: + * \code + * UChar32 start = 0, end; + * uint32_t value; + * while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0, + * NULL, NULL, &value)) >= 0) { + * // Work with the range start..end and its value. + * start = end + 1; + * } + * \endcode + * + * @param trie the trie + * @param start range start + * @param option defines whether surrogates are treated normally, + * or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL + * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL + * @param filter a pointer to a function that may modify the trie data value, + * or NULL if the values from the trie are to be used unmodified + * @param context an opaque pointer that is passed on to the filter function + * @param pValue if not NULL, receives the value that every code point start..end has; + * may have been modified by filter(context, trie value) + * if that function pointer is not NULL + * @return the range end code point, or -1 if start is not a valid code point + * @stable ICU 63 + */ +func (t *UcpTrie) GetRange(start rune, option UcpMapRangeOption, surrogateValue uint32, filter UcpMapValueFilter) (rune, uint32) { + if option == UcpMapRangeNormal { + return t.getRange(start, filter) + } + + var surrEnd rune + if option == UcpMapRangeFixedAllSurrogates { + surrEnd = 0xdfff + } else { + surrEnd = 0xdbff + } + end, value := t.getRange(start, filter) + if end < 0xd7ff || start > surrEnd { + return end, value + } + if value == surrogateValue { + if end >= surrEnd { + // Surrogates followed by a non-surrogateValue range, + // or surrogates are part of a larger surrogateValue range. + return end, value + } + } else { + if start <= 0xd7ff { + return 0xd7ff, value // Non-surrogateValue range ends before surrogateValue surrogates. + } + // Start is a surrogate with a non-surrogateValue code *unit* value. + // Return a surrogateValue code *point* range. + value = surrogateValue + if end > surrEnd { + return surrEnd, value // Surrogate range ends before non-surrogateValue rest of range. + } + } + // See if the surrogateValue surrogate range can be merged with + // an immediately following range. 
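The \code sample in the comment above is C-flavoured; in this Go port, GetRange reports the range end and its value as return values rather than through out-parameters. A minimal sketch of the same iteration, assuming it lives in this package and that `trie` is a *UcpTrie obtained elsewhere (the helper name is illustrative only):

```go
// rangesOf collects every same-value range of a UcpTrie, mirroring the
// C \code sample above in Go form.
func rangesOf(trie *UcpTrie) (ranges [][2]rune, values []uint32) {
	start := rune(0)
	for {
		end, value := trie.GetRange(start, UcpMapRangeNormal, 0, nil)
		if end < 0 {
			break // start was past U+10FFFF: enumeration is complete
		}
		ranges = append(ranges, [2]rune{start, end})
		values = append(values, value)
		start = end + 1
	}
	return ranges, values
}
```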
+ end2, value2 := t.getRange(surrEnd+1, filter) + if value2 == surrogateValue { + return end2, value + } + return surrEnd, value +} + +const maxUnicode = 0x10ffff + +func (t *UcpTrie) getRange(start rune, filter UcpMapValueFilter) (rune, uint32) { + if start > maxUnicode { + return -1, 0 + } + + if start >= t.highStart { + di := t.dataLength - highValueNegDataOffset + value := t.getValue(di) + if filter != nil { + value = filter(value) + } + return maxUnicode, value + } + + nullValue := t.nullValue + if filter != nil { + nullValue = filter(nullValue) + } + index := t.index + + prevI3Block := int32(-1) + prevBlock := int32(-1) + c := start + var trieValue uint32 + value := nullValue + haveValue := false + for { + var i3Block, i3, i3BlockLength, dataBlockLength int32 + if c <= 0xffff && (t.typ == typeFast || c <= smallMax) { + i3Block = 0 + i3 = c >> fastShift + if t.typ == typeFast { + i3BlockLength = bmpIndexLength + } else { + i3BlockLength = smallIndexLength + } + dataBlockLength = fastDataBlockLength + } else { + // Use the multi-stage index. + i1 := c >> ucpShift1 + if t.typ == typeFast { + i1 += bmpIndexLength - ucpOmittedBmpIndex1Length + } else { + i1 += smallIndexLength + } + shft := c >> ucpShift2 + idx := int32(t.index[i1]) + (shft & ucpIndex2Mask) + i3Block = int32(t.index[idx]) + if i3Block == prevI3Block && (c-start) >= ucpCpPerIndex2Entry { + // The index-3 block is the same as the previous one, and filled with value. + c += ucpCpPerIndex2Entry + continue + } + prevI3Block = i3Block + if i3Block == int32(t.index3NullOffset) { + // This is the index-3 null block. + if haveValue { + if nullValue != value { + return c - 1, value + } + } else { + trieValue = t.nullValue + value = nullValue + haveValue = true + } + prevBlock = t.dataNullOffset + c = (c + ucpCpPerIndex2Entry) & ^(ucpCpPerIndex2Entry - 1) + continue + } + i3 = (c >> ucpShift3) & ucpIndex3Mask + i3BlockLength = ucpIndex3BlockLength + dataBlockLength = ucpSmallDataBlockLength + } + + // Enumerate data blocks for one index-3 block. + for { + var block int32 + if (i3Block & 0x8000) == 0 { + block = int32(index[i3Block+i3]) + } else { + // 18-bit indexes stored in groups of 9 entries per 8 indexes. + group := (i3Block & 0x7fff) + (i3 & ^7) + (i3 >> 3) + gi := i3 & 7 + block = (int32(index[group]) << (2 + (2 * gi))) & 0x30000 + group++ + block |= int32(index[group+gi]) + } + if block == prevBlock && (c-start) >= dataBlockLength { + // The block is the same as the previous one, and filled with value. + c += dataBlockLength + } else { + dataMask := dataBlockLength - 1 + prevBlock = block + if block == t.dataNullOffset { + // This is the data null block. 
+ if haveValue { + if nullValue != value { + return c - 1, value + } + } else { + trieValue = t.nullValue + value = nullValue + haveValue = true + } + c = (c + dataBlockLength) & ^dataMask + } else { + di := block + (c & dataMask) + trieValue2 := t.getValue(di) + if haveValue { + if trieValue2 != trieValue { + if filter == nil || maybeFilterValue(trieValue2, t.nullValue, nullValue, filter) != value { + return c - 1, value + } + trieValue = trieValue2 // may or may not help + } + } else { + trieValue = trieValue2 + value = maybeFilterValue(trieValue2, t.nullValue, nullValue, filter) + haveValue = true + } + for { + c++ + if c&dataMask == 0 { + break + } + di++ + trieValue2 = t.getValue(di) + if trieValue2 != trieValue { + if filter == nil || maybeFilterValue(trieValue2, t.nullValue, nullValue, filter) != value { + return c - 1, value + } + trieValue = trieValue2 // may or may not help + } + } + } + } + i3++ + if i3 >= i3BlockLength { + break + } + } + if c >= t.highStart { + break + } + } + + di := t.dataLength - highValueNegDataOffset + highValue := t.getValue(di) + if maybeFilterValue(highValue, t.nullValue, nullValue, filter) != value { + return c - 1, value + } + return maxUnicode, value +} + +func maybeFilterValue(value uint32, trieNullValue uint32, nullValue uint32, filter UcpMapValueFilter) uint32 { + if value == trieNullValue { + value = nullValue + } else if filter != nil { + value = filter(value) + } + return value +} diff --git a/go/mysql/icuregex/internal/utrie/utrie2.go b/go/mysql/icuregex/internal/utrie/utrie2.go new file mode 100644 index 00000000000..a2c80cf1c50 --- /dev/null +++ b/go/mysql/icuregex/internal/utrie/utrie2.go @@ -0,0 +1,440 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package utrie + +import ( + "errors" + "fmt" + + "vitess.io/vitess/go/mysql/icuregex/internal/udata" + "vitess.io/vitess/go/mysql/icuregex/internal/utf16" +) + +type UTrie2 struct { + index []uint16 + data16 []uint16 + data32 []uint32 + + indexLength, dataLength int + index2NullOffset uint16 + dataNullOffset uint16 + InitialValue uint32 + ErrorValue uint32 + + HighStart rune + HighValueIndex int +} + +func (t *UTrie2) SerializedLength() int32 { + return 16 + int32(t.indexLength+t.dataLength)*2 +} + +func (t *UTrie2) getIndex(asciiOffset int, c rune) uint16 { + return t.index[t.indexFromCp(asciiOffset, c)] +} + +func (t *UTrie2) Get16(c rune) uint16 { + return t.getIndex(t.indexLength, c) +} + +func (t *UTrie2) indexFromCp(asciiOffset int, c rune) int { + switch { + case c < 0xd800: + return indexRaw(0, t.index, c) + case c <= 0xffff: + var offset int32 + if c <= 0xdbff { + offset = lscpIndex2Offset - (0xd800 >> shift2) + } + return indexRaw(offset, t.index, c) + case c > 0x10ffff: + return asciiOffset + badUtf8DataOffset + case c >= t.HighStart: + return t.HighValueIndex + default: + return indexFromSupp(t.index, c) + } +} + +type EnumRange func(start, end rune, value uint32) bool +type EnumValue func(value uint32) uint32 + +func (t *UTrie2) Enum(enumValue EnumValue, enumRange EnumRange) { + t.enumEitherTrie(0, 0x110000, enumValue, enumRange) +} + +func enumSameValue(value uint32) uint32 { + return value +} + +func min(a, b rune) rune { + if a < b { + return a + } + return b +} + +func (t *UTrie2) enumEitherTrie(start, limit rune, enumValue EnumValue, enumRange EnumRange) { + if enumRange == nil { + return + } + if enumValue == nil { + enumValue = enumSameValue + } + + /* frozen trie */ + var ( + idx = t.index + data32 = t.data32 + index2NullOffset = int(t.index2NullOffset) + nullBlock = int(t.dataNullOffset) + + c rune + prev = start + highStart = t.HighStart + + /* get the enumeration value that corresponds to an initial-value trie data entry */ + initialValue = enumValue(t.InitialValue) + + /* set variables for previous range */ + i2Block int + block int + prevI2Block = -1 + prevBlock = -1 + prevValue = uint32(0) + ) + + /* enumerate index-2 blocks */ + for c = start; c < limit && c < highStart; { + /* Code point limit for iterating inside this i2Block. */ + tempLimit := c + cpPerIndex1Entry + if limit < tempLimit { + tempLimit = limit + } + if c <= 0xffff { + if !utf16.IsSurrogate(c) { + i2Block = int(c >> shift2) + } else if utf16.IsSurrogateLead(c) { + /* + * Enumerate values for lead surrogate code points, not code units: + * This special block has half the normal length. + */ + i2Block = lscpIndex2Offset + tempLimit = min(0xdc00, limit) + } else { + /* + * Switch back to the normal part of the index-2 table. + * Enumerate the second half of the surrogates block. + */ + i2Block = 0xd800 >> shift2 + tempLimit = min(0xe000, limit) + } + } else { + /* supplementary code points */ + i2Block = int(idx[(index1Offset-omittedBmpIndex1Length)+(c>>shift1)]) + if i2Block == prevI2Block && (c-prev) >= cpPerIndex1Entry { + /* + * The index-2 block is the same as the previous one, and filled with prevValue. + * Only possible for supplementary code points because the linear-BMP index-2 + * table creates unique i2Block values. 
+				 */
+				c += cpPerIndex1Entry
+				continue
+			}
+		}
+		prevI2Block = i2Block
+		if i2Block == index2NullOffset {
+			/* this is the null index-2 block */
+			if prevValue != initialValue {
+				if prev < c && !enumRange(prev, c-1, prevValue) {
+					return
+				}
+				prevBlock = nullBlock
+				prev = c
+				prevValue = initialValue
+			}
+			c += cpPerIndex1Entry
+		} else {
+			/* enumerate data blocks for one index-2 block */
+			var i2Limit int
+			if (c >> shift1) == (tempLimit >> shift1) {
+				i2Limit = int(tempLimit>>shift2) & index2Mask
+			} else {
+				i2Limit = index2BlockLength
+			}
+			for i2 := int(c>>shift2) & index2Mask; i2 < i2Limit; i2++ {
+				block = int(idx[i2Block+i2] << indexShift)
+				if block == prevBlock && (c-prev) >= dataBlockLength {
+					/* the block is the same as the previous one, and filled with prevValue */
+					c += dataBlockLength
+					continue
+				}
+				prevBlock = block
+				if block == nullBlock {
+					/* this is the null data block */
+					if prevValue != initialValue {
+						if prev < c && !enumRange(prev, c-1, prevValue) {
+							return
+						}
+						prev = c
+						prevValue = initialValue
+					}
+					c += dataBlockLength
+				} else {
+					for j := 0; j < dataBlockLength; j++ {
+						var value uint32
+						if data32 != nil {
+							value = data32[block+j]
+						} else {
+							value = uint32(idx[block+j])
+						}
+						value = enumValue(value)
+						if value != prevValue {
+							if prev < c && !enumRange(prev, c-1, prevValue) {
+								return
+							}
+							prev = c
+							prevValue = value
+						}
+						c++
+					}
+				}
+			}
+		}
+	}
+
+	if c > limit {
+		c = limit /* could be higher if in the index2NullOffset */
+	} else if c < limit {
+		/* c==highStart<limit */
+		var highValue uint32
+		if data32 != nil {
+			highValue = data32[t.HighValueIndex]
+		} else {
+			highValue = uint32(idx[t.HighValueIndex])
+		}
+		value := enumValue(highValue)
+		if value != prevValue {
+			if prev < c && !enumRange(prev, c-1, prevValue) {
+				return
+			}
+			prev = c
+			prevValue = value
+		}
+		c = limit
+	}
+
+	/* deliver last range */
+	enumRange(prev, c-1, prevValue)
+}
+
+func indexFromSupp(index []uint16, c rune) int {
+	i1 := int(index[(index1Offset-omittedBmpIndex1Length)+(c>>shift1)])
+	return (int(index[i1+int((c>>shift2)&index2Mask)]) << indexShift) + int(c&dataMask)
+}
+
+func indexRaw(offset int32, index []uint16, c rune) int {
+	return int(index[offset+(c>>shift2)]<<indexShift) + int(c&dataMask)
+}
+
+const (
+	/** Shift size for getting the index-1 table offset. */
+	shift1 = 6 + 5
+
+	/** Shift size for getting the index-2 table offset. */
+	shift2 = 5
+
+	/**
+	 * Difference between the two shift sizes,
+	 * for getting an index-1 offset from an index-2 offset. 6=11-5
+	 */
+	shift1min2 = shift1 - shift2
+
+	/**
+	 * Number of index-1 entries for the BMP. 32=0x20
+	 * This part of the index-1 table is omitted from the serialized form.
+	 */
+	omittedBmpIndex1Length = 0x10000 >> shift1
+
+	/** Number of code points per index-1 table entry. 2048=0x800 */
+	cpPerIndex1Entry = 1 << shift1
+
+	/** Number of entries in an index-2 block. 64=0x40 */
+	index2BlockLength = 1 << shift1min2
+
+	/** Mask for getting the lower bits for the in-index-2-block offset. */
+	index2Mask = index2BlockLength - 1
+
+	/** Number of entries in a data block. 32=0x20 */
+	dataBlockLength = 1 << shift2
+
+	/** Mask for getting the lower bits for the in-data-block offset. */
+	dataMask = dataBlockLength - 1
+
+	/**
+	 * Shift size for shifting left the index array values.
+	 * Increases possible data size with 16-bit index values at the cost
+	 * of compactability.
+	 * This requires data blocks to be aligned by UTRIE2_DATA_GRANULARITY.
+	 */
+	indexShift = 2
+
+	/** The alignment size of a data block. Also the granularity for compaction. */
+	dataGranularity = 1 << indexShift
+
+	/* Fixed layout of the first part of the index array. ------------------- */
+
+	/**
+	 * The part of the index-2 table for U+D800..U+DBFF stores values for
+	 * lead surrogate code _units_ not code _points_.
+	 * Values for lead surrogate code _points_ are indexed with this portion of the table.
+	 * Length=32=0x20=0x400>>UTRIE2_SHIFT_2. (There are 1024=0x400 lead surrogates.)
+	 */
+	lscpIndex2Offset = 0x10000 >> shift2
+	lscpIndex2Length = 0x400 >> shift2
+
+	/** Count the lengths of both BMP pieces. 2080=0x820 */
+	index2BmpLength = lscpIndex2Offset + lscpIndex2Length
+
+	/**
+	 * The 2-byte UTF-8 version of the index-2 table follows at offset 2080=0x820.
+	 * Length 32=0x20 for lead bytes C0..DF, regardless of UTRIE2_SHIFT_2.
+ */ + utf82BIndex2Offset = index2BmpLength + utf82BIndex2Length = 0x800 >> 6 /* U+0800 is the first code point after 2-byte UTF-8 */ + + /** + * The index-1 table, only used for supplementary code points, at offset 2112=0x840. + * Variable length, for code points up to highStart, where the last single-value range starts. + * Maximum length 512=0x200=0x100000>>UTRIE2_SHIFT_1. + * (For 0x100000 supplementary code points U+10000..U+10ffff.) + * + * The part of the index-2 table for supplementary code points starts + * after this index-1 table. + * + * Both the index-1 table and the following part of the index-2 table + * are omitted completely if there is only BMP data. + */ + index1Offset = utf82BIndex2Offset + utf82BIndex2Length + maxIndex1Length = 0x100000 >> shift1 + + /* + * Fixed layout of the first part of the data array. ----------------------- + * Starts with 4 blocks (128=0x80 entries) for ASCII. + */ + + /** + * The illegal-UTF-8 data block follows the ASCII block, at offset 128=0x80. + * Used with linear access for single bytes 0..0xbf for simple error handling. + * Length 64=0x40, not UTRIE2_DATA_BLOCK_LENGTH. + */ + badUtf8DataOffset = 0x80 +) + +func UTrie2FromBytes(bytes *udata.Bytes) (*UTrie2, error) { + type utrie2Header struct { + /** "Tri2" in big-endian US-ASCII (0x54726932) */ + signature uint32 + + /** + * options bit field: + * 15.. 4 reserved (0) + * 3.. 0 UTrie2ValueBits valueBits + */ + options uint16 + + /** UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH */ + indexLength uint16 + + /** (UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH)>>UTRIE2_INDEX_SHIFT */ + shiftedDataLength uint16 + + /** Null index and data blocks, not shifted. */ + index2NullOffset, dataNullOffset uint16 + + /** + * First code point of the single-value range ending with U+10ffff, + * rounded up and then shifted right by UTRIE2_SHIFT_1. 
+ */ + shiftedHighStart uint16 + } + + var header utrie2Header + header.signature = bytes.Uint32() + + switch header.signature { + case 0x54726932: + case 0x32697254: + return nil, errors.New("unsupported: BigEndian encoding") + default: + return nil, fmt.Errorf("invalid signature for Trie2: 0x%08x", header.signature) + } + + header.options = bytes.Uint16() + header.indexLength = bytes.Uint16() + header.shiftedDataLength = bytes.Uint16() + header.index2NullOffset = bytes.Uint16() + header.dataNullOffset = bytes.Uint16() + header.shiftedHighStart = bytes.Uint16() + + var width int + switch header.options & 0xf { + case 0: + width = 16 + case 1: + width = 32 + default: + return nil, errors.New("invalid width for serialized UTrie2") + } + + trie := &UTrie2{ + indexLength: int(header.indexLength), + dataLength: int(header.shiftedDataLength) << indexShift, + index2NullOffset: header.index2NullOffset, + dataNullOffset: header.dataNullOffset, + HighStart: rune(header.shiftedHighStart) << shift1, + } + + trie.HighValueIndex = trie.dataLength - dataGranularity + if width == 16 { + trie.HighValueIndex += trie.indexLength + } + + indexArraySize := trie.indexLength + if width == 16 { + indexArraySize += trie.dataLength + } + + trie.index = bytes.Uint16Slice(int32(indexArraySize)) + + if width == 16 { + trie.data16 = trie.index[trie.indexLength:] + trie.InitialValue = uint32(trie.index[trie.dataNullOffset]) + trie.ErrorValue = uint32(trie.index[trie.indexLength+badUtf8DataOffset]) + } else { + trie.data32 = bytes.Uint32Slice(int32(trie.dataLength)) + trie.InitialValue = trie.data32[trie.dataNullOffset] + trie.ErrorValue = trie.data32[badUtf8DataOffset] + } + + return trie, nil +} diff --git a/go/mysql/icuregex/matcher.go b/go/mysql/icuregex/matcher.go new file mode 100644 index 00000000000..11fbc152d73 --- /dev/null +++ b/go/mysql/icuregex/matcher.go @@ -0,0 +1,1655 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package icuregex + +import ( + "fmt" + "io" + + "vitess.io/vitess/go/mysql/icuregex/internal/ucase" + "vitess.io/vitess/go/mysql/icuregex/internal/uchar" + "vitess.io/vitess/go/mysql/icuregex/internal/uprops" +) + +const timerInitialValue = 10000 +const defaultTimeout = 3 +const defaultStackLimit = 0 + +type Matcher struct { + pattern *Pattern + + input []rune + + regionStart int // Start of the input region, default = 0. + regionLimit int // End of input region, default to input.length. + + anchorStart int // Region bounds for anchoring operations (^ or $). + anchorLimit int // See useAnchoringBounds + + lookStart int // Region bounds for look-ahead/behind and + lookLimit int // and other boundary tests. 
See + // useTransparentBounds + + activeStart int // Currently active bounds for matching. + activeLimit int // Usually is the same as region, but + // is changed to fLookStart/Limit when + // entering look around regions. + + match bool // True if the last attempted match was successful. + matchStart int // Position of the start of the most recent match + matchEnd int // First position after the end of the most recent match + // Zero if no previous match, even when a region + // is active. + lastMatchEnd int // First position after the end of the previous match, + // or -1 if there was no previous match. + appendPosition int // First position after the end of the previous + // appendReplacement(). As described by the + // JavaDoc for Java Matcher, where it is called + // "append position" + hitEnd bool // True if the last match touched the end of input. + requireEnd bool // True if the last match required end-of-input + // (matched $ or Z) + + stack stack + frame stackFrame // After finding a match, the last active stack frame, + // which will contain the capture group results. + // NOT valid while match engine is running. + + data []int // Data area for use by the compiled pattern. + + timeLimit int32 // Max time (in arbitrary steps) to let the + // match engine run. Zero for unlimited. + + time int32 // Match time, accumulates while matching. + tickCounter int32 // Low bits counter for time. Counts down StateSaves. + // Kept separately from fTime to keep as much + // code as possible out of the inline + // StateSave function. + + dumper io.Writer +} + +func NewMatcher(pat *Pattern) *Matcher { + m := &Matcher{ + pattern: pat, + data: make([]int, pat.dataSize), + stack: stack{ + frameSize: pat.frameSize, + stackLimit: defaultStackLimit, + }, + timeLimit: defaultTimeout, + } + m.reset() + return m +} + +func (m *Matcher) MatchAt(startIdx int, toEnd bool) error { + //-------------------------------------------------------------------------------- + // + // MatchAt This is the actual matching engine. + // + // startIdx: begin matching a this index. + // toEnd: if true, match must extend to end of the input region + // + //-------------------------------------------------------------------------------- + var err error + var isMatch bool // True if the we have a match. + + if m.dumper != nil { + fmt.Fprintf(m.dumper, "MatchAt(startIdx=%d)\n", startIdx) + fmt.Fprintf(m.dumper, "Original Pattern: \"%s\"\n", m.pattern.pattern) + fmt.Fprintf(m.dumper, "Input String: \"%s\"\n\n", string(m.input)) + } + + pat := m.pattern.compiledPat + inputText := m.input + litText := m.pattern.literalText + sets := m.pattern.sets + + fp := m.resetStack() + *fp.inputIdx() = startIdx + *fp.patIdx() = 0 + for i := 0; i < len(m.data); i++ { + m.data[i] = 0 + } + + for { + op := pat[*fp.patIdx()] + + if m.dumper != nil { + fmt.Fprintf(m.dumper, "inputIdx=%d inputChar=%x sp=%3d activeLimit=%d ", *fp.inputIdx(), + charAt(inputText, *fp.inputIdx()), m.stack.sp(), m.activeLimit) + m.pattern.dumpOp(m.dumper, *fp.patIdx()) + } + + *fp.patIdx()++ + + switch op.typ() { + case urxNop: + // Nothing to do. + case urxBacktrack: + // Force a backtrack. In some circumstances, the pattern compiler + // will notice that the pattern can't possibly match anything, and will + // emit one of these at that point. 
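Most of the opcodes below lean on one discipline: before committing to an alternative, the engine saves the current input and pattern positions (stateSave), and every failing path pops back to the most recent save point (popFrame). A stripped-down, standalone sketch of that idea; the engine's real frames live in a flat int stack and also carry capture-group slots:

```go
// Simplified illustration of the save/backtrack discipline used by the
// opcode loop; not the engine's actual frame layout.
type savePoint struct {
	inputIdx int // where to resume reading input
	patIdx   int // which opcode to resume at
}

type backtrackStack struct {
	frames []savePoint
}

// save records a decision point that a later failure can return to.
func (s *backtrackStack) save(inputIdx, patIdx int) {
	s.frames = append(s.frames, savePoint{inputIdx, patIdx})
}

// fail unwinds to the most recent save point; ok is false when no
// alternatives remain, i.e. the overall match attempt has failed.
func (s *backtrackStack) fail() (sp savePoint, ok bool) {
	if len(s.frames) == 0 {
		return savePoint{}, false
	}
	sp = s.frames[len(s.frames)-1]
	s.frames = s.frames[:len(s.frames)-1]
	return sp, true
}
```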
+ fp = m.stack.popFrame() + case urxOnechar: + if *fp.inputIdx() < m.activeLimit { + c := charAt(inputText, *fp.inputIdx()) + *fp.inputIdx()++ + if c == rune(op.value()) { + break + } + } else { + m.hitEnd = true + } + fp = m.stack.popFrame() + case urxString: + // Test input against a literal string. + // Strings require two slots in the compiled pattern, one for the + // offset to the string text, and one for the length. + stringStartIdx := op.value() + nextOp := pat[*fp.patIdx()] // Fetch the second operand + *fp.patIdx()++ + stringLen := nextOp.value() + + patternString := litText[stringStartIdx:] + var patternStringIndex int + success := true + for patternStringIndex < stringLen { + if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + success = false + break + } + if charAt(patternString, patternStringIndex) != charAt(inputText, *fp.inputIdx()) { + success = false + break + } + patternStringIndex++ + *fp.inputIdx()++ + } + + if !success { + fp = m.stack.popFrame() + } + case urxStateSave: + fp, err = m.stateSave(*fp.inputIdx(), op.value()) + if err != nil { + return err + } + case urxEnd: + // The match loop will exit via this path on a successful match, + // when we reach the end of the pattern. + if toEnd && *fp.inputIdx() != m.activeLimit { + // The pattern matched, but not to the end of input. Try some more. + fp = m.stack.popFrame() + break + } + isMatch = true + goto breakFromLoop + + // Start and End Capture stack frame variables are laid out out like this: + // fp->fExtra[opValue] - The start of a completed capture group + // opValue+1 - The end of a completed capture group + // opValue+2 - the start of a capture group whose end + // has not yet been reached (and might not ever be). + case urxStartCapture: + *fp.extra(op.value() + 2) = *fp.inputIdx() + case urxEndCapture: + *fp.extra(op.value()) = *fp.extra(op.value() + 2) // Tentative start becomes real. + *fp.extra(op.value() + 1) = *fp.inputIdx() // End position + + case urxDollar: // $, test for End of line + if *fp.inputIdx() < m.anchorLimit-2 { + fp = m.stack.popFrame() + break + } + // or for position before new line at end of input + if *fp.inputIdx() >= m.anchorLimit { + // We really are at the end of input. Success. + m.hitEnd = true + m.requireEnd = true + break + } + + if *fp.inputIdx() == m.anchorLimit-1 { + c := m.input[*fp.inputIdx()] + if isLineTerminator(c) { + if !(c == 0x0a && *fp.inputIdx() > m.anchorStart && m.input[*fp.inputIdx()-1] == 0x0d) { + // At new-line at end of input. Success + m.hitEnd = true + m.requireEnd = true + break + } + } + } else if *fp.inputIdx() == m.anchorLimit-2 && m.input[*fp.inputIdx()] == 0x0d && m.input[*fp.inputIdx()+1] == 0x0a { + m.hitEnd = true + m.requireEnd = true + break // At CR/LF at end of input. Success + } + fp = m.stack.popFrame() + + case urxDollarD: // $, test for End of Line, in UNIX_LINES mode. + if *fp.inputIdx() >= m.anchorLimit { + // Off the end of input. Success. + m.hitEnd = true + m.requireEnd = true + break + } + c := charAt(inputText, *fp.inputIdx()) + *fp.inputIdx()++ + // Either at the last character of input, or off the end. + if c == 0x0a && *fp.inputIdx() == m.anchorLimit { + m.hitEnd = true + m.requireEnd = true + break + } + + // Not at end of input. Back-track out. + fp = m.stack.popFrame() + case urxDollarM: // $, test for End of line in multi-line mode + if *fp.inputIdx() >= m.anchorLimit { + // We really are at the end of input. Success. 
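The plain `$` case above boils down to one rule: the position must be at the very end of the input, or immediately before a line terminator that itself ends the input, where a CR LF pair counts as a single terminator and the position between CR and LF does not qualify. A standalone sketch of that rule with a simplified terminator set (the engine's isLineTerminator also accepts additional Unicode terminators):

```go
// atEndOfLineAnchor reports whether '$' would succeed at pos, using only
// LF and CR/LF as terminators for brevity.
func atEndOfLineAnchor(input []rune, pos int) bool {
	switch len(input) - pos {
	case 0:
		return true // at the very end of input
	case 1:
		c := input[pos]
		if c == '\n' && pos > 0 && input[pos-1] == '\r' {
			return false // between CR and LF; '$' matches before the CR instead
		}
		return c == '\n' || c == '\r' // before a final terminator
	case 2:
		return input[pos] == '\r' && input[pos+1] == '\n' // before a final CR/LF pair
	default:
		return false
	}
}
```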
+ m.hitEnd = true + m.requireEnd = true + break + } + // If we are positioned just before a new-line, succeed. + // It makes no difference where the new-line is within the input. + c := charAt(inputText, *fp.inputIdx()) + if isLineTerminator(c) { + // At a line end, except for the odd chance of being in the middle of a CR/LF sequence + // In multi-line mode, hitting a new-line just before the end of input does not + // set the hitEnd or requireEnd flags + if !(c == 0x0a && *fp.inputIdx() > m.anchorStart && charAt(inputText, *fp.inputIdx()-1) == 0x0d) { + break + } + } + // not at a new line. Fail. + fp = m.stack.popFrame() + case urxDollarMd: // $, test for End of line in multi-line and UNIX_LINES mode + if *fp.inputIdx() >= m.anchorLimit { + // We really are at the end of input. Success. + m.hitEnd = true + m.requireEnd = true // Java set requireEnd in this case, even though + break // adding a new-line would not lose the match. + } + // If we are not positioned just before a new-line, the test fails; backtrack out. + // It makes no difference where the new-line is within the input. + if charAt(inputText, *fp.inputIdx()) != 0x0a { + fp = m.stack.popFrame() + } + case urxCaret: // ^, test for start of line + if *fp.inputIdx() != m.anchorStart { + fp = m.stack.popFrame() + } + case urxCaretM: // ^, test for start of line in mulit-line mode + if *fp.inputIdx() == m.anchorStart { + // We are at the start input. Success. + break + } + // Check whether character just before the current pos is a new-line + // unless we are at the end of input + c := charAt(inputText, *fp.inputIdx()-1) + if (*fp.inputIdx() < m.anchorLimit) && isLineTerminator(c) { + // It's a new-line. ^ is true. Success. + // TODO: what should be done with positions between a CR and LF? + break + } + // Not at the start of a line. Fail. + fp = m.stack.popFrame() + case urxCaretMUnix: // ^, test for start of line in mulit-line + Unix-line mode + if *fp.inputIdx() <= m.anchorStart { + // We are at the start input. Success. + break + } + + c := charAt(inputText, *fp.inputIdx()-1) + if c != 0x0a { + // Not at the start of a line. Back-track out. + fp = m.stack.popFrame() + } + case urxBackslashB: // Test for word boundaries + success := m.isWordBoundary(*fp.inputIdx()) + success = success != (op.value() != 0) // flip sense for \B + if !success { + fp = m.stack.popFrame() + } + case urxBackslashBu: // Test for word boundaries, Unicode-style + success := m.isUWordBoundary(*fp.inputIdx()) + success = success != (op.value() != 0) // flip sense for \B + if !success { + fp = m.stack.popFrame() + } + case urxBackslashD: // Test for decimal digit + if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + fp = m.stack.popFrame() + break + } + + c := charAt(inputText, *fp.inputIdx()) + + success := m.isDecimalDigit(c) + success = success != (op.value() != 0) // flip sense for \D + if success { + *fp.inputIdx()++ + } else { + fp = m.stack.popFrame() + } + + case urxBackslashG: // Test for position at end of previous match + if !((m.match && *fp.inputIdx() == m.matchEnd) || (!m.match && *fp.inputIdx() == m.activeStart)) { + fp = m.stack.popFrame() + } + + case urxBackslashH: // Test for \h, horizontal white space. 
+ if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + fp = m.stack.popFrame() + break + } + + c := charAt(inputText, *fp.inputIdx()) + success := m.isHorizWS(c) || c == 9 + success = success != (op.value() != 0) // flip sense for \H + if success { + *fp.inputIdx()++ + } else { + fp = m.stack.popFrame() + } + + case urxBackslashR: // Test for \R, any line break sequence. + if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + fp = m.stack.popFrame() + break + } + c := charAt(inputText, *fp.inputIdx()) + if isLineTerminator(c) { + if c == 0x0d && charAt(inputText, *fp.inputIdx()+1) == 0x0a { + *fp.inputIdx()++ + } + *fp.inputIdx()++ + } else { + fp = m.stack.popFrame() + } + + case urxBackslashV: // \v, any single line ending character. + if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + fp = m.stack.popFrame() + break + } + c := charAt(inputText, *fp.inputIdx()) + success := isLineTerminator(c) + success = success != (op.value() != 0) // flip sense for \V + if success { + *fp.inputIdx()++ + } else { + fp = m.stack.popFrame() + } + + case urxBackslashX: + // Match a Grapheme, as defined by Unicode UAX 29. + + // Fail if at end of input + if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + fp = m.stack.popFrame() + break + } + + *fp.inputIdx() = m.followingGCBoundary(*fp.inputIdx()) + if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + *fp.inputIdx() = m.activeLimit + } + + case urxBackslashZ: // Test for end of Input + if *fp.inputIdx() < m.anchorLimit { + fp = m.stack.popFrame() + } else { + m.hitEnd = true + m.requireEnd = true + } + case urxStaticSetref: + // Test input character against one of the predefined sets + // (Word Characters, for example) + // The high bit of the op value is a flag for the match polarity. + // 0: success if input char is in set. + // 1: success if input char is not in set. + if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + fp = m.stack.popFrame() + break + } + + success := (op.value() & urxNegSet) == urxNegSet + negOp := op.value() & ^urxNegSet + + c := charAt(inputText, *fp.inputIdx()) + s := staticPropertySets[negOp] + if s.ContainsRune(c) { + success = !success + } + + if success { + *fp.inputIdx()++ + } else { + // the character wasn't in the set. + fp = m.stack.popFrame() + } + case urxStatSetrefN: + // Test input character for NOT being a member of one of + // the predefined sets (Word Characters, for example) + if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + fp = m.stack.popFrame() + break + } + + c := charAt(inputText, *fp.inputIdx()) + s := staticPropertySets[op.value()] + if !s.ContainsRune(c) { + *fp.inputIdx()++ + break + } + // the character wasn't in the set. + fp = m.stack.popFrame() + + case urxSetref: + if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + fp = m.stack.popFrame() + break + } + + // There is input left. Pick up one char and test it for set membership. + c := charAt(inputText, *fp.inputIdx()) + + s := sets[op.value()] + if s.ContainsRune(c) { + *fp.inputIdx()++ + break + } + + // the character wasn't in the set. + fp = m.stack.popFrame() + + case urxDotany: + // . matches anything, but stops at end-of-line. + if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + fp = m.stack.popFrame() + break + } + + c := charAt(inputText, *fp.inputIdx()) + if isLineTerminator(c) { + // End of line in normal mode. . does not match. 
+ fp = m.stack.popFrame() + break + } + *fp.inputIdx()++ + + case urxDotanyAll: + // ., in dot-matches-all (including new lines) mode + if *fp.inputIdx() >= m.activeLimit { + // At end of input. Match failed. Backtrack out. + m.hitEnd = true + fp = m.stack.popFrame() + break + } + + c := charAt(inputText, *fp.inputIdx()) + *fp.inputIdx()++ + if c == 0x0d && *fp.inputIdx() < m.activeLimit { + // In the case of a CR/LF, we need to advance over both. + nextc := charAt(inputText, *fp.inputIdx()) + if nextc == 0x0a { + *fp.inputIdx()++ + } + } + + case urxDotanyUnix: + // '.' operator, matches all, but stops at end-of-line. + // UNIX_LINES mode, so 0x0a is the only recognized line ending. + if *fp.inputIdx() >= m.activeLimit { + // At end of input. Match failed. Backtrack out. + m.hitEnd = true + fp = m.stack.popFrame() + break + } + + // There is input left. Advance over one char, unless we've hit end-of-line + c := charAt(inputText, *fp.inputIdx()) + if c == 0x0a { + // End of line in normal mode. '.' does not match the \n + fp = m.stack.popFrame() + } else { + *fp.inputIdx()++ + } + case urxJmp: + *fp.patIdx() = op.value() + + case urxFail: + isMatch = false + goto breakFromLoop + + case urxJmpSav: + fp, err = m.stateSave(*fp.inputIdx(), *fp.patIdx()) // State save to loc following current + if err != nil { + return err + } + *fp.patIdx() = op.value() // Then JMP. + + case urxJmpSavX: + // This opcode is used with (x)+, when x can match a zero length string. + // Same as JMP_SAV, except conditional on the match having made forward progress. + // Destination of the JMP must be a URX_STO_INP_LOC, from which we get the + // data address of the input position at the start of the loop. + stoOp := pat[op.value()-1] + frameLoc := stoOp.value() + + prevInputIdx := *fp.extra(frameLoc) + if prevInputIdx < *fp.inputIdx() { + // The match did make progress. Repeat the loop. + fp, err = m.stateSave(*fp.inputIdx(), *fp.patIdx()) // State save to loc following current + if err != nil { + return err + } + *fp.patIdx() = op.value() // Then JMP. + *fp.extra(frameLoc) = *fp.inputIdx() + } + // If the input position did not advance, we do nothing here, + // execution will fall out of the loop. + + case urxCtrInit: + *fp.extra(op.value()) = 0 // Set the loop counter variable to zero + + // Pick up the three extra operands that CTR_INIT has, and + // skip the pattern location counter past + instOperandLoc := *fp.patIdx() + *fp.patIdx() += 3 // Skip over the three operands that CTR_INIT has. + + loopLoc := pat[instOperandLoc].value() + minCount := int(pat[instOperandLoc+1]) + maxCount := int(pat[instOperandLoc+2]) + + if minCount == 0 { + fp, err = m.stateSave(*fp.inputIdx(), loopLoc+1) + if err != nil { + return err + } + } + if maxCount == -1 { + *fp.extra(op.value() + 1) = *fp.inputIdx() // For loop breaking. + } else if maxCount == 0 { + fp = m.stack.popFrame() + } + + case utxCtrLoop: + initOp := pat[op.value()] + opValue := initOp.value() + pCounter := fp.extra(opValue) + minCount := int(pat[op.value()+2]) + maxCount := int(pat[op.value()+3]) + *pCounter++ + if *pCounter >= maxCount && maxCount != -1 { + break + } + + if *pCounter >= minCount { + if maxCount == -1 { + // Loop has no hard upper bound. + // Check that it is progressing through the input, break if it is not. 
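This progress check exists because an unbounded loop whose body can match the empty string, for example a pattern like (a*)*, would otherwise iterate forever: the body keeps succeeding without consuming any input. A standalone sketch of the guard:

```go
// runUnboundedLoop keeps applying matchBody only while each pass consumes
// input; a pass that succeeds without advancing ends the loop, mirroring
// the saved-input-index check in the loop opcode.
func runUnboundedLoop(matchBody func(pos int) (newPos int, ok bool), pos int) int {
	last := -1
	for {
		if pos == last {
			break // no forward progress on the previous pass
		}
		last = pos
		next, ok := matchBody(pos)
		if !ok {
			break
		}
		pos = next
	}
	return pos
}
```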
+ pLastIntputIdx := fp.extra(opValue + 1) + if *pLastIntputIdx == *fp.inputIdx() { + break + } + *pLastIntputIdx = *fp.inputIdx() + } + fp, err = m.stateSave(*fp.inputIdx(), *fp.patIdx()) + if err != nil { + return err + } + } else { + // Increment time-out counter. (StateSave() does it if count >= minCount) + m.tickCounter-- + if m.tickCounter <= 0 { + if err = m.incrementTime(*fp.inputIdx()); err != nil { + return err + } // Re-initializes fTickCounter + } + } + + *fp.patIdx() = op.value() + 4 // Loop back. + + case urxCtrInitNg: + *fp.extra(op.value()) = 0 // Set the loop counter variable to zero + + // Pick up the three extra operands that CTR_INIT_NG has, and + // skip the pattern location counter past + instrOperandLoc := *fp.patIdx() + *fp.patIdx() += 3 + loopLoc := pat[instrOperandLoc].value() + minCount := pat[instrOperandLoc+1].value() + maxCount := pat[instrOperandLoc+2].value() + + if maxCount == -1 { + *fp.extra(op.value() + 1) = *fp.inputIdx() // Save initial input index for loop breaking. + } + + if minCount == 0 { + if maxCount != 0 { + fp, err = m.stateSave(*fp.inputIdx(), *fp.patIdx()) + if err != nil { + return err + } + } + *fp.patIdx() = loopLoc + 1 + } + + case urxCtrLoopNg: + initOp := pat[op.value()] + pCounter := fp.extra(initOp.value()) + minCount := int(pat[op.value()+2]) + maxCount := int(pat[op.value()+3]) + *pCounter++ + if *pCounter >= maxCount && maxCount != -1 { + // The loop has matched the maximum permitted number of times. + // Break out of here with no action. Matching will + // continue with the following pattern. + break + } + + if *pCounter < minCount { + // We haven't met the minimum number of matches yet. + // Loop back for another one. + *fp.patIdx() = op.value() + 4 // Loop back. + // Increment time-out counter. (StateSave() does it if count >= minCount) + m.tickCounter-- + if m.tickCounter <= 0 { + if err = m.incrementTime(*fp.inputIdx()); err != nil { + return err + } // Re-initializes fTickCounter + } + } else { + // We do have the minimum number of matches. + + // If there is no upper bound on the loop iterations, check that the input index + // is progressing, and stop the loop if it is not. + if maxCount == -1 { + lastInputIdx := fp.extra(initOp.value() + 1) + if *fp.inputIdx() == *lastInputIdx { + break + } + *lastInputIdx = *fp.inputIdx() + } + } + + // Loop Continuation: we will fall into the pattern following the loop + // (non-greedy, don't execute loop body first), but first do + // a state save to the top of the loop, so that a match failure + // in the following pattern will try another iteration of the loop. + fp, err = m.stateSave(*fp.inputIdx(), op.value()+4) + if err != nil { + return err + } + + case urxStoSp: + m.data[op.value()] = m.stack.len() + + case urxLdSp: + newStackSize := m.data[op.value()] + newFp := m.stack.offset(newStackSize) + if newFp.equals(fp) { + break + } + copy(newFp, fp) + fp = newFp + + m.stack.setSize(newStackSize) + case urxBackref: + groupStartIdx := *fp.extra(op.value()) + groupEndIdx := *fp.extra(op.value() + 1) + + if groupStartIdx < 0 { + // This capture group has not participated in the match thus far, + fp = m.stack.popFrame() // FAIL, no match. 
+ break
+ }
+
+ success := true
+ for {
+ if groupStartIdx >= groupEndIdx {
+ success = true
+ break
+ }
+
+ if *fp.inputIdx() >= m.activeLimit {
+ success = false
+ m.hitEnd = true
+ break
+ }
+
+ captureGroupChar := charAt(inputText, groupStartIdx)
+ inputChar := charAt(inputText, *fp.inputIdx())
+ groupStartIdx++
+ *fp.inputIdx()++
+ if inputChar != captureGroupChar {
+ success = false
+ break
+ }
+ }
+
+ if !success {
+ fp = m.stack.popFrame()
+ }
+ case urxBackrefI:
+ groupStartIdx := *fp.extra(op.value())
+ groupEndIdx := *fp.extra(op.value() + 1)
+
+ if groupStartIdx < 0 {
+ // This capture group has not participated in the match thus far,
+ fp = m.stack.popFrame() // FAIL, no match.
+ break
+ }
+
+ captureGroupItr := newCaseFoldIterator(m.input, groupStartIdx, groupEndIdx)
+ inputItr := newCaseFoldIterator(m.input, *fp.inputIdx(), m.activeLimit)
+ success := true
+
+ for {
+ captureGroupChar := captureGroupItr.next()
+ if captureGroupChar == -1 {
+ success = true
+ break
+ }
+ inputChar := inputItr.next()
+ if inputChar == -1 {
+ success = false
+ m.hitEnd = true
+ break
+ }
+ if inputChar != captureGroupChar {
+ success = false
+ break
+ }
+ }
+
+ if success && inputItr.inExpansion() {
+ // We obtained a match by consuming part of a string obtained from
+ // case-folding a single code point of the input text.
+ // This does not count as an overall match.
+ success = false
+ }
+
+ if success {
+ *fp.inputIdx() = inputItr.index
+ } else {
+ fp = m.stack.popFrame()
+ }
+
+ case urxStoInpLoc:
+ *fp.extra(op.value()) = *fp.inputIdx()
+
+ case urxJmpx:
+ instrOperandLoc := *fp.patIdx()
+ *fp.patIdx()++
+ dataLoc := pat[instrOperandLoc].value()
+
+ saveInputIdx := *fp.extra(dataLoc)
+
+ if saveInputIdx < *fp.inputIdx() {
+ *fp.patIdx() = op.value() // JMP
+ } else {
+ fp = m.stack.popFrame() // FAIL, no progress in loop.
+ }
+
+ case urxLaStart:
+ m.data[op.value()] = m.stack.len()
+ m.data[op.value()+1] = *fp.inputIdx()
+ m.data[op.value()+2] = m.activeStart
+ m.data[op.value()+3] = m.activeLimit
+ m.activeStart = m.lookStart // Set the match region change for
+ m.activeLimit = m.lookLimit // transparent bounds.
+
+ case urxLaEnd:
+ stackSize := m.stack.len()
+ newStackSize := m.data[op.value()]
+ if stackSize > newStackSize {
+ // Copy the current top frame back to the new (cut back) top frame.
+ // This makes the capture groups from within the look-ahead
+ // expression available.
+ newFp := m.stack.offset(newStackSize)
+ copy(newFp, fp)
+ fp = newFp
+ m.stack.setSize(newStackSize)
+ }
+
+ *fp.inputIdx() = m.data[op.value()+1]
+
+ m.activeStart = m.data[op.value()+2]
+ m.activeLimit = m.data[op.value()+3]
+
+ case urcOnecharI:
+ // Case insensitive one char. The char from the pattern is already case folded.
+ // Input text is not, but case folding the input cannot reduce two or more code
+ // points to one.
+ if *fp.inputIdx() < m.activeLimit {
+ c := charAt(inputText, *fp.inputIdx())
+ if ucase.Fold(c) == op.value32() {
+ *fp.inputIdx()++
+ break
+ }
+ } else {
+ m.hitEnd = true
+ }
+
+ fp = m.stack.popFrame()
+
+ case urxStringI:
+ // Case-insensitive test input against a literal string.
+ // Strings require two slots in the compiled pattern, one for the
+ // offset to the string text, and one for the length.
+ // The compiled string has already been case folded.
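+ // Concretely, the operand of this op is an index into litText and the next
+ // instruction word carries the string length, so the folded pattern text is
+ // roughly litText[op.value() : op.value()+length]; the input is then walked
+ // through a case-folding iterator and compared against it, as below.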
+ patternString := litText[op.value():]
+ var patternStringIdx int
+ nextOp := pat[*fp.patIdx()]
+ *fp.patIdx()++
+ patternStringLen := nextOp.value()
+
+ success := true
+
+ it := newCaseFoldIterator(inputText, *fp.inputIdx(), m.activeLimit)
+ for patternStringIdx < patternStringLen {
+ cText := it.next()
+ cPattern := patternString[patternStringIdx]
+ patternStringIdx++
+
+ if cText != cPattern {
+ success = false
+ if cText == -1 {
+ m.hitEnd = true
+ }
+ break
+ }
+ }
+ if it.inExpansion() {
+ success = false
+ }
+
+ if success {
+ *fp.inputIdx() = it.index
+ } else {
+ fp = m.stack.popFrame()
+ }
+
+ case urxLbStart:
+ // Entering a look-behind block.
+ // Save Stack Ptr, Input Pos and active input region.
+ // TODO: implement transparent bounds. Ticket #6067
+ m.data[op.value()] = m.stack.len()
+ m.data[op.value()+1] = *fp.inputIdx()
+ // Save the active input region, then reset its end to pin any matches
+ // to end at the current position.
+ m.data[op.value()+2] = m.activeStart
+ m.data[op.value()+3] = m.activeLimit
+ m.activeStart = m.regionStart
+ m.activeLimit = *fp.inputIdx()
+ // Init the variable containing the start index for attempted matches.
+ m.data[op.value()+4] = -1
+ case urxLbCont:
+ // Positive Look-Behind, at top of loop checking for matches of LB expression
+ // at all possible input starting positions.
+
+ // Fetch the min and max possible match lengths. They are the operands
+ // of this op in the pattern.
+ minML := pat[*fp.patIdx()]
+ *fp.patIdx()++
+ maxML := pat[*fp.patIdx()]
+ *fp.patIdx()++
+
+ lbStartIdx := &m.data[op.value()+4]
+ if *lbStartIdx < 0 {
+ // First time through loop.
+ *lbStartIdx = *fp.inputIdx() - int(minML)
+ if *lbStartIdx > 0 {
+ *lbStartIdx = *fp.inputIdx()
+ }
+ } else {
+ // 2nd through nth time through the loop.
+ // Back up start position for match by one.
+ *lbStartIdx--
+ }
+
+ if *lbStartIdx < 0 || *lbStartIdx < *fp.inputIdx()-int(maxML) {
+ // We have tried all potential match starting points without
+ // getting a match. Backtrack out, and out of the
+ // Look Behind altogether.
+ fp = m.stack.popFrame()
+ m.activeStart = m.data[op.value()+2]
+ m.activeLimit = m.data[op.value()+3]
+ break
+ }
+
+ // Save state to this URX_LB_CONT op, so failure to match will repeat the loop.
+ // (successful match will fall off the end of the loop.)
+ fp, err = m.stateSave(*fp.inputIdx(), *fp.patIdx()-3)
+ if err != nil {
+ return err
+ }
+ *fp.inputIdx() = *lbStartIdx
+
+ case urxLbEnd:
+ // End of a look-behind block, after a successful match.
+ if *fp.inputIdx() != m.activeLimit {
+ // The look-behind expression matched, but the match did not
+ // extend all the way to the point that we are looking behind from.
+ // FAIL out of here, which will take us back to the LB_CONT, which
+ // will retry the match starting at another position or fail
+ // the look-behind altogether, whichever is appropriate.
+ fp = m.stack.popFrame()
+ break
+ }
+
+ // Look-behind match is good. Restore the original input string region,
+ // which had been truncated to pin the end of the lookbehind match to the
+ // position being looked-behind.
+ m.activeStart = m.data[op.value()+2]
+ m.activeLimit = m.data[op.value()+3]
+ case urxLbnCount:
+ // Negative Look-Behind, at top of loop checking for matches of LB expression
+ // at all possible input starting positions.
+
+ // Fetch the extra parameters of this op.
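+ // (minML and maxML below are the minimum and maximum possible lengths of the
+ // look-behind expression, and continueLoc is the pattern location to jump to
+ // once every candidate start position has failed, i.e. when the negative
+ // look-behind as a whole has succeeded.)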
+ minML := pat[*fp.patIdx()]
+ *fp.patIdx()++
+ maxML := pat[*fp.patIdx()]
+ *fp.patIdx()++
+
+ continueLoc := pat[*fp.patIdx()].value()
+ *fp.patIdx()++
+
+ lbStartIdx := &m.data[op.value()+4]
+
+ if *lbStartIdx < 0 {
+ // First time through loop.
+ *lbStartIdx = *fp.inputIdx() - int(minML)
+ if *lbStartIdx > 0 {
+ // move index to a code point boundary, if it's not on one already.
+ *lbStartIdx = *fp.inputIdx()
+ }
+ } else {
+ // 2nd through nth time through the loop.
+ // Back up start position for match by one.
+ *lbStartIdx--
+ }
+
+ if *lbStartIdx < 0 || *lbStartIdx < *fp.inputIdx()-int(maxML) {
+ // We have tried all potential match starting points without
+ // getting a match, which means that the negative lookbehind as
+ // a whole has succeeded. Jump forward to the continue location.
+ m.activeStart = m.data[op.value()+2]
+ m.activeLimit = m.data[op.value()+3]
+ *fp.patIdx() = continueLoc
+ break
+ }
+
+ // Save state to this URX_LB_CONT op, so failure to match will repeat the loop.
+ // (successful match will cause a FAIL out of the loop altogether.)
+ fp, err = m.stateSave(*fp.inputIdx(), *fp.patIdx()-4)
+ if err != nil {
+ return err
+ }
+ *fp.inputIdx() = *lbStartIdx
+ case urxLbnEnd:
+ // End of a negative look-behind block, after a successful match.
+
+ if *fp.inputIdx() != m.activeLimit {
+ // The look-behind expression matched, but the match did not
+ // extend all the way to the point that we are looking behind from.
+ // FAIL out of here, which will take us back to the LB_CONT, which
+ // will retry the match starting at another position or succeed
+ // the look-behind altogether, whichever is appropriate.
+ fp = m.stack.popFrame()
+ break
+ }
+
+ // Look-behind expression matched, which means the look-behind test as
+ // a whole fails.
+
+ // Restore the original input region, which had been truncated
+ // in order to pin the end of the lookbehind match
+ // to the position being looked-behind.
+ m.activeStart = m.data[op.value()+2]
+ m.activeLimit = m.data[op.value()+3]
+
+ // Restore original stack position, discarding any state saved
+ // by the successful pattern match.
+ newStackSize := m.data[op.value()]
+ m.stack.setSize(newStackSize)
+
+ // FAIL, which will take control back to someplace
+ // prior to entering the look-behind test.
+ fp = m.stack.popFrame()
+ case urxLoopSrI:
+ // Loop Initialization for the optimized implementation of
+ // [some character set]*
+ // This op scans through all matching input.
+ // The following LOOP_C op emulates stack unwinding if the following pattern fails.
+ s := sets[op.value()]
+
+ // Loop through input, until either the input is exhausted or
+ // we reach a character that is not a member of the set.
+ ix := *fp.inputIdx()
+
+ for {
+ if ix >= m.activeLimit {
+ m.hitEnd = true
+ break
+ }
+ c := charAt(inputText, ix)
+ if !s.ContainsRune(c) {
+ break
+ }
+ ix++
+ }
+
+ // If there were no matching characters, skip over the loop altogether.
+ // The loop doesn't run at all, a * op always succeeds.
+ if ix == *fp.inputIdx() {
+ *fp.patIdx()++ // skip the URX_LOOP_C op.
+ break
+ }
+
+ // Peek ahead in the compiled pattern, to the URX_LOOP_C that
+ // must follow. Its operand is the stack location
+ // that holds the starting input index for the match of this [set]*
+ loopcOp := pat[*fp.patIdx()]
+ stackLoc := loopcOp.value()
+ *fp.extra(stackLoc) = *fp.inputIdx()
+ *fp.inputIdx() = ix
+
+ // Save State to the URX_LOOP_C op that follows this one,
+ // so that match failures in the following code will return to there.
+ // Then bump the pattern idx so the LOOP_C is skipped on the way out of here.
+ fp, err = m.stateSave(*fp.inputIdx(), *fp.patIdx())
+ if err != nil {
+ return err
+ }
+ *fp.patIdx()++
+ case urxLoopDotI:
+ // Loop Initialization for the optimized implementation of .*
+ // This op scans through all remaining input.
+ // The following LOOP_C op emulates stack unwinding if the following pattern fails.
+
+ // Loop through input until the input is exhausted or we reach an end-of-line.
+ // In DOTALL mode, we can just go straight to the end of the input.
+ var ix int
+ if (op.value() & 1) == 1 {
+ // Dot-matches-All mode. Jump straight to the end of the string.
+ ix = m.activeLimit
+ m.hitEnd = true
+ } else {
+ // NOT DOT ALL mode. Line endings do not match '.'
+ // Scan forward until a line ending or end of input.
+ ix = *fp.inputIdx()
+ for {
+ if ix >= m.activeLimit {
+ m.hitEnd = true
+ break
+ }
+ c := charAt(inputText, ix)
+ if (c & 0x7f) <= 0x29 { // Fast filter of non-newline chars
+ if (c == 0x0a) || // 0x0a is newline in both modes.
+ (((op.value() & 2) == 0) && // IF not UNIX_LINES mode
+ isLineTerminator(c)) {
+ // char is a line ending. Exit the scanning loop.
+ break
+ }
+ }
+ ix++
+ }
+ }
+
+ // If there were no matching characters, skip over the loop altogether.
+ // The loop doesn't run at all, a * op always succeeds.
+ if ix == *fp.inputIdx() {
+ *fp.patIdx()++ // skip the URX_LOOP_C op.
+ break
+ }
+
+ // Peek ahead in the compiled pattern, to the URX_LOOP_C that
+ // must follow. Its operand is the stack location
+ // that holds the starting input index for the match of this .*
+ loopcOp := pat[*fp.patIdx()]
+ stackLoc := loopcOp.value()
+ *fp.extra(stackLoc) = *fp.inputIdx()
+ *fp.inputIdx() = ix
+
+ // Save State to the URX_LOOP_C op that follows this one,
+ // so that match failures in the following code will return to there.
+ // Then bump the pattern idx so the LOOP_C is skipped on the way out of here.
+ fp, err = m.stateSave(*fp.inputIdx(), *fp.patIdx())
+ if err != nil {
+ return err
+ }
+ *fp.patIdx()++
+
+ case urxLoopC:
+ backSearchIndex := *fp.extra(op.value())
+
+ if backSearchIndex == *fp.inputIdx() {
+ // We've backed up the input idx to the point that the loop started.
+ // The loop is done. Leave here without saving state.
+ // Subsequent failures won't come back here.
+ break
+ }
+ // Set up for the next iteration of the loop, with input index
+ // backed up by one from the last time through,
+ // and a state save to this instruction in case the following code fails again.
+ // (We're going backwards because this loop emulates stack unwinding, not
+ // the initial scan forward.)
+
+ prevC := charAt(inputText, *fp.inputIdx()-1)
+ *fp.inputIdx()--
+ twoPrevC := charAt(inputText, *fp.inputIdx()-1)
+
+ if prevC == 0x0a &&
+ *fp.inputIdx() > backSearchIndex &&
+ twoPrevC == 0x0d {
+ prevOp := pat[*fp.patIdx()-2]
+ if prevOp.typ() == urxLoopDotI {
+ // .*, stepping back over CRLF pair.
+ *fp.inputIdx()--
+ }
+ }
+
+ fp, err = m.stateSave(*fp.inputIdx(), *fp.patIdx()-1)
+ if err != nil {
+ return err
+ }
+ default:
+ // Trouble. The compiled pattern contains an entry with an
+ // unrecognized type tag.
+ panic("unreachable")
+ }
+ }
+
+breakFromLoop:
+ m.match = isMatch
+ if isMatch {
+ m.lastMatchEnd = m.matchEnd
+ m.matchStart = startIdx
+ m.matchEnd = *fp.inputIdx()
+ }
+
+ if m.dumper != nil {
+ if isMatch {
+ fmt.Fprintf(m.dumper, "Match. 
start=%d end=%d\n\n", m.matchStart, m.matchEnd) + } else { + fmt.Fprintf(m.dumper, "No match\n\n") + } + } + + m.frame = fp // The active stack frame when the engine stopped. + // Contains the capture group results that we need to + // access later. + return nil +} + +func charAt(str []rune, idx int) rune { + if idx >= 0 && idx < len(str) { + return str[idx] + } + return -1 +} + +func (m *Matcher) isWordBoundary(pos int) bool { + cIsWord := false + + if pos >= m.lookLimit { + m.hitEnd = true + } else { + c := charAt(m.input, pos) + if uprops.HasBinaryProperty(c, uprops.UCharGraphemeExtend) || uchar.CharType(c) == uchar.FormatChar { + return false + } + cIsWord = staticPropertySets[urxIswordSet].ContainsRune(c) + } + + prevCIsWord := false + for { + if pos <= m.lookStart { + break + } + prevChar := charAt(m.input, pos-1) + pos-- + if !(uprops.HasBinaryProperty(prevChar, uprops.UCharGraphemeExtend) || uchar.CharType(prevChar) == uchar.FormatChar) { + prevCIsWord = staticPropertySets[urxIswordSet].ContainsRune(prevChar) + break + } + } + return cIsWord != prevCIsWord +} + +func (m *Matcher) isUWordBoundary(pos int) bool { + // TODO: implement + /* + UBool returnVal = FALSE; + + #if UCONFIG_NO_BREAK_ITERATION==0 + // Note: this point will never be reached if break iteration is configured out. + // Regex patterns that would require this function will fail to compile. + + // If we haven't yet created a break iterator for this matcher, do it now. + if (fWordBreakItr == nullptr) { + fWordBreakItr = BreakIterator::createWordInstance(Locale::getEnglish(), status); + if (U_FAILURE(status)) { + return FALSE; + } + fWordBreakItr->setText(fInputText, status); + } + + // Note: zero width boundary tests like \b see through transparent region bounds, + // which is why fLookLimit is used here, rather than fActiveLimit. + if (pos >= fLookLimit) { + fHitEnd = TRUE; + returnVal = TRUE; // With Unicode word rules, only positions within the interior of "real" + // words are not boundaries. All non-word chars stand by themselves, + // with word boundaries on both sides. + } else { + returnVal = fWordBreakItr->isBoundary((int32_t)pos); + } + #endif + return returnVal; + */ + return false +} + +func (m *Matcher) resetStack() stackFrame { + m.stack.reset() + frame, _ := m.stack.newFrame(0, nil, "") + frame.clearExtra() + return frame +} + +func (m *Matcher) stateSave(inputIdx, savePatIdx int) (stackFrame, error) { + // push storage for a new frame. + newFP, err := m.stack.newFrame(inputIdx, m.input, m.pattern.pattern) + if err != nil { + return nil, err + } + fp := m.stack.prevFromTop() + + // New stack frame = copy of old top frame. + copy(newFP, fp) + + m.tickCounter-- + if m.tickCounter <= 0 { + if err := m.incrementTime(*fp.inputIdx()); err != nil { + return nil, err + } + } + *fp.patIdx() = savePatIdx + return newFP, nil +} + +func (m *Matcher) incrementTime(inputIdx int) error { + m.tickCounter = timerInitialValue + m.time++ + if m.timeLimit > 0 && m.time >= m.timeLimit { + return &MatchError{ + Code: TimeOut, + Pattern: m.pattern.pattern, + Position: inputIdx, + Input: m.input, + } + } + return nil +} + +func (m *Matcher) isDecimalDigit(c rune) bool { + return uchar.IsDigit(c) +} + +func (m *Matcher) isHorizWS(c rune) bool { + return uchar.CharType(c) == uchar.SpaceSeparator || c == 9 +} + +func (m *Matcher) followingGCBoundary(pos int) int { + // TODO: implement + return pos + /* + // Note: this point will never be reached if break iteration is configured out. 
+ // Regex patterns that would require this function will fail to compile. + + // If we haven't yet created a break iterator for this matcher, do it now. + if (m.gcBreakItr == nil) { + m.gcBreakItr = BreakIterator::createCharacterInstance(Locale::getEnglish(), status); + if (U_FAILURE(status)) { + return pos; + } + fGCBreakItr->setText(fInputText, status); + } + result = fGCBreakItr->following(pos); + if (result == BreakIterator::DONE) { + result = pos; + } + */ +} + +func (m *Matcher) ResetString(input string) { + m.Reset([]rune(input)) +} + +func (m *Matcher) Reset(input []rune) { + m.input = input + m.reset() +} + +func (m *Matcher) Matches() (bool, error) { + err := m.MatchAt(m.activeStart, true) + return m.match, err +} + +func (m *Matcher) LookingAt() (bool, error) { + err := m.MatchAt(m.activeStart, false) + return m.match, err +} + +func (m *Matcher) Find() (bool, error) { + startPos := m.matchEnd + if startPos == 0 { + startPos = m.activeStart + } + + if m.match { + // Save the position of any previous successful match. + m.lastMatchEnd = m.matchEnd + if m.matchStart == m.matchEnd { + // Previous match had zero length. Move start position up one position + // to avoid sending find() into a loop on zero-length matches. + if startPos >= m.activeLimit { + m.match = false + m.hitEnd = true + return false, nil + } + startPos++ + } + } else { + if m.lastMatchEnd >= 0 { + // A previous find() failed to match. Don't try again. + // (without this test, a pattern with a zero-length match + // could match again at the end of an input string.) + m.hitEnd = true + return false, nil + } + } + + testStartLimit := m.activeLimit - int(m.pattern.minMatchLen) + if startPos > testStartLimit { + m.match = false + m.hitEnd = true + return false, nil + } + + switch m.pattern.startType { + case startNoInfo: + // No optimization was found. + // Try a match at each input position. + for { + err := m.MatchAt(startPos, false) + if err != nil { + return false, err + } + if m.match { + return true, nil + } + if startPos >= testStartLimit { + m.hitEnd = true + return false, nil + } + startPos++ + } + case startSet: + // Match may start on any char from a pre-computed set. + for { + pos := startPos + c := charAt(m.input, startPos) + startPos++ + // c will be -1 (U_SENTINEL) at end of text, in which case we + // skip this next block (so we don't have a negative array index) + // and handle end of text in the following block. 
+ if c >= 0 && m.pattern.initialChars.ContainsRune(c) { + err := m.MatchAt(pos, false) + if err != nil { + return false, err + } + if m.match { + return true, nil + } + } + + if startPos > testStartLimit { + m.match = false + m.hitEnd = true + return false, nil + } + } + case startStart: + // Matches are only possible at the start of the input string + // (pattern begins with ^ or \A) + if startPos > m.activeStart { + m.match = false + return false, nil + } + err := m.MatchAt(startPos, false) + return m.match, err + case startLine: + var ch rune + if startPos == m.anchorStart { + err := m.MatchAt(startPos, false) + if err != nil { + return false, err + } + if m.match { + return true, nil + } + ch = charAt(m.input, startPos) + startPos++ + } else { + ch = charAt(m.input, startPos-1) + } + + if m.pattern.flags&UnixLines != 0 { + for { + if ch == 0x0a { + err := m.MatchAt(startPos, false) + if err != nil { + return false, err + } + if m.match { + return true, nil + } + } + if startPos >= testStartLimit { + m.match = false + m.hitEnd = true + return false, nil + } + ch = charAt(m.input, startPos) + startPos++ + } + } else { + for { + if isLineTerminator(ch) { + if ch == 0x0d && startPos < m.activeLimit && charAt(m.input, startPos) == 0x0a { + startPos++ + } + err := m.MatchAt(startPos, false) + if err != nil { + return false, err + } + if m.match { + return true, nil + } + } + if startPos >= testStartLimit { + m.match = false + m.hitEnd = true + return false, nil + } + ch = charAt(m.input, startPos) + startPos++ + } + } + case startChar, startString: + // Match starts on exactly one char. + theChar := m.pattern.initialChar + for { + pos := startPos + c := charAt(m.input, startPos) + startPos++ + if c == theChar { + err := m.MatchAt(pos, false) + if err != nil { + return false, err + } + if m.match { + return true, nil + } + } + if startPos > testStartLimit { + m.match = false + m.hitEnd = true + return false, nil + } + } + default: + panic("unreachable") + } +} + +func (m *Matcher) Start() int { + if !m.match { + return -1 + } + + return m.matchStart +} + +func (m *Matcher) reset() { + m.regionStart = 0 + m.regionLimit = len(m.input) + m.activeStart = 0 + m.activeLimit = len(m.input) + m.anchorStart = 0 + m.anchorLimit = len(m.input) + m.lookStart = 0 + m.lookLimit = len(m.input) + m.resetPreserveRegion() +} + +func (m *Matcher) resetPreserveRegion() { + m.matchStart = 0 + m.matchEnd = 0 + m.lastMatchEnd = -1 + m.appendPosition = 0 + m.match = false + m.hitEnd = false + m.requireEnd = false + m.time = 0 + m.tickCounter = timerInitialValue +} + +func (m *Matcher) GroupCount() int { + return len(m.pattern.groupMap) +} + +func (m *Matcher) StartForGroup(group int) int { + if !m.match { + return -1 + } + if group < 0 || group > len(m.pattern.groupMap) { + return -1 + } + if group == 0 { + return m.matchStart + } + groupOffset := int(m.pattern.groupMap[group-1]) + return *m.frame.extra(groupOffset) +} + +func (m *Matcher) EndForGroup(group int) int { + if !m.match { + return -1 + } + if group < 0 || group > len(m.pattern.groupMap) { + return -1 + } + if group == 0 { + return m.matchEnd + } + groupOffset := int(m.pattern.groupMap[group-1]) + return *m.frame.extra(groupOffset + 1) +} + +func (m *Matcher) HitEnd() bool { + return m.hitEnd +} + +func (m *Matcher) RequireEnd() bool { + return m.requireEnd +} + +func (m *Matcher) Group(i int) (string, bool) { + start := m.StartForGroup(i) + end := m.EndForGroup(i) + if start == -1 || end == -1 { + return "", false + } + return string(m.input[start:end]), 
true +} + +func (m *Matcher) End() int { + if !m.match { + return -1 + } + + return m.matchEnd +} + +func (m *Matcher) Dumper(out io.Writer) { + m.dumper = out +} + +// Test for any of the Unicode line terminating characters. +func isLineTerminator(c rune) bool { + if (c & ^(0x0a | 0x0b | 0x0c | 0x0d | 0x85 | 0x2028 | 0x2029)) != 0 { + return false + } + return (c <= 0x0d && c >= 0x0a) || c == 0x85 || c == 0x2028 || c == 0x2029 +} diff --git a/go/mysql/icuregex/ops.go b/go/mysql/icuregex/ops.go new file mode 100644 index 00000000000..dbb83ee3d24 --- /dev/null +++ b/go/mysql/icuregex/ops.go @@ -0,0 +1,414 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package icuregex + +import ( + "golang.org/x/exp/slices" + + "vitess.io/vitess/go/mysql/icuregex/internal/ucase" + "vitess.io/vitess/go/mysql/icuregex/internal/utf16" +) + +type opcode uint8 + +const ( + urxReservedOp opcode = iota // For multi-operand ops, most non-first words. + urxBacktrack // Force a backtrack, as if a match test had failed. + urxEnd + urxOnechar // Value field is the 21 bit unicode char to match + urxString // Value field is index of string start + urxStringLen // Value field is string length (code units) + urxStateSave // Value field is pattern position to push + urxNop + urxStartCapture // Value field is capture group number. + urxEndCapture // Value field is capture group number + urxStaticSetref // Value field is index of set in array of sets. + urxSetref // Value field is index of set in array of sets. + urxDotany + urxJmp // Value field is destination position in the pattern. + urxFail // Stop match operation, No match. + + urxJmpSav // Operand: JMP destination location + urxBackslashB // Value field: 0: \b 1: \B + urxBackslashG + urxJmpSavX // Conditional JMP_SAV, + // Used in (x)+, breaks loop on zero length match. + // Operand: Jmp destination. + urxBackslashX + urxBackslashZ // \z Unconditional end of line. + + urxDotanyAll // ., in the . matches any mode. + urxBackslashD // Value field: 0: \d 1: \D + urxCaret // Value field: 1: multi-line mode. + urxDollar // Also for \Z + + urxCtrInit // Counter Inits for {Interval} loops. + urxCtrInitNg // 2 kinds, normal and non-greedy. + // These are 4 word opcodes. See description. + // First Operand: Data loc of counter variable + // 2nd Operand: Pat loc of the URX_CTR_LOOPx + // at the end of the loop. + // 3rd Operand: Minimum count. + // 4th Operand: Max count, -1 for unbounded. + + urxDotanyUnix // '.' operator in UNIX_LINES mode, only \n marks end of line. + + utxCtrLoop // Loop Ops for {interval} loops. + urxCtrLoopNg // Also in three flavors. + // Operand is loc of corresponding CTR_INIT. 
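+ // As a rough illustration (see the urxCtrInit and utxCtrLoop cases in
+ // matcher.go), an interval quantifier such as x{2,5} might compile to:
+ //
+ //   CTR_INIT   <frame slot for the loop counter>
+ //              <pattern location of the CTR_LOOP below>
+ //              2             (minimum count)
+ //              5             (maximum count, -1 when unbounded)
+ //   ...body of the loop (the ops for x)...
+ //   CTR_LOOP   <pattern location of the CTR_INIT above>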
+ + urxCaretMUnix // '^' operator, test for start of line in multi-line + // plus UNIX_LINES mode. + + urxRelocOprnd // Operand value in multi-operand ops that refers + // back into compiled pattern code, and thus must + // be relocated when inserting/deleting ops in code. + + urxStoSp // Store the stack ptr. Operand is location within + // matcher data (not stack data) to store it. + urxLdSp // Load the stack pointer. Operand is location + // to load from. + urxBackref // Back Reference. Parameter is the index of the + // capture group variables in the state stack frame. + urxStoInpLoc // Store the input location. Operand is location + // within the matcher stack frame. + urxJmpx // Conditional JMP. + // First Operand: JMP target location. + // Second Operand: Data location containing an + // input position. If current input position == + // saved input position, FAIL rather than taking + // the JMP + urxLaStart // Starting a LookAround expression. + // Save InputPos, SP and active region in static data. + // Operand: Static data offset for the save + urxLaEnd // Ending a Lookaround expression. + // Restore InputPos and Stack to saved values. + // Operand: Static data offset for saved data. + urcOnecharI // Test for case-insensitive match of a literal character. + // Operand: the literal char. + urxStringI // Case insensitive string compare. + // First Operand: Index of start of string in string literals + // Second Operand (next word in compiled code): + // the length of the string. + urxBackrefI // Case insensitive back reference. + // Parameter is the index of the + // capture group variables in the state stack frame. + urxDollarM // $ in multi-line mode. + urxCaretM // ^ in multi-line mode. + urxLbStart // LookBehind Start. + // Parameter is data location + urxLbCont // LookBehind Continue. + // Param 0: the data location + // Param 1: The minimum length of the look-behind match + // Param 2: The max length of the look-behind match + urxLbEnd // LookBehind End. + // Parameter is the data location. + // Check that match ended at the right spot, + // Restore original input string len. + urxLbnCount // Negative LookBehind Continue + // Param 0: the data location + // Param 1: The minimum length of the look-behind match + // Param 2: The max length of the look-behind match + // Param 3: The pattern loc following the look-behind block. + urxLbnEnd // Negative LookBehind end + // Parameter is the data location. + // Check that the match ended at the right spot. + urxStatSetrefN // Reference to a prebuilt set (e.g. \w), negated + // Operand is index of set in array of sets. + urxLoopSrI // Init a [set]* loop. + // Operand is the sets index in array of user sets. + urxLoopC // Continue a [set]* or OneChar* loop. + // Operand is a matcher static data location. + // Must always immediately follow LOOP_x_I instruction. + urxLoopDotI // .*, initialization of the optimized loop. + // Operand value: + // bit 0: + // 0: Normal (. doesn't match new-line) mode. + // 1: . matches new-line mode. + // bit 1: controls what new-lines are recognized by this operation. + // 0: All Unicode New-lines + // 1: UNIX_LINES, \u000a only. + urxBackslashBu // \b or \B in UREGEX_UWORD mode, using Unicode style + // word boundaries. + urxDollarD // $ end of input test, in UNIX_LINES mode. + urxDollarMd // $ end of input test, in MULTI_LINE and UNIX_LINES mode. + urxBackslashH // Value field: 0: \h 1: \H + urxBackslashR // Any line break sequence. 
+ urxBackslashV // Value field: 0: \v 1: \V + + urxReservedOpN opcode = 255 // For multi-operand ops, negative operand values. +) + +// Keep this list of opcode names in sync with the above enum +// +// Used for debug printing only. +var urxOpcodeNames = []string{ + " ", + "BACKTRACK", + "END", + "ONECHAR", + "STRING", + "STRING_LEN", + "STATE_SAVE", + "NOP", + "START_CAPTURE", + "END_CAPTURE", + "URX_STATIC_SETREF", + "SETREF", + "DOTANY", + "JMP", + "FAIL", + "JMP_SAV", + "BACKSLASH_B", + "BACKSLASH_G", + "JMP_SAV_X", + "BACKSLASH_X", + "BACKSLASH_Z", + "DOTANY_ALL", + "BACKSLASH_D", + "CARET", + "DOLLAR", + "CTR_INIT", + "CTR_INIT_NG", + "DOTANY_UNIX", + "CTR_LOOP", + "CTR_LOOP_NG", + "URX_CARET_M_UNIX", + "RELOC_OPRND", + "STO_SP", + "LD_SP", + "BACKREF", + "STO_INP_LOC", + "JMPX", + "LA_START", + "LA_END", + "ONECHAR_I", + "STRING_I", + "BACKREF_I", + "DOLLAR_M", + "CARET_M", + "LB_START", + "LB_CONT", + "LB_END", + "LBN_CONT", + "LBN_END", + "STAT_SETREF_N", + "LOOP_SR_I", + "LOOP_C", + "LOOP_DOT_I", + "BACKSLASH_BU", + "DOLLAR_D", + "DOLLAR_MD", + "URX_BACKSLASH_H", + "URX_BACKSLASH_R", + "URX_BACKSLASH_V", +} + +type instruction int32 + +func (ins instruction) typ() opcode { + return opcode(uint32(ins) >> 24) +} + +func (ins instruction) value32() int32 { + return int32(ins) & 0xffffff +} + +func (ins instruction) value() int { + return int(ins.value32()) +} + +// Access to Unicode Sets composite character properties +// +// The sets are accessed by the match engine for things like \w (word boundary) +const ( + urxIswordSet = 1 + urxIsalnumSet = 2 + urxIsalphaSet = 3 + urxIsspaceSet = 4 + + urxGcNormal = iota + 1 // Sets for finding grapheme cluster boundaries. + urxGcExtend + urxGcControl + urxGcL + urxGcLv + urxGcLvt + urxGcV + urxGcT + + urxNegSet = 0x800000 // Flag bit to reverse sense of set + // membership test. +) + +type stack struct { + ary []int + frameSize int + stackLimit int +} + +type stackFrame []int + +func (f stackFrame) inputIdx() *int { + return &f[0] +} + +func (f stackFrame) patIdx() *int { + return &f[1] +} + +func (f stackFrame) extra(n int) *int { + return &f[2+n] +} + +func (f stackFrame) equals(f2 stackFrame) bool { + return &f[0] == &f2[0] +} + +func (s *stack) len() int { + return len(s.ary) +} + +func (s *stack) sp() int { + return len(s.ary) - s.frameSize +} + +func (s *stack) newFrame(inputIdx int, input []rune, pattern string) (stackFrame, error) { + if s.stackLimit != 0 && len(s.ary)+s.frameSize > s.stackLimit { + return nil, &MatchError{ + Code: StackOverflow, + Pattern: pattern, + Position: inputIdx, + Input: input, + } + } + s.ary = slices.Grow(s.ary, s.frameSize) + + f := s.ary[len(s.ary) : len(s.ary)+s.frameSize] + s.ary = s.ary[:len(s.ary)+s.frameSize] + return f, nil +} + +func (s *stack) prevFromTop() stackFrame { + return s.ary[len(s.ary)-2*s.frameSize:] +} + +func (s *stack) popFrame() stackFrame { + s.ary = s.ary[:len(s.ary)-s.frameSize] + return s.ary[len(s.ary)-s.frameSize:] +} + +func (s *stack) reset() { + s.ary = s.ary[:0] +} + +func (s *stack) offset(size int) stackFrame { + return s.ary[size-s.frameSize : size] +} + +func (s *stack) setSize(size int) { + s.ary = s.ary[:size] +} + +func (f stackFrame) clearExtra() { + for i := 2; i < len(f); i++ { + f[i] = -1 + } +} + +// number of UVector elements in the header +const restackframeHdrCount = 2 + +// Start-Of-Match type. Used by find() to quickly scan to positions where a +// +// match might start before firing up the full match engine. 
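+//
+// For example, a pattern beginning with a literal (say "abc.*") is classified as
+// startChar/startString with initialChar 'a', so Find() only attempts a full match
+// at input positions holding that character, while a pattern anchored with ^ or \A
+// is classified as startStart and is attempted only at the start of the region
+// (see the switch on startType in Find()).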
+type startOfMatch int8 + +const ( + startNoInfo startOfMatch = iota // No hint available. + startChar // Match starts with a literal code point. + startSet // Match starts with something matching a set. + startStart // Match starts at start of buffer only (^ or \A) + startLine // Match starts with ^ in multi-line mode. + startString // Match starts with a literal string. +) + +func (som startOfMatch) String() string { + switch som { + case startNoInfo: + return "START_NO_INFO" + case startChar: + return "START_CHAR" + case startSet: + return "START_SET" + case startStart: + return "START_START" + case startLine: + return "START_LINE" + case startString: + return "START_STRING" + default: + panic("unknown StartOfMatch") + } +} + +type caseFoldIterator struct { + chars []rune + index int + limit int + + foldChars []uint16 +} + +func (it *caseFoldIterator) next() rune { + if len(it.foldChars) == 0 { + // We are not in a string folding of an earlier character. + // Start handling the next char from the input UText. + if it.index >= it.limit { + return -1 + } + + originalC := it.chars[it.index] + it.index++ + + originalC, it.foldChars = ucase.FullFolding(originalC) + if len(it.foldChars) == 0 { + // input code point folds to a single code point, possibly itself. + return originalC + } + } + + var res rune + res, it.foldChars = utf16.NextUnsafe(it.foldChars) + return res +} + +func (it *caseFoldIterator) inExpansion() bool { + return len(it.foldChars) > 0 +} + +func newCaseFoldIterator(chars []rune, start, limit int) caseFoldIterator { + return caseFoldIterator{ + chars: chars, + index: start, + limit: limit, + } +} diff --git a/go/mysql/icuregex/pattern.go b/go/mysql/icuregex/pattern.go new file mode 100644 index 00000000000..f0823a213d4 --- /dev/null +++ b/go/mysql/icuregex/pattern.go @@ -0,0 +1,149 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package icuregex + +import ( + "vitess.io/vitess/go/mysql/icuregex/internal/uset" +) + +type Pattern struct { + pattern string + flags RegexpFlag + + compiledPat []instruction + literalText []rune + + sets []*uset.UnicodeSet + + minMatchLen int32 + frameSize int + dataSize int + + groupMap []int32 + + startType startOfMatch + initialStringIdx int + initialStringLen int + initialChars *uset.UnicodeSet + initialChar rune + needsAltInput bool + + namedCaptureMap map[string]int +} + +func NewPattern(flags RegexpFlag) *Pattern { + return &Pattern{ + flags: flags, + initialChars: uset.New(), + // Slot zero of the vector of sets is reserved. Fill it here. 
+ sets: []*uset.UnicodeSet{nil}, + } +} + +func MustCompileString(in string, flags RegexpFlag) *Pattern { + pat, err := CompileString(in, flags) + if err != nil { + panic(err) + } + return pat +} + +func Compile(in []rune, flags RegexpFlag) (*Pattern, error) { + pat := NewPattern(flags) + cmp := newCompiler(pat) + if err := cmp.compile(in); err != nil { + return nil, err + } + return pat, nil +} + +func CompileString(in string, flags RegexpFlag) (*Pattern, error) { + pat := NewPattern(flags) + cmp := newCompiler(pat) + if err := cmp.compile([]rune(in)); err != nil { + return nil, err + } + return pat, nil +} + +func (p *Pattern) Match(input string) *Matcher { + m := NewMatcher(p) + m.ResetString(input) + return m +} + +type RegexpFlag int32 + +const ( + /** Enable case insensitive matching. @stable ICU 2.4 */ + CaseInsensitive RegexpFlag = 2 + + /** Allow white space and comments within patterns @stable ICU 2.4 */ + Comments RegexpFlag = 4 + + /** If set, '.' matches line terminators, otherwise '.' matching stops at line end. + * @stable ICU 2.4 */ + DotAll RegexpFlag = 32 + + /** If set, treat the entire pattern as a literal string. + * Metacharacters or escape sequences in the input sequence will be given + * no special meaning. + * + * The flag UREGEX_CASE_INSENSITIVE retains its impact + * on matching when used in conjunction with this flag. + * The other flags become superfluous. + * + * @stable ICU 4.0 + */ + Literal RegexpFlag = 16 + + /** Control behavior of "$" and "^" + * If set, recognize line terminators within string, + * otherwise, match only at start and end of input string. + * @stable ICU 2.4 */ + Multiline RegexpFlag = 8 + + /** Unix-only line endings. + * When this mode is enabled, only \\u000a is recognized as a line ending + * in the behavior of ., ^, and $. + * @stable ICU 4.0 + */ + UnixLines RegexpFlag = 1 + + /** Unicode word boundaries. + * If set, \b uses the Unicode TR 29 definition of word boundaries. + * Warning: Unicode word boundaries are quite different from + * traditional regular expression word boundaries. See + * http://unicode.org/reports/tr29/#Word_Boundaries + * @stable ICU 2.8 + */ + UWord RegexpFlag = 256 + + /** Error on Unrecognized backslash escapes. + * If set, fail with an error on patterns that contain + * backslash-escaped ASCII letters without a known special + * meaning. If this flag is not set, these + * escaped letters represent themselves. + * @stable ICU 4.0 + */ + ErrorOnUnknownEscapes RegexpFlag = 512 +) diff --git a/go/mysql/icuregex/perl_test.go b/go/mysql/icuregex/perl_test.go new file mode 100644 index 00000000000..0e7beda9fbd --- /dev/null +++ b/go/mysql/icuregex/perl_test.go @@ -0,0 +1,216 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package icuregex + +import ( + "bufio" + "os" + "strconv" + "strings" + "testing" +) + +func TestPerl(t *testing.T) { + f, err := os.Open("testdata/re_tests.txt") + if err != nil { + t.Fatalf("failed to open test data: %v", err) + } + defer f.Close() + + flagPat := MustCompileString(`('?)(.*)\1(.*)`, 0) + flagMat := NewMatcher(flagPat) + + groupsPat := MustCompileString(`\$([+\-])\[(\d+)\]`, 0) + groupsMat := NewMatcher(groupsPat) + + cgPat := MustCompileString(`\$(\d+)`, 0) + cgMat := NewMatcher(cgPat) + + group := func(m *Matcher, idx int) string { + g, _ := m.Group(idx) + return g + } + + lookingAt := func(m *Matcher) bool { + ok, err := m.LookingAt() + if err != nil { + t.Fatalf("failed to match with LookingAt(): %v", err) + } + return ok + } + + replacer := strings.NewReplacer( + `${bang}`, "!", + `${nulnul}`, "\x00\x00", + `${ffff}`, "\uffff", + ) + + scanner := bufio.NewScanner(f) + var lineno int + + for scanner.Scan() { + lineno++ + fields := strings.Split(scanner.Text(), "\t") + + flagMat.ResetString(fields[0]) + ok, _ := flagMat.Matches() + if !ok { + t.Fatalf("could not match pattern+flags (line %d)", lineno) + } + + pattern, _ := flagMat.Group(2) + pattern = replacer.Replace(pattern) + + flagStr, _ := flagMat.Group(3) + var flags RegexpFlag + if strings.IndexByte(flagStr, 'i') >= 0 { + flags |= CaseInsensitive + } + if strings.IndexByte(flagStr, 'm') >= 0 { + flags |= Multiline + } + if strings.IndexByte(flagStr, 'x') >= 0 { + flags |= Comments + } + + testPat, err := CompileString(pattern, flags) + if err != nil { + if cerr, ok := err.(*CompileError); ok && cerr.Code == Unimplemented { + continue + } + if strings.IndexByte(fields[2], 'c') == -1 && strings.IndexByte(fields[2], 'i') == -1 { + t.Errorf("line %d: ICU error %q", lineno, err) + } + continue + } + + if strings.IndexByte(fields[2], 'i') >= 0 { + continue + } + if strings.IndexByte(fields[2], 'c') >= 0 { + t.Errorf("line %d: expected error", lineno) + continue + } + + matchString := fields[1] + matchString = replacer.Replace(matchString) + matchString = strings.ReplaceAll(matchString, `\n`, "\n") + + testMat := testPat.Match(matchString) + found, _ := testMat.Find() + expected := strings.IndexByte(fields[2], 'y') >= 0 + + if expected != found { + t.Errorf("line %d: expected %v, found %v", lineno, expected, found) + continue + } + + if !found { + continue + } + + var result []byte + var perlExpr = fields[3] + + for len(perlExpr) > 0 { + groupsMat.ResetString(perlExpr) + cgMat.ResetString(perlExpr) + + switch { + case strings.HasPrefix(perlExpr, "$&"): + result = append(result, group(testMat, 0)...) + perlExpr = perlExpr[2:] + + case lookingAt(groupsMat): + groupNum, err := strconv.ParseInt(group(groupsMat, 2), 10, 32) + if err != nil { + t.Fatalf("failed to parse Perl pattern: %v", err) + } + + var matchPosition int + if group(groupsMat, 1) == "+" { + matchPosition = testMat.EndForGroup(int(groupNum)) + } else { + matchPosition = testMat.StartForGroup(int(groupNum)) + } + if matchPosition != -1 { + result = strconv.AppendInt(result, int64(matchPosition), 10) + } + + perlExpr = perlExpr[groupsMat.EndForGroup(0):] + + case lookingAt(cgMat): + groupNum, err := strconv.ParseInt(group(cgMat, 1), 10, 32) + if err != nil { + t.Fatalf("failed to parse Perl pattern: %v", err) + } + result = append(result, group(testMat, int(groupNum))...) 
+ perlExpr = perlExpr[cgMat.EndForGroup(0):] + + case strings.HasPrefix(perlExpr, "@-"): + for i := 0; i <= testMat.GroupCount(); i++ { + if i > 0 { + result = append(result, ' ') + } + result = strconv.AppendInt(result, int64(testMat.StartForGroup(i)), 10) + } + perlExpr = perlExpr[2:] + + case strings.HasPrefix(perlExpr, "@+"): + for i := 0; i <= testMat.GroupCount(); i++ { + if i > 0 { + result = append(result, ' ') + } + result = strconv.AppendInt(result, int64(testMat.EndForGroup(i)), 10) + } + perlExpr = perlExpr[2:] + + case strings.HasPrefix(perlExpr, "\\"): + if len(perlExpr) > 1 { + perlExpr = perlExpr[1:] + } + c := perlExpr[0] + switch c { + case 'n': + c = '\n' + } + result = append(result, c) + perlExpr = perlExpr[1:] + + default: + result = append(result, perlExpr[0]) + perlExpr = perlExpr[1:] + } + } + + var expectedS string + if len(fields) > 4 { + expectedS = fields[4] + expectedS = replacer.Replace(expectedS) + expectedS = strings.ReplaceAll(expectedS, `\n`, "\n") + } + + if expectedS != string(result) { + t.Errorf("line %d: Incorrect Perl expression results for %s\nwant: %q\ngot: %q", lineno, pattern, expectedS, result) + } + } +} diff --git a/go/mysql/icuregex/sets.go b/go/mysql/icuregex/sets.go new file mode 100644 index 00000000000..0f745b3374d --- /dev/null +++ b/go/mysql/icuregex/sets.go @@ -0,0 +1,104 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package icuregex + +import ( + "vitess.io/vitess/go/mysql/icuregex/internal/uprops" + "vitess.io/vitess/go/mysql/icuregex/internal/uset" +) + +var staticPropertySets [13]*uset.UnicodeSet + +func init() { + staticPropertySets[urxIswordSet] = func() *uset.UnicodeSet { + s := uset.New() + s.AddAll(uprops.MustNewUnicodeSetFomPattern(`\p{Alphabetic}`, 0)) + s.AddAll(uprops.MustNewUnicodeSetFomPattern(`\p{M}`, 0)) + s.AddAll(uprops.MustNewUnicodeSetFomPattern(`\p{Nd}`, 0)) + s.AddAll(uprops.MustNewUnicodeSetFomPattern(`\p{Pc}`, 0)) + s.AddRune(0x200c) + s.AddRune(0x200d) + return s.Freeze() + }() + + staticPropertySets[urxIsspaceSet] = uprops.MustNewUnicodeSetFomPattern(`\p{Whitespace}`, 0).Freeze() + + staticPropertySets[urxGcExtend] = uprops.MustNewUnicodeSetFomPattern(`\p{Grapheme_Extend}`, 0).Freeze() + staticPropertySets[urxGcControl] = func() *uset.UnicodeSet { + s := uset.New() + s.AddAll(uprops.MustNewUnicodeSetFomPattern(`[:Zl:]`, 0)) + s.AddAll(uprops.MustNewUnicodeSetFomPattern(`[:Zp:]`, 0)) + s.AddAll(uprops.MustNewUnicodeSetFomPattern(`[:Cc:]`, 0)) + s.AddAll(uprops.MustNewUnicodeSetFomPattern(`[:Cf:]`, 0)) + s.RemoveAll(uprops.MustNewUnicodeSetFomPattern(`[:Grapheme_Extend:]`, 0)) + return s.Freeze() + }() + staticPropertySets[urxGcL] = uprops.MustNewUnicodeSetFomPattern(`\p{Hangul_Syllable_Type=L}`, 0).Freeze() + staticPropertySets[urxGcLv] = uprops.MustNewUnicodeSetFomPattern(`\p{Hangul_Syllable_Type=LV}`, 0).Freeze() + staticPropertySets[urxGcLvt] = uprops.MustNewUnicodeSetFomPattern(`\p{Hangul_Syllable_Type=LVT}`, 0).Freeze() + staticPropertySets[urxGcV] = uprops.MustNewUnicodeSetFomPattern(`\p{Hangul_Syllable_Type=V}`, 0).Freeze() + staticPropertySets[urxGcT] = uprops.MustNewUnicodeSetFomPattern(`\p{Hangul_Syllable_Type=T}`, 0).Freeze() + + staticPropertySets[urxGcNormal] = func() *uset.UnicodeSet { + s := uset.New() + s.Complement() + s.RemoveRuneRange(0xac00, 0xd7a4) + s.RemoveAll(staticPropertySets[urxGcControl]) + s.RemoveAll(staticPropertySets[urxGcL]) + s.RemoveAll(staticPropertySets[urxGcV]) + s.RemoveAll(staticPropertySets[urxGcT]) + return s.Freeze() + }() +} + +var staticSetUnescape = func() *uset.UnicodeSet { + u := uset.New() + u.AddString("acefnrtuUx") + return u.Freeze() +}() + +const ( + ruleSetDigitChar = 128 + ruleSetASCIILetter = 129 + ruleSetRuleChar = 130 + ruleSetCount = 131 - 128 +) + +var staticRuleSet = [ruleSetCount]*uset.UnicodeSet{ + func() *uset.UnicodeSet { + u := uset.New() + u.AddRuneRange('0', '9') + return u.Freeze() + }(), + func() *uset.UnicodeSet { + u := uset.New() + u.AddRuneRange('A', 'Z') + u.AddRuneRange('a', 'z') + return u.Freeze() + }(), + func() *uset.UnicodeSet { + u := uset.New() + u.AddString("*?+[(){}^$|\\.") + u.Complement() + return u.Freeze() + }(), +} diff --git a/go/mysql/icuregex/sets_test.go b/go/mysql/icuregex/sets_test.go new file mode 100644 index 00000000000..d33552732f2 --- /dev/null +++ b/go/mysql/icuregex/sets_test.go @@ -0,0 +1,66 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package icuregex + +import ( + "testing" +) + +func TestStaticSetContents(t *testing.T) { + // These are the number of codepoints contained in each of the static sets as of ICU69-1, + // as to sanity check that we're re-creating the sets properly. + // This table must be re-created when updating Unicode versions. + var ExpectedSetSizes = map[int]int{ + 1: 134564, + 4: 25, + 5: 1102451, + 6: 1979, + 7: 131, + 8: 125, + 9: 399, + 10: 10773, + 11: 95, + 12: 137, + } + + for setid, expected := range ExpectedSetSizes { + if got := staticPropertySets[setid].Len(); got != expected { + t.Fatalf("static set [%d] has wrong size: got %d, expected %d", setid, got, expected) + } + } +} + +func TestStaticFreeze(t *testing.T) { + for _, s := range staticPropertySets { + if err := s.FreezeCheck_(); err != nil { + t.Error(err) + } + } + for _, s := range staticRuleSet { + if err := s.FreezeCheck_(); err != nil { + t.Error(err) + } + } + if err := staticSetUnescape.FreezeCheck_(); err != nil { + t.Error(err) + } +} diff --git a/go/mysql/icuregex/testdata/re_tests.txt b/go/mysql/icuregex/testdata/re_tests.txt new file mode 100644 index 00000000000..c18b638f9b3 --- /dev/null +++ b/go/mysql/icuregex/testdata/re_tests.txt @@ -0,0 +1,923 @@ +abc abc y $& abc +abc abc y $-[0] 0 +abc abc y $+[0] 3 +abc xbc n - - +abc axc n - - +abc abx n - - +abc xabcy y $& abc +abc xabcy y $-[0] 1 +abc xabcy y $+[0] 4 +abc ababc y $& abc +abc ababc y $-[0] 2 +abc ababc y $+[0] 5 +ab*c abc y $& abc +ab*c abc y $-[0] 0 +ab*c abc y $+[0] 3 +ab*bc abc y $& abc +ab*bc abc y $-[0] 0 +ab*bc abc y $+[0] 3 +ab*bc abbc y $& abbc +ab*bc abbc y $-[0] 0 +ab*bc abbc y $+[0] 4 +ab*bc abbbbc y $& abbbbc +ab*bc abbbbc y $-[0] 0 +ab*bc abbbbc y $+[0] 6 +.{1} abbbbc y $& a +.{1} abbbbc y $-[0] 0 +.{1} abbbbc y $+[0] 1 +.{3,4} abbbbc y $& abbb +.{3,4} abbbbc y $-[0] 0 +.{3,4} abbbbc y $+[0] 4 +ab{0,}bc abbbbc y $& abbbbc +ab{0,}bc abbbbc y $-[0] 0 +ab{0,}bc abbbbc y $+[0] 6 +ab+bc abbc y $& abbc +ab+bc abbc y $-[0] 0 +ab+bc abbc y $+[0] 4 +ab+bc abc n - - +ab+bc abq n - - +ab{1,}bc abq n - - +ab+bc abbbbc y $& abbbbc +ab+bc abbbbc y $-[0] 0 +ab+bc abbbbc y $+[0] 6 +ab{1,}bc abbbbc y $& abbbbc +ab{1,}bc abbbbc y $-[0] 0 +ab{1,}bc abbbbc y $+[0] 6 +ab{1,3}bc abbbbc y $& abbbbc +ab{1,3}bc abbbbc y $-[0] 0 +ab{1,3}bc abbbbc y $+[0] 6 +ab{3,4}bc abbbbc y $& abbbbc +ab{3,4}bc abbbbc y $-[0] 0 +ab{3,4}bc abbbbc y $+[0] 6 +ab{4,5}bc abbbbc n - - +ab?bc abbc y $& abbc +ab?bc abc y $& abc +ab{0,1}bc abc y $& abc +ab?bc abbbbc n - - +ab?c abc y $& abc +ab{0,1}c abc y $& abc +^abc$ abc y $& abc +^abc$ abcc n - - +^abc abcc y $& abc +^abc$ aabc n - - +abc$ aabc y $& abc +abc$ aabcd n - - +^ abc y $& +$ abc y $& +a.c abc y $& abc +a.c axc y $& axc +a.*c axyzc y $& axyzc +a.*c axyzd n - - +a[bc]d abc n - - +a[bc]d abd y $& abd +a[b-d]e abd n - - +a[b-d]e ace y $& ace +a[b-d] aac y $& ac +a[-b] a- y $& a- +a[b-] a- y $& a- +a[b-a] - c - Invalid [] range "b-a" +a[]b - ci - Unmatched [ +a[ - c - Unmatched [ +a] a] y $& a] +a[]]b a]b y $& a]b +a[^bc]d aed y $& aed +a[^bc]d abd n - - +a[^-b]c adc y $& adc +a[^-b]c a-c n - - +a[^]b]c a]c n - - +a[^]b]c adc y $& adc 
+\ba\b a- y - - +\ba\b -a y - - +\ba\b -a- y - - +\by\b xy n - - +\by\b yz n - - +\by\b xyz n - - +\Ba\B a- n - - +\Ba\B -a n - - +\Ba\B -a- n - - +\By\b xy y - - +\By\b xy y $-[0] 1 +\By\b xy y $+[0] 2 +\By\b xy y - - +\by\B yz y - - +\By\B xyz y - - +\w a y - - +\w - n - - +\W a n - - +\W - y - - +a\sb a b y - - +a\sb a-b n - - +a\Sb a b n - - +a\Sb a-b y - - +\d 1 y - - +\d - n - - +\D 1 n - - +\D - y - - +[\w] a y - - +[\w] - n - - +[\W] a n - - +[\W] - y - - +a[\s]b a b y - - +a[\s]b a-b n - - +a[\S]b a b n - - +a[\S]b a-b y - - +[\d] 1 y - - +[\d] - n - - +[\D] 1 n - - +[\D] - y - - +ab|cd abc y $& ab +ab|cd abcd y $& ab +()ef def y $&-$1 ef- +()ef def y $-[0] 1 +()ef def y $+[0] 3 +()ef def y $-[1] 1 +()ef def y $+[1] 1 +*a - c - Quantifier follows nothing +(*)b - c - Quantifier follows nothing +$b b n - - +a\ - c - Search pattern not terminated +a\(b a(b y $&-$1 a(b- +a\(*b ab y $& ab +a\(*b a((b y $& a((b +a\\b a\b y $& a\b +abc) - c - Unmatched ) +(abc - c - Unmatched ( +((a)) abc y $&-$1-$2 a-a-a +((a)) abc y $-[0]-$-[1]-$-[2] 0-0-0 +((a)) abc y $+[0]-$+[1]-$+[2] 1-1-1 +((a)) abc by @- 0 0 0 +((a)) abc by @+ 1 1 1 +(a)b(c) abc y $&-$1-$2 abc-a-c +(a)b(c) abc y $-[0]-$-[1]-$-[2] 0-0-2 +(a)b(c) abc y $+[0]-$+[1]-$+[2] 3-1-3 +a+b+c aabbabc y $& abc +a{1,}b{1,}c aabbabc y $& abc +a** - c - Nested quantifiers +a.+?c abcabc y $& abc +(a+|b)* ab y $&-$1 ab-b +(a+|b)* ab y $-[0] 0 +(a+|b)* ab y $+[0] 2 +(a+|b)* ab y $-[1] 1 +(a+|b)* ab y $+[1] 2 +(a+|b){0,} ab y $&-$1 ab-b +(a+|b)+ ab y $&-$1 ab-b +(a+|b){1,} ab y $&-$1 ab-b +(a+|b)? ab y $&-$1 a-a +(a+|b){0,1} ab y $&-$1 a-a +)( - c - Unmatched ) +[^ab]* cde y $& cde +abc n - - +a* y $& +([abc])*d abbbcd y $&-$1 abbbcd-c +([abc])*bcd abcd y $&-$1 abcd-a +a|b|c|d|e e y $& e +(a|b|c|d|e)f ef y $&-$1 ef-e +(a|b|c|d|e)f ef y $-[0] 0 +(a|b|c|d|e)f ef y $+[0] 2 +(a|b|c|d|e)f ef y $-[1] 0 +(a|b|c|d|e)f ef y $+[1] 1 +abcd*efg abcdefg y $& abcdefg +ab* xabyabbbz y $& ab +ab* xayabbbz y $& a +(ab|cd)e abcde y $&-$1 cde-cd +[abhgefdc]ij hij y $& hij +^(ab|cd)e abcde n x$1y xy +(abc|)ef abcdef y $&-$1 ef- +(a|b)c*d abcd y $&-$1 bcd-b +(ab|ab*)bc abc y $&-$1 abc-a +a([bc]*)c* abc y $&-$1 abc-bc +a([bc]*)(c*d) abcd y $&-$1-$2 abcd-bc-d +a([bc]*)(c*d) abcd y $-[0] 0 +a([bc]*)(c*d) abcd y $+[0] 4 +a([bc]*)(c*d) abcd y $-[1] 1 +a([bc]*)(c*d) abcd y $+[1] 3 +a([bc]*)(c*d) abcd y $-[2] 3 +a([bc]*)(c*d) abcd y $+[2] 4 +a([bc]+)(c*d) abcd y $&-$1-$2 abcd-bc-d +a([bc]*)(c+d) abcd y $&-$1-$2 abcd-b-cd +a([bc]*)(c+d) abcd y $-[0] 0 +a([bc]*)(c+d) abcd y $+[0] 4 +a([bc]*)(c+d) abcd y $-[1] 1 +a([bc]*)(c+d) abcd y $+[1] 2 +a([bc]*)(c+d) abcd y $-[2] 2 +a([bc]*)(c+d) abcd y $+[2] 4 +a[bcd]*dcdcde adcdcde y $& adcdcde +a[bcd]+dcdcde adcdcde n - - +(ab|a)b*c abc y $&-$1 abc-ab +(ab|a)b*c abc y $-[0] 0 +(ab|a)b*c abc y $+[0] 3 +(ab|a)b*c abc y $-[1] 0 +(ab|a)b*c abc y $+[1] 2 +((a)(b)c)(d) abcd y $1-$2-$3-$4 abc-a-b-d +((a)(b)c)(d) abcd y $-[0] 0 +((a)(b)c)(d) abcd y $+[0] 4 +((a)(b)c)(d) abcd y $-[1] 0 +((a)(b)c)(d) abcd y $+[1] 3 +((a)(b)c)(d) abcd y $-[2] 0 +((a)(b)c)(d) abcd y $+[2] 1 +((a)(b)c)(d) abcd y $-[3] 1 +((a)(b)c)(d) abcd y $+[3] 2 +((a)(b)c)(d) abcd y $-[4] 3 +((a)(b)c)(d) abcd y $+[4] 4 +[a-zA-Z_][a-zA-Z0-9_]* alpha y $& alpha +^a(bc+|b[eh])g|.h$ abh y $&-$1 bh- +(bc+d$|ef*g.|h?i(j|k)) effgz y $&-$1-$2 effgz-effgz- +(bc+d$|ef*g.|h?i(j|k)) ij y $&-$1-$2 ij-ij-j +(bc+d$|ef*g.|h?i(j|k)) effg n - - +(bc+d$|ef*g.|h?i(j|k)) bcdd n - - +(bc+d$|ef*g.|h?i(j|k)) reffgz y $&-$1-$2 effgz-effgz- +((((((((((a)))))))))) a y $10 a +((((((((((a)))))))))) a y $-[0] 0 
+((((((((((a)))))))))) a y $+[0] 1 +((((((((((a)))))))))) a y $-[10] 0 +((((((((((a)))))))))) a y $+[10] 1 +((((((((((a))))))))))\10 aa y $& aa +((((((((((a))))))))))${bang} aa n - - +((((((((((a))))))))))${bang} a! y $& a! +(((((((((a))))))))) a y $& a +multiple words of text uh-uh n - - +multiple words multiple words, yeah y $& multiple words +(.*)c(.*) abcde y $&-$1-$2 abcde-ab-de +\((.*), (.*)\) (a, b) y ($2, $1) (b, a) +[k] ab n - - +abcd abcd y $&-\$&-\\$& abcd-$&-\abcd +a(bc)d abcd y $1-\$1-\\$1 bc-$1-\bc +a[-]?c ac y $& ac +(abc)\1 abcabc y $1 abc +([a-c]*)\1 abcabc y $1 abc +\1 - c - Reference to nonexistent group +\2 - c - Reference to nonexistent group +(a)|\1 a y - - +(a)|\1 x n - - +(a)|\2 - c - Reference to nonexistent group +(([a-c])b*?\2)* ababbbcbc y $&-$1-$2 ababb-bb-b +(([a-c])b*?\2){3} ababbbcbc y $&-$1-$2 ababbbcbc-cbc-c +((\3|b)\2(a)x)+ aaxabxbaxbbx n - - +((\3|b)\2(a)x)+ aaaxabaxbaaxbbax y $&-$1-$2-$3 bbax-bbax-b-a +((\3|b)\2(a)){2,} bbaababbabaaaaabbaaaabba y $&-$1-$2-$3 bbaaaabba-bba-b-a +(a)|(b) b y $-[0] 0 +(a)|(b) b y $+[0] 1 +(a)|(b) b y x$-[1] x +(a)|(b) b y x$+[1] x +(a)|(b) b y $-[2] 0 +(a)|(b) b y $+[2] 1 +'abc'i ABC y $& ABC +'abc'i XBC n - - +'abc'i AXC n - - +'abc'i ABX n - - +'abc'i XABCY y $& ABC +'abc'i ABABC y $& ABC +'ab*c'i ABC y $& ABC +'ab*bc'i ABC y $& ABC +'ab*bc'i ABBC y $& ABBC +'ab*?bc'i ABBBBC y $& ABBBBC +'ab{0,}?bc'i ABBBBC y $& ABBBBC +'ab+?bc'i ABBC y $& ABBC +'ab+bc'i ABC n - - +'ab+bc'i ABQ n - - +'ab{1,}bc'i ABQ n - - +'ab+bc'i ABBBBC y $& ABBBBC +'ab{1,}?bc'i ABBBBC y $& ABBBBC +'ab{1,3}?bc'i ABBBBC y $& ABBBBC +'ab{3,4}?bc'i ABBBBC y $& ABBBBC +'ab{4,5}?bc'i ABBBBC n - - +'ab??bc'i ABBC y $& ABBC +'ab??bc'i ABC y $& ABC +'ab{0,1}?bc'i ABC y $& ABC +'ab??bc'i ABBBBC n - - +'ab??c'i ABC y $& ABC +'ab{0,1}?c'i ABC y $& ABC +'^abc$'i ABC y $& ABC +'^abc$'i ABCC n - - +'^abc'i ABCC y $& ABC +'^abc$'i AABC n - - +'abc$'i AABC y $& ABC +'^'i ABC y $& +'$'i ABC y $& +'a.c'i ABC y $& ABC +'a.c'i AXC y $& AXC +'a.*?c'i AXYZC y $& AXYZC +'a.*c'i AXYZD n - - +'a[bc]d'i ABC n - - +'a[bc]d'i ABD y $& ABD +'a[b-d]e'i ABD n - - +'a[b-d]e'i ACE y $& ACE +'a[b-d]'i AAC y $& AC +'a[-b]'i A- y $& A- +'a[b-]'i A- y $& A- +'a[b-a]'i - c - Invalid [] range "b-a" +'a[]b'i - ci - Unmatched [ +'a['i - c - Unmatched [ +'a]'i A] y $& A] +'a[]]b'i A]B y $& A]B +'a[^bc]d'i AED y $& AED +'a[^bc]d'i ABD n - - +'a[^-b]c'i ADC y $& ADC +'a[^-b]c'i A-C n - - +'a[^]b]c'i A]C n - - +'a[^]b]c'i ADC y $& ADC +'ab|cd'i ABC y $& AB +'ab|cd'i ABCD y $& AB +'()ef'i DEF y $&-$1 EF- +'*a'i - c - Quantifier follows nothing +'(*)b'i - c - Quantifier follows nothing +'$b'i B n - - +'a\'i - c - Search pattern not terminated +'a\(b'i A(B y $&-$1 A(B- +'a\(*b'i AB y $& AB +'a\(*b'i A((B y $& A((B +'a\\b'i A\B y $& A\B +'abc)'i - c - Unmatched ) +'(abc'i - c - Unmatched ( +'((a))'i ABC y $&-$1-$2 A-A-A +'(a)b(c)'i ABC y $&-$1-$2 ABC-A-C +'a+b+c'i AABBABC y $& ABC +'a{1,}b{1,}c'i AABBABC y $& ABC +'a**'i - c - Nested quantifiers +'a.+?c'i ABCABC y $& ABC +'a.*?c'i ABCABC y $& ABC +'a.{0,5}?c'i ABCABC y $& ABC +'(a+|b)*'i AB y $&-$1 AB-B +'(a+|b){0,}'i AB y $&-$1 AB-B +'(a+|b)+'i AB y $&-$1 AB-B +'(a+|b){1,}'i AB y $&-$1 AB-B +'(a+|b)?'i AB y $&-$1 A-A +'(a+|b){0,1}'i AB y $&-$1 A-A +'(a+|b){0,1}?'i AB y $&-$1 - +')('i - c - Unmatched ) +'[^ab]*'i CDE y $& CDE +'abc'i n - - +'a*'i y $& +'([abc])*d'i ABBBCD y $&-$1 ABBBCD-C +'([abc])*bcd'i ABCD y $&-$1 ABCD-A +'a|b|c|d|e'i E y $& E +'(a|b|c|d|e)f'i EF y $&-$1 EF-E +'abcd*efg'i ABCDEFG y $& ABCDEFG +'ab*'i XABYABBBZ y $& AB +'ab*'i 
XAYABBBZ y $& A +'(ab|cd)e'i ABCDE y $&-$1 CDE-CD +'[abhgefdc]ij'i HIJ y $& HIJ +'^(ab|cd)e'i ABCDE n x$1y XY +'(abc|)ef'i ABCDEF y $&-$1 EF- +'(a|b)c*d'i ABCD y $&-$1 BCD-B +'(ab|ab*)bc'i ABC y $&-$1 ABC-A +'a([bc]*)c*'i ABC y $&-$1 ABC-BC +'a([bc]*)(c*d)'i ABCD y $&-$1-$2 ABCD-BC-D +'a([bc]+)(c*d)'i ABCD y $&-$1-$2 ABCD-BC-D +'a([bc]*)(c+d)'i ABCD y $&-$1-$2 ABCD-B-CD +'a[bcd]*dcdcde'i ADCDCDE y $& ADCDCDE +'a[bcd]+dcdcde'i ADCDCDE n - - +'(ab|a)b*c'i ABC y $&-$1 ABC-AB +'((a)(b)c)(d)'i ABCD y $1-$2-$3-$4 ABC-A-B-D +'[a-zA-Z_][a-zA-Z0-9_]*'i ALPHA y $& ALPHA +'^a(bc+|b[eh])g|.h$'i ABH y $&-$1 BH- +'(bc+d$|ef*g.|h?i(j|k))'i EFFGZ y $&-$1-$2 EFFGZ-EFFGZ- +'(bc+d$|ef*g.|h?i(j|k))'i IJ y $&-$1-$2 IJ-IJ-J +'(bc+d$|ef*g.|h?i(j|k))'i EFFG n - - +'(bc+d$|ef*g.|h?i(j|k))'i BCDD n - - +'(bc+d$|ef*g.|h?i(j|k))'i REFFGZ y $&-$1-$2 EFFGZ-EFFGZ- +'((((((((((a))))))))))'i A y $10 A +'((((((((((a))))))))))\10'i AA y $& AA +'((((((((((a))))))))))${bang}'i AA n - - +'((((((((((a))))))))))${bang}'i A! y $& A! +'(((((((((a)))))))))'i A y $& A +'(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))'i A y $1 A +'(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))'i C y $1 C +'multiple words of text'i UH-UH n - - +'multiple words'i MULTIPLE WORDS, YEAH y $& MULTIPLE WORDS +'(.*)c(.*)'i ABCDE y $&-$1-$2 ABCDE-AB-DE +'\((.*), (.*)\)'i (A, B) y ($2, $1) (B, A) +'[k]'i AB n - - +'abcd'i ABCD y $&-\$&-\\$& ABCD-$&-\ABCD +'a(bc)d'i ABCD y $1-\$1-\\$1 BC-$1-\BC +'a[-]?c'i AC y $& AC +'(abc)\1'i ABCABC y $1 ABC +'([a-c]*)\1'i ABCABC y $1 ABC +a(?!b). abad y $& ad +a(?=d). abad y $& ad +a(?=c|d). abad y $& ad +a(?:b|c|d)(.) ace y $1 e +a(?:b|c|d)*(.) ace y $1 e +a(?:b|c|d)+?(.) ace y $1 e +a(?:b|c|d)+?(.) acdbcdbe y $1 d +a(?:b|c|d)+(.) acdbcdbe y $1 e +a(?:b|c|d){2}(.) acdbcdbe y $1 b +a(?:b|c|d){4,5}(.) acdbcdbe y $1 b +a(?:b|c|d){4,5}?(.) acdbcdbe y $1 d +((foo)|(bar))* foobar y $1-$2-$3 bar-foo-bar +:(?: - c - Sequence (? incomplete +a(?:b|c|d){6,7}(.) acdbcdbe y $1 e +a(?:b|c|d){6,7}?(.) acdbcdbe y $1 e +a(?:b|c|d){5,6}(.) acdbcdbe y $1 e +a(?:b|c|d){5,6}?(.) acdbcdbe y $1 b +a(?:b|c|d){5,7}(.) acdbcdbe y $1 e +a(?:b|c|d){5,7}?(.) acdbcdbe y $1 b +a(?:b|(c|e){1,2}?|d)+?(.) ace y $1$2 ce +^(.+)?B AB y $1 A +^([^a-z])|(\^)$ . y $1 . +^[<>]& <&OUT y $& <& +^(a\1?){4}$ aaaaaaaaaa y $1 aaaa +^(a\1?){4}$ aaaaaaaaa n - - +^(a\1?){4}$ aaaaaaaaaaa n - - +^(a(?(1)\1)){4}$ aaaaaaaaaa y $1 aaaa +^(a(?(1)\1)){4}$ aaaaaaaaa n - - +^(a(?(1)\1)){4}$ aaaaaaaaaaa n - - +((a{4})+) aaaaaaaaa y $1 aaaaaaaa +(((aa){2})+) aaaaaaaaaa y $1 aaaaaaaa +(((a{2}){2})+) aaaaaaaaaa y $1 aaaaaaaa +(?:(f)(o)(o)|(b)(a)(r))* foobar y $1:$2:$3:$4:$5:$6 f:o:o:b:a:r +(?<=a)b ab y $& b +(?<=a)b cb n - - +(?<=a)b b n - - +(?a+)ab aaab n - - +(?>a+)b aaab y - - +([[:]+) a:[b]: yi $1 :[ Java and ICU dont escape [[xyz +([[=]+) a=[b]= yi $1 =[ Java and ICU dont escape [[xyz +([[.]+) a.[b]. 
yi $1 .[ Java and ICU dont escape [[xyz +[a[:xyz: - c - Unmatched [ +[a[:xyz:] - c - POSIX class [:xyz:] unknown +[a[:]b[:c] abc yi $& abc Java and ICU embedded [ is nested set +([a[:xyz:]b]+) pbaq c - POSIX class [:xyz:] unknown +[a[:]b[:c] abc iy $& abc Java and ICU embedded [ is nested set +([[:alpha:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd +([[:alnum:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy +([[:ascii:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy__-- ${nulnul} +([[:cntrl:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ${nulnul} +([[:digit:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 01 +([[:graph:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy__-- +([[:lower:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 cd +([[:print:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy__-- +([[:punct:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 __-- +([[:space:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 +([[:word:]]+) ABcd01Xy__-- ${nulnul}${ffff} yi $1 ABcd01Xy__ +([[:upper:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 AB +([[:xdigit:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01 +([[:^alpha:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 01 +([[:^alnum:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 __-- ${nulnul}${ffff} +([[:^ascii:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ${ffff} +([[:^cntrl:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy__-- +([[:^digit:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd +([[:^lower:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 AB +([[:^print:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ${nulnul}${ffff} +([[:^punct:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy +([[:^space:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy__-- +([[:^word:]]+) ABcd01Xy__-- ${nulnul}${ffff} yi $1 -- ${nulnul}${ffff} +([[:^upper:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 cd01 +([[:^xdigit:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 Xy__-- ${nulnul}${ffff} +[[:foo:]] - c - POSIX class [:foo:] unknown +[[:^foo:]] - c - POSIX class [:^foo:] unknown +((?>a+)b) aaab y $1 aaab +(?>(a+))b aaab y $1 aaa +((?>[^()]+)|\([^()]*\))+ ((abc(ade)ufh()()x y $& abc(ade)ufh()()x +(?<=x+)y - c - Variable length lookbehind not implemented +a{37,17} - c - Can't do {n,m} with n > m +\Z a\nb\n y $-[0] 3 +\z a\nb\n y $-[0] 4 +$ a\nb\n y $-[0] 3 +\Z b\na\n y $-[0] 3 +\z b\na\n y $-[0] 4 +$ b\na\n y $-[0] 3 +\Z b\na y $-[0] 3 +\z b\na y $-[0] 3 +$ b\na y $-[0] 3 +'\Z'm a\nb\n y $-[0] 3 +'\z'm a\nb\n y $-[0] 4 +'$'m a\nb\n y $-[0] 1 +'\Z'm b\na\n y $-[0] 3 +'\z'm b\na\n y $-[0] 4 +'$'m b\na\n y $-[0] 1 +'\Z'm b\na y $-[0] 3 +'\z'm b\na y $-[0] 3 +'$'m b\na y $-[0] 1 +a\Z a\nb\n n - - +a\z a\nb\n n - - +a$ a\nb\n n - - +a\Z b\na\n y $-[0] 2 +a\z b\na\n n - - +a$ b\na\n y $-[0] 2 +a\Z b\na y $-[0] 2 +a\z b\na y $-[0] 2 +a$ b\na y $-[0] 2 +'a\Z'm a\nb\n n - - +'a\z'm a\nb\n n - - +'a$'m a\nb\n y $-[0] 0 +'a\Z'm b\na\n y $-[0] 2 +'a\z'm b\na\n n - - +'a$'m b\na\n y $-[0] 2 +'a\Z'm b\na y $-[0] 2 +'a\z'm b\na y $-[0] 2 +'a$'m b\na y $-[0] 2 +aa\Z aa\nb\n n - - +aa\z aa\nb\n n - - +aa$ aa\nb\n n - - +aa\Z b\naa\n y $-[0] 2 +aa\z b\naa\n n - - +aa$ b\naa\n y $-[0] 2 +aa\Z b\naa y $-[0] 2 +aa\z b\naa y $-[0] 2 +aa$ b\naa y $-[0] 2 +'aa\Z'm aa\nb\n n - - +'aa\z'm aa\nb\n n - - +'aa$'m aa\nb\n y $-[0] 0 +'aa\Z'm b\naa\n y $-[0] 2 +'aa\z'm b\naa\n n - - +'aa$'m b\naa\n y $-[0] 2 +'aa\Z'm b\naa y $-[0] 2 +'aa\z'm b\naa y $-[0] 2 +'aa$'m b\naa y $-[0] 2 +aa\Z ac\nb\n n - - +aa\z ac\nb\n n - - +aa$ ac\nb\n n - - +aa\Z b\nac\n n - - +aa\z b\nac\n n - - +aa$ b\nac\n n - - +aa\Z b\nac n - - +aa\z b\nac n - - +aa$ b\nac n - - +'aa\Z'm ac\nb\n n - - +'aa\z'm ac\nb\n n - - +'aa$'m ac\nb\n n - 
- +'aa\Z'm b\nac\n n - - +'aa\z'm b\nac\n n - - +'aa$'m b\nac\n n - - +'aa\Z'm b\nac n - - +'aa\z'm b\nac n - - +'aa$'m b\nac n - - +aa\Z ca\nb\n n - - +aa\z ca\nb\n n - - +aa$ ca\nb\n n - - +aa\Z b\nca\n n - - +aa\z b\nca\n n - - +aa$ b\nca\n n - - +aa\Z b\nca n - - +aa\z b\nca n - - +aa$ b\nca n - - +'aa\Z'm ca\nb\n n - - +'aa\z'm ca\nb\n n - - +'aa$'m ca\nb\n n - - +'aa\Z'm b\nca\n n - - +'aa\z'm b\nca\n n - - +'aa$'m b\nca\n n - - +'aa\Z'm b\nca n - - +'aa\z'm b\nca n - - +'aa$'m b\nca n - - +ab\Z ab\nb\n n - - +ab\z ab\nb\n n - - +ab$ ab\nb\n n - - +ab\Z b\nab\n y $-[0] 2 +ab\z b\nab\n n - - +ab$ b\nab\n y $-[0] 2 +ab\Z b\nab y $-[0] 2 +ab\z b\nab y $-[0] 2 +ab$ b\nab y $-[0] 2 +'ab\Z'm ab\nb\n n - - +'ab\z'm ab\nb\n n - - +'ab$'m ab\nb\n y $-[0] 0 +'ab\Z'm b\nab\n y $-[0] 2 +'ab\z'm b\nab\n n - - +'ab$'m b\nab\n y $-[0] 2 +'ab\Z'm b\nab y $-[0] 2 +'ab\z'm b\nab y $-[0] 2 +'ab$'m b\nab y $-[0] 2 +ab\Z ac\nb\n n - - +ab\z ac\nb\n n - - +ab$ ac\nb\n n - - +ab\Z b\nac\n n - - +ab\z b\nac\n n - - +ab$ b\nac\n n - - +ab\Z b\nac n - - +ab\z b\nac n - - +ab$ b\nac n - - +'ab\Z'm ac\nb\n n - - +'ab\z'm ac\nb\n n - - +'ab$'m ac\nb\n n - - +'ab\Z'm b\nac\n n - - +'ab\z'm b\nac\n n - - +'ab$'m b\nac\n n - - +'ab\Z'm b\nac n - - +'ab\z'm b\nac n - - +'ab$'m b\nac n - - +ab\Z ca\nb\n n - - +ab\z ca\nb\n n - - +ab$ ca\nb\n n - - +ab\Z b\nca\n n - - +ab\z b\nca\n n - - +ab$ b\nca\n n - - +ab\Z b\nca n - - +ab\z b\nca n - - +ab$ b\nca n - - +'ab\Z'm ca\nb\n n - - +'ab\z'm ca\nb\n n - - +'ab$'m ca\nb\n n - - +'ab\Z'm b\nca\n n - - +'ab\z'm b\nca\n n - - +'ab$'m b\nca\n n - - +'ab\Z'm b\nca n - - +'ab\z'm b\nca n - - +'ab$'m b\nca n - - +abb\Z abb\nb\n n - - +abb\z abb\nb\n n - - +abb$ abb\nb\n n - - +abb\Z b\nabb\n y $-[0] 2 +abb\z b\nabb\n n - - +abb$ b\nabb\n y $-[0] 2 +abb\Z b\nabb y $-[0] 2 +abb\z b\nabb y $-[0] 2 +abb$ b\nabb y $-[0] 2 +'abb\Z'm abb\nb\n n - - +'abb\z'm abb\nb\n n - - +'abb$'m abb\nb\n y $-[0] 0 +'abb\Z'm b\nabb\n y $-[0] 2 +'abb\z'm b\nabb\n n - - +'abb$'m b\nabb\n y $-[0] 2 +'abb\Z'm b\nabb y $-[0] 2 +'abb\z'm b\nabb y $-[0] 2 +'abb$'m b\nabb y $-[0] 2 +abb\Z ac\nb\n n - - +abb\z ac\nb\n n - - +abb$ ac\nb\n n - - +abb\Z b\nac\n n - - +abb\z b\nac\n n - - +abb$ b\nac\n n - - +abb\Z b\nac n - - +abb\z b\nac n - - +abb$ b\nac n - - +'abb\Z'm ac\nb\n n - - +'abb\z'm ac\nb\n n - - +'abb$'m ac\nb\n n - - +'abb\Z'm b\nac\n n - - +'abb\z'm b\nac\n n - - +'abb$'m b\nac\n n - - +'abb\Z'm b\nac n - - +'abb\z'm b\nac n - - +'abb$'m b\nac n - - +abb\Z ca\nb\n n - - +abb\z ca\nb\n n - - +abb$ ca\nb\n n - - +abb\Z b\nca\n n - - +abb\z b\nca\n n - - +abb$ b\nca\n n - - +abb\Z b\nca n - - +abb\z b\nca n - - +abb$ b\nca n - - +'abb\Z'm ca\nb\n n - - +'abb\z'm ca\nb\n n - - +'abb$'m ca\nb\n n - - +'abb\Z'm b\nca\n n - - +'abb\z'm b\nca\n n - - +'abb$'m b\nca\n n - - +'abb\Z'm b\nca n - - +'abb\z'm b\nca n - - +'abb$'m b\nca n - - +(^|x)(c) ca y $2 c +a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz x n - - +a(?{$a=2;$b=3;($b)=$a})b yabz y $b 2 +round\(((?>[^()]+))\) _I(round(xs * sz),1) y $1 xs * sz +'((?x:.) )' x y $1- x - +'((?-x:.) )'x x y $1- x- +foo.bart foo.bart y - - +'^d[x][x][x]'m abcd\ndxxx y - - +.X(.+)+X bbbbXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa yi - - # TODO: ICU doesn't optimize on trailing literals in pattern. 
+.X(.+)+XX bbbbXcXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa yi - - +.XX(.+)+X bbbbXXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa yi - - +.X(.+)+X bbbbXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ni - - +.X(.+)+XX bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ni - - +.XX(.+)+X bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ni - - +.X(.+)+[X] bbbbXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa yi - - +.X(.+)+[X][X] bbbbXcXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa yi - - +.XX(.+)+[X] bbbbXXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa yi - - +.X(.+)+[X] bbbbXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ni - - +.X(.+)+[X][X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ni - - +.XX(.+)+[X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ni - - +.[X](.+)+[X] bbbbXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa yi - - +.[X](.+)+[X][X] bbbbXcXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa yi - - +.[X][X](.+)+[X] bbbbXXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa yi - - +.[X](.+)+[X] bbbbXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ni - - +.[X](.+)+[X][X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ni - - +.[X][X](.+)+[X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ni - - +tt+$ xxxtt y - - +([a-\d]+) za-9z yi $1 a-9 +([\d-z]+) a0-za y $1 0-z +([\d-\s]+) a0- z y $1 0- +([a-[:digit:]]+) za-9z y $1 a-9 +([[:digit:]-z]+) =0-z= y $1 0-z +([[:digit:]-[:alpha:]]+) =0-z= iy $1 0-z Set difference in ICU +\GX.*X aaaXbX n - - +(\d+\.\d+) 3.1415926 y $1 3.1415926 +(\ba.{0,10}br) have a web browser y $1 a web br +'\.c(pp|xx|c)?$'i Changes n - - +'\.c(pp|xx|c)?$'i IO.c y - - +'(\.c(pp|xx|c)?$)'i IO.c y $1 .c +^([a-z]:) C:/ n - - +'^\S\s+aa$'m \nx aa y - - +(^|a)b ab y - - +^([ab]*?)(b)?(c)$ abac y -$2- -- +(\w)?(abc)\1b abcab n - - +^(?:.,){2}c a,b,c y - - +^(.,){2}c a,b,c y $1 b, +^(?:[^,]*,){2}c a,b,c y - - +^([^,]*,){2}c a,b,c y $1 b, +^([^,]*,){3}d aaa,b,c,d y $1 c, +^([^,]*,){3,}d aaa,b,c,d y $1 c, +^([^,]*,){0,3}d aaa,b,c,d y $1 c, +^([^,]{1,3},){3}d aaa,b,c,d y $1 c, +^([^,]{1,3},){3,}d aaa,b,c,d y $1 c, +^([^,]{1,3},){0,3}d aaa,b,c,d y $1 c, +^([^,]{1,},){3}d aaa,b,c,d y $1 c, +^([^,]{1,},){3,}d aaa,b,c,d y $1 c, +^([^,]{1,},){0,3}d aaa,b,c,d y $1 c, +^([^,]{0,3},){3}d aaa,b,c,d y $1 c, +^([^,]{0,3},){3,}d aaa,b,c,d y $1 c, +^([^,]{0,3},){0,3}d aaa,b,c,d y $1 c, +(?i) y - - +'(?!\A)x'm a\nxb\n y - - +^(a(b)?)+$ aba yi -$1-$2- -a-- Java disagrees. Not clear who is right. +'^.{9}abc.*\n'm 123\nabcabcabcabc\n y - - +^(a)?a$ a y -$1- -- +^(a)?(?(1)a|b)+$ a n - - +^(a\1?)(a\1?)(a\2?)(a\3?)$ aaaaaa y $1,$2,$3,$4 a,aa,a,aa +^(a\1?){4}$ aaaaaa y $1 aa +^(0+)?(?:x(1))? x1 y - - +^([0-9a-fA-F]+)(?:x([0-9a-fA-F]+)?)(?:x([0-9a-fA-F]+))? 012cxx0190 y - - +^(b+?|a){1,2}c bbbac y $1 a +^(b+?|a){1,2}c bbbbac y $1 a +\((\w\. \w+)\) cd. (A. Tw) y -$1- -A. Tw- +((?:aaaa|bbbb)cccc)? aaaacccc y - - +((?:aaaa|bbbb)cccc)? 
bbbbcccc y - - +(a)?(a)+ a y $1:$2 :a - +(ab)?(ab)+ ab y $1:$2 :ab - +(abc)?(abc)+ abc y $1:$2 :abc - +'b\s^'m a\nb\n n - - +\ba a y - - +^(a(??{"(?!)"})|(a)(?{1}))b ab yi $2 a # [ID 20010811.006] +ab(?i)cd AbCd n - - # [ID 20010809.023] +ab(?i)cd abCd y - - +(A|B)*(?(1)(CD)|(CD)) CD y $2-$3 -CD +(A|B)*(?(1)(CD)|(CD)) ABCD y $2-$3 CD- +(A|B)*?(?(1)(CD)|(CD)) CD y $2-$3 -CD # [ID 20010803.016] +(A|B)*?(?(1)(CD)|(CD)) ABCD y $2-$3 CD- +'^(o)(?!.*\1)'i Oo n - - +(.*)\d+\1 abc12bc y $1 bc +(?m:(foo\s*$)) foo\n bar y $1 foo +(.*)c abcd y $1 ab +(.*)(?=c) abcd y $1 ab +(.*)(?=c)c abcd yB $1 ab +(.*)(?=b|c) abcd y $1 ab +(.*)(?=b|c)c abcd y $1 ab +(.*)(?=c|b) abcd y $1 ab +(.*)(?=c|b)c abcd y $1 ab +(.*)(?=[bc]) abcd y $1 ab +(.*)(?=[bc])c abcd yB $1 ab +(.*)(?<=b) abcd y $1 ab +(.*)(?<=b)c abcd y $1 ab +(.*)(?<=b|c) abcd y $1 abc +(.*)(?<=b|c)c abcd y $1 ab +(.*)(?<=c|b) abcd y $1 abc +(.*)(?<=c|b)c abcd y $1 ab +(.*)(?<=[bc]) abcd y $1 abc +(.*)(?<=[bc])c abcd y $1 ab +(.*?)c abcd y $1 ab +(.*?)(?=c) abcd y $1 ab +(.*?)(?=c)c abcd yB $1 ab +(.*?)(?=b|c) abcd y $1 a +(.*?)(?=b|c)c abcd y $1 ab +(.*?)(?=c|b) abcd y $1 a +(.*?)(?=c|b)c abcd y $1 ab +(.*?)(?=[bc]) abcd y $1 a +(.*?)(?=[bc])c abcd yB $1 ab +(.*?)(?<=b) abcd y $1 ab +(.*?)(?<=b)c abcd y $1 ab +(.*?)(?<=b|c) abcd y $1 ab +(.*?)(?<=b|c)c abcd y $1 ab +(.*?)(?<=c|b) abcd y $1 ab +(.*?)(?<=c|b)c abcd y $1 ab +(.*?)(?<=[bc]) abcd y $1 ab +(.*?)(?<=[bc])c abcd y $1 ab +2(]*)?$\1 2 y $& 2 +(??{}) x yi - - diff --git a/go/mysql/icuregex/testdata/regextst.txt b/go/mysql/icuregex/testdata/regextst.txt new file mode 100644 index 00000000000..8d5d2c34a8e --- /dev/null +++ b/go/mysql/icuregex/testdata/regextst.txt @@ -0,0 +1,2793 @@ +# Copyright (C) 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html +# Copyright (c) 2001-2015 International Business Machines +# Corporation and others. All Rights Reserved. +# +# file: +# +# ICU regular expression test cases. +# +# format: one test case per line, +# = [# comment] +# = "" +# = "" +# the quotes on the pattern and match string can be " or ' or / +# = text, with the start and end of each +# capture group tagged with .... The overall match, +# if any, is group 0, as in <0>matched text +# A region can be specified with ... tags. +# Standard ICU unescape will be applied, allowing \u, \U, etc. to appear. +# +# = any combination of +# i case insensitive match +# x free spacing and comments +# s dot-matches-all mode +# m multi-line mode. +# ($ and ^ match at embedded new-lines) +# D Unix Lines mode (only recognize 0x0a as new-line) +# Q UREGEX_LITERAL flag. Entire pattern is literal string. +# v If icu configured without break iteration, this +# regex test pattern should not compile. +# e set the UREGEX_ERROR_ON_UNKNOWN_ESCAPES flag +# d dump the compiled pattern +# t trace operation of match engine. +# 2-9 a digit between 2 and 9, specifies the number of +# times to execute find(). The expected results are +# for the last find() in the sequence. +# G Only check match / no match. Do not check capture groups. +# E Pattern compilation error expected +# L Use LookingAt() rather than find() +# M Use matches() rather than find(). +# +# a Use non-Anchoring Bounds. +# b Use Transparent Bounds. +# The a and b options only make a difference if +# a region has been specified in the string. +# z|Z hitEnd was expected(z) or not expected (Z). +# With neither, hitEnd is not checked. +# y|Y Require End expected(y) or not expected (Y). 
+# +# White space must be present between the flags and the match string. +# + +# Look-ahead expressions +# +"(?!0{5})(\d{5})" "<0><1>00001zzzz" +"(?!0{5})(\d{5})z" "<0><1>00001zzzz" +"(?!0{5})(\d{5})(?!y)" "<0><1>00001zzzz" +"abc(?=def)" "<0>abcdef" +"(.*)(?=c)" "<0><1>abcdef" + +"(?:.*)(?=c)" "abcdef" +"(?:.*)(?=c)" b "<0>abcdef" # transparent bounds +"(?:.*)(?=c)" bM "<0>abcdef" # transparent bounds + +"(?:.*)(?=(c))" b "<0>ab<1>cdef" # Capture in look-ahead +"(?=(.)\1\1)\1" "abcc<0><1>dddefg" # Backrefs to look-ahead capture + +".(?!\p{L})" "abc<0>d " # Negated look-ahead +".(?!(\p{L}))" "abc<0>d " # Negated look-ahead, no capture + # visible outside of look-ahead +"and(?=roid)" L "<0>android" +"and(?=roid)" M "android" +"and(?=roid)" bM "<0>android" + +"and(?!roid)" L "<0>androix" +"and(?!roid)" L "android" + +"and(?!roid)" M "<0>android" # Opaque bounds +"and(?!roid)" bM "android" +"and(?!roid)" bM "<0>androix" + +# +# Negated Lookahead, various regions and region transparency +# +"abc(?!def)" "<0>abcxyz" +"abc(?!def)" "abcdef" +"abc(?!def)" "<0>abcdef" +"abc(?!def)" b "abcdef" +"abc(?!def)" b "<0>abcxyz" + +# +# Nested Lookahead / Behind +# +"one(?=(?:(?!).)*)" "<0>one stuff" +"one(?=(?:(?!).)*)" "one " + +# More nesting lookaround: pattern matches "qq" when not preceded by 'a' and followed by 'z' +"(?qqc" +"(?qqc" +"(?A<0>jk<2>B" +"(?=(?<=(\p{Lu})(?=..(\p{Lu})))).." "ajkB" +"(?=(?<=(\p{Lu})(?=..(\p{Lu})))).." "Ajkb" + +# Nested lookaround cases from bug ICU-20564 +"(?<=(?<=((?=)){0}+))" "<0>abc" +"(?<=c(?<=c((?=c)){1}+))" "c<0><1>cc" + +# +# Anchoring Bounds +# +"^def$" "abc<0>defghi" # anchoring (default) bounds +"^def$" a "abcdefghi" # non-anchoring bounds +"^def" a "<0>defghi" # non-anchoring bounds +"def$" a "abc<0>def" # non-anchoring bounds + +"^.*$" m "<0>line 1\n line 2" +"^.*$" m2 "line 1\n<0> line 2" +"^.*$" m3 "line 1\n line 2" +"^.*$" m "li<0>ne 1\n line 2" # anchoring bounds +"^.*$" m2 "line 1\n line 2" # anchoring bounds +"^.*$" am "line 1\n line 2" # non-anchoring bounds +"^.*$" am "li\n<0>ne \n1\n line 2" # non-anchoring bounds + +# +# HitEnd and RequireEnd for new-lines just before end-of-input +# +"xyz$" yz "<0>xyz\n" +"xyz$" yz "<0>xyz\x{d}\x{a}" + +"xyz$" myz "<0>xyz" # multi-line mode +"xyz$" mYZ "<0>xyz\n" +"xyz$" mYZ "<0>xyz\r\n" +"xyz$" mYZ "<0>xyz\x{85}abcd" + +"xyz$" Yz "xyz\nx" +"xyz$" Yz "xyza" +"xyz$" yz "<0>xyz" + +# +# HitEnd +# +"abcd" Lz "a" +"abcd" Lz "ab" +"abcd" Lz "abc" +"abcd" LZ "<0>abcd" +"abcd" LZ "<0>abcde" +"abcd" LZ "abcx" +"abcd" LZ "abx" +"abcd" Lzi "a" +"abcd" Lzi "ab" +"abcd" Lzi "abc" +"abcd" LZi "<0>abcd" +"abcd" LZi "<0>abcde" +"abcd" LZi "abcx" +"abcd" LZi "abx" + +# +# All Unicode line endings recognized. +# 0a, 0b, 0c, 0d, 0x85, 0x2028, 0x2029 +# Multi-line and non-multiline mode take different paths, so repeated tests. +# +"^def$" mYZ "abc\x{a}<0>def\x{a}ghi" +"^def$" mYZ "abc\x{b}<0>def\x{b}ghi" +"^def$" mYZ "abc\x{c}<0>def\x{c}ghi" +"^def$" mYZ "abc\x{d}<0>def\x{d}ghi" +"^def$" mYZ "abc\x{85}<0>def\x{85}ghi" +"^def$" mYZ "abc\x{2028}<0>def\x{2028}ghi" +"^def$" mYZ "abc\x{2029}<0>def\x{2029}ghi" +"^def$" mYZ "abc\r\n<0>def\r\nghi" + +"^def$" yz "<0>def\x{a}" +"^def$" yz "<0>def\x{b}" +"^def$" yz "<0>def\x{c}" +"^def$" yz "<0>def\x{d}" +"^def$" yz "<0>def\x{85}" +"^def$" yz "<0>def\x{2028}" +"^def$" yz "<0>def\x{2029}" +"^def$" yz "<0>def\r\n" +"^def$" yz "<0>def" + + +# "^def$" "<0>def\x{2028" #TODO: should be an error of some sort. 
+ +# +# UNIX_LINES mode +# +"abc$" D "<0>abc\n" +"abc$" D "abc\r" +"abc$" D "abc\u0085" +"a.b" D "<0>a\rb" +"a.b" D "a\nb" +"(?d)abc$" "<0>abc\n" +"(?d)abc$" "abc\r" +"abc$" mD "<0>abc\ndef" +"abc$" mD "abc\rdef" + +".*def" L "abc\r def xyz" # Normal mode, LookingAt() stops at \r +".*def" DL "<0>abc\r def xyz" # Unix Lines mode, \r not line end. +".*def" DL "abc\n def xyz" + +"(?d)a.b" "a\nb" +"(?d)a.b" "<0>a\rb" + +"^abc" m "xyz\r<0>abc" +"^abc" Dm "xyz\rabc" +"^abc" Dm "xyz\n<0>abc" + + + +# Capturing parens +".(..)." "<0>a<1>bcd" + ".*\A( +hello)" "<0><1> hello" +"(hello)|(goodbye)" "<0><1>hello" +"(hello)|(goodbye)" "<0><2>goodbye" +"abc( +( inner(X?) +) xyz)" "leading cruft <0>abc<1> <2> inner<3> xyz cruft" +"\s*([ixsmdt]*)([:letter:]*)" "<0> <1>d<2> " +"(a|b)c*d" "a<0><1>bcd" + +# Non-capturing parens (?: stuff). Groups, but does not capture. +"(?:abc)*(tail)" "<0>abcabcabc<1>tail" + +# Non-greedy *? quantifier +".*?(abc)" "<0> abx <1>abc abc abc abc" +".*(abc)" "<0> abx abc abc abc <1>abc" + +"((?:abc |xyz )*?)abc " "<0><1>xyz abc abc abc " +"((?:abc |xyz )*)abc " "<0><1>xyz abc abc abc " + +# Non-greedy +? quantifier +"(a+?)(a*)" "<0><1>a<2>aaaaaaaaaaaa" +"(a+)(a*)" "<0><1>aaaaaaaaaaaaa<2>" + +"((ab)+?)((ab)*)" "<0><1><2>ab<3>ababababab<4>ab" +"((ab)+)((ab)*)" "<0><1>abababababab<2>ab<3>" + +# Non-greedy ?? quantifier +"(ab)(ab)??(ab)??(ab)??(ab)??c" "<0><1>ab<4>ab<5>abc" + +# Unicode Properties as naked elements in a pattern +"\p{Lu}+" "here we go ... <0>ABC and no more." +"(\p{L}+)(\P{L}*?) (\p{Zs}*)" "7999<0><1>letters<2>4949%^&*( <3> " + +# \w and \W +"\w+" " $%^&*( <0>hello123%^&*(" +"\W+" "<0> $%^&*( hello123%^&*(" + +# \A match at beginning of input only. + ".*\Ahello" "<0>hello hello" + ".*hello" "<0>hello hello" +".*\Ahello" "stuff\nhello" # don't match after embedded new-line. + +# \b \B +# +".*?\b(.).*" "<0> $%^&*( <1>hello123%^&*()gxx" +"\ba\b" "-<0>a" +"\by\b" "xy" +"[ \b]" "<0>b" # in a set, \b is a literal b. + +# Finds first chars of up to 5 words +"(?:.*?\b(\w))?(?:.*?\b(\w))?(?:.*?\b(\w))?(?:.*?\b(\w))?(?:.*?\b(\w))?" "<0><1>Tthe <2>qick <3>brown <4>fox" + +"H.*?((?:\B.)+)" "<0>H<1>ello " +".*?((?:\B.)+).*?((?:\B.)+).*?((?:\B.)+)" "<0>H<1>ello <2> g<3>oodbye " + +"(?:.*?\b(.))?(?:.*?\b(.))?(?:.*?\b(.))?(?:.*?\b(.))?(?:.*?\b(.))?.*" "<0> \u0301 \u0301<1>A\u0302BC\u0303\u0304<2> \u0305 \u0306<3>X\u0307Y\u0308" + + +# +# Unicode word boundary mode +# +"(?w).*?\b" v "<0>hello, world" +"(?w).*?(\b.+?\b).*" v "<0><1> 123.45 " +"(?w).*?(\b\d.*?\b).*" v "<0> <1>123.45 " +".*?(\b.+?\b).*" "<0> <1>123.45 " +"(?w:.*?(\b\d.*?\b).*)" v "<0> <1>123.45 " +"(?w:.*?(\b.+?\b).*)" v "<0><1>don't " +"(?w:.+?(\b\S.+?\b).*)" v "<0> <1>don't " +"(?w:(\b.+?)(\b.+?)(\b.+?)(\b.+?)(\b.+?)(\b.+?)(\b.+?).*)" v "<0><1>.<2> <3>,<4>:<5>$<6>37,000.50<7> " + +# +# Unicode word boundaries with Regions +# +"(?w).*?\b" v "abc<0>defghi" +"(?w).*?\b" v2 "abcdef<0>ghi" +"(?w).*?\b" v3 "abcdefghi" +#"(?w).*?\b" vb "abc<0>defghi" # TODO: bug. Ticket 6073 +#"(?w).*?\b" vb2 "abcdefghi" + + + +# . does not match new-lines +"." "\u000a\u000d\u0085\u000c\u000b\u2028\u2029<0>X\u000aY" +"A." "A\u000a "# no match + +# \d for decimal digits +"\d*" "<0>0123456789\u0660\u06F9\u0969\u0A66\u17E2\uFF10\U0001D7CE\U0001D7FFnon-digits" +"\D+" "<0>non digits" +"\D*(\d*)(\D*)" "<0>non-digits<1>3456666<2>more non digits" + +# \Q...\E quote mode +"hel\Qlo, worl\Ed" "<0>hello, world" +"\Q$*^^(*)?\A\E(a*)" "<0>$*^^(*)?\\A<1>aaaaaaaaaaaaaaa" +"[abc\Q]\r\E]+" "<0>aaaccc]]]\\\\\\\r..." # \Q ... 
\E escape in a [set] + +# UREGEX_LITERAL - entire pattern is a literal string, no escapes recognized. +# Note that data strings in test cases still get escape processing. +"abc\an\r\E\\abcd\u0031bye" Q "lead<0>abc\\an\\r\\E\\\\abcd\\u0031byeextra" +"case insensitive \\ (l)iteral" Qi "stuff!! <0>cAsE InSenSiTiVE \\\\ (L)ITeral" + +# \S and \s space characters +"\s+" "not_space<0> \t \r \n \u3000 \u2004 \u2028 \u2029xyz" +"(\S+).*?(\S+).*" "<0><1>Not-spaces <2>more-non-spaces " + +# \X consume one Grapheme Cluster. +"(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?" v "<0><1>A<2>B<3> <4>\r\n" +"(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?" v "<0><1>A\u0301<2>\n<3>\u0305<4>a\u0302\u0303\u0304" +"(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?" v "<0><1>\u1100\u1161\u11a8<2>\u115f\u11a2\u11f9" +"(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?" v "<0><1>\u1100\uac01<2>\uac02<3>\uac03\u11b0" +"(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?" v "<0><1>\u1100\u1101\uac02\u0301<2>\u1100" +# Regional indicator pairs are grapheme clusters +"(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?" v "<0><1>\U0001f1e6\U0001f1e8<2>\U0001f1ea\U0001f1ff" +# Grapheme Break rule 9b: Prepend x +"(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?" v "<0><1>\U000111C2x" + +# Grapheme clusters that straddle a match region. Matching is pinned to the region limits, +# giving boundaries inside grapheme clusters +"(\X)?(\X)?(\X)?" v "a\u0301<0><1>\u0301\u0301<2>z\u0302\u0302\u0302" +# Same as previous test case, but without the region limits. +"(\X)?(\X)?(\X)?" v "<0><1>a\u0301\u0301\u0301<2>z\u0302\u0302\u0302" + +# ^ matches only at beginning of line +".*^(Hello)" "<0><1>Hello Hello Hello Hello Goodbye" +".*(Hello)" "<0>Hello Hello Hello <1>Hello Goodbye" +".*^(Hello)" " Hello Hello Hello Hello Goodbye"# No Match + +# $ matches only at end of line, or before a newline preceding the end of line +".*?(Goodbye)$" zy "<0>Hello Goodbye Goodbye <1>Goodbye" +".*?(Goodbye)" ZY "<0>Hello <1>Goodbye Goodbye Goodbye" +".*?(Goodbye)$" z "Hello Goodbye> Goodbye Goodbye "# No Match + +".*?(Goodbye)$" zy "<0>Hello Goodbye Goodbye <1>Goodbye\n" +".*?(Goodbye)$" zy "<0>Hello Goodbye Goodbye <1>Goodbye\n" +".*?(Goodbye)$" zy "<0>Hello Goodbye Goodbye <1>Goodbye\r\n" +".*?(Goodbye)$" z "Hello Goodbye Goodbye Goodbye\n\n"# No Match + +# \Z matches at end of input, like $ with default flags. +".*?(Goodbye)\Z" zy "<0>Hello Goodbye Goodbye <1>Goodbye" +".*?(Goodbye)" ZY "<0>Hello <1>Goodbye Goodbye Goodbye" +".*?(Goodbye)\Z" z "Hello Goodbye> Goodbye Goodbye "# No Match +"here$" z "here\nthe end"# No Match + +".*?(Goodbye)\Z" "<0>Hello Goodbye Goodbye <1>Goodbye\n" +".*?(Goodbye)\Z" "<0>Hello Goodbye Goodbye <1>Goodbye\n" +".*?(Goodbye)\Z" "<0>Hello Goodbye Goodbye <1>Goodbye\r\n" +".*?(Goodbye)\Z" "Hello Goodbye Goodbye Goodbye\n\n"# No Match + +# \z matches only at the end of string. +# no special treatment of new lines. +# no dependencies on flag settings. +".*?(Goodbye)\z" zy "<0>Hello Goodbye Goodbye <1>Goodbye" +".*?(Goodbye)\z" z "Hello Goodbye Goodbye Goodbye "# No Match +"here$" z "here\nthe end"# No Match + +".*?(Goodbye)\z" z "Hello Goodbye Goodbye Goodbye\n"# No Match +".*?(Goodbye)\n\z" zy "<0>Hello Goodbye Goodbye <1>Goodbye\n" +"abc\z|def" ZY "abc<0>def" + +# (?# comment) doesn't muck up pattern +"Hello (?# this is a comment) world" " <0>Hello world..." 
+ +# Check some implementation corner cases base on the way literal strings are compiled. +"A" "<0>A" +"AB" "<0>ABABABAB" +"AB+" "<0>ABBBA" +"AB+" "<0>ABABAB" +"ABC+" "<0>ABCABC" +"ABC+" "<0>ABCCCCABC" +"(?:ABC)+" "<0>ABCABCABCD" +"(?:ABC)DEF+" "<0>ABCDEFFFD" +"AB\.C\eD\u0666E" "<0>AB.C\u001BD\u0666EF" +"ab\Bde" "<0>abde" + +# loop breaking +"(a?)*" "<0><1>xyz" +"(a?)+" "<0><1>xyz" +"^(?:a?b?)*$" "a--" +"(x?)*xyz" "<0>xx<1>xyz" # Sligthtly weird, but correct. The "last" time through (x?), + # it matches the empty string. + +# Set expressions, basic operators and escapes work +# +"[\d]+" "<0>0123abc/.," +"[^\d]+" "0123<0>abc/.," +"[\D]+" "0123<0>abc/.," +"[^\D]+" "<0>0123abc/.," + +"[\s]+" "<0> \tabc/.," +"[^\s]+" " \t<0>abc/.," +"[\S]+" " \t<0>abc/.," +"[^\S]+" "<0> \tabc/.," + +"[\w]+" "<0>abc123 .,;" +"[^\w]+" "abc123<0> .,;" +"[\W]+" "abc123<0> .,;" +"[^\W]+" "<0>abc123 .,;" + +"[\z]+" "abc<0>zzzdef" # \z has no special meaning +"[^\z]+" "<0>abczzzdef" +"[\^]+" "abc<0>^^" +"[^\^]+" "<0>abc^^" + +"[\u0041c]+" "<0>AcAcdef" +"[\U00010002]+" "<0>\ud800\udc02\U00010003" +"[^\U00010002]+" "<0>Hello\x{10002}" +"[\x61b]+" "<0>ababcde" +#"[\x6z]+" "\x06" #TODO: single hex digits should fail +"[\x{9}\x{75}\x{6d6}\x{6ba6}\x{6146B}\x{10ffe3}]+" "<0>\u0009\u0075\u06d6\u6ba6\U0006146B\U0010ffe3abc" + +"[\N{LATIN CAPITAL LETTER TONE SIX}ab\N{VARIATION SELECTOR-70} ]+" "x<0> \u0184\U000E0135 abc" +"[\N{LATIN SMALL LETTER C}-\N{LATIN SMALL LETTER F}]+" "ab<0>cdefghi" + + + +# +# [set expressions], check the precedence of '-', '&', '--', '&&' +# '-' and '&', for compatibility with ICU UnicodeSet, have the same +# precedence as the implicit Union between adjacent items. +# '--' and '&&', for compatibility with Java, have lower precedence than +# the implicit Union operations. '--' and '&&' themselves +# have the same precedence, and group left to right. +# +"[[a-m]-[f-w]p]+" "<0>depfgwxyz" +"[^[a-m]-[f-w]p]+" "dep<0>fgwxyz" + +"[[a-m]--[f-w]p]+" "<0>depfgwxyz" +"[^[a-m]--[f-w]p]+" "de<0>pfgwxyz" + +"[[a-m]&[e-s]w]+" "<0>efmwadnst" +"[^[a-m]&[e-s]w]+" "efmw<0>adnst" + +"[[a-m]&[e-s]]+" "<0>efmadnst" + + + +# {min,max} iteration qualifier +"A{3}BC" "<0>AAABC" + +"(ABC){2,3}AB" "no matchAB" +"(ABC){2,3}AB" "ABCAB" +"(ABC){2,3}AB" "<0>ABC<1>ABCAB" +"(ABC){2,3}AB" "<0>ABCABC<1>ABCAB" +"(ABC){2,3}AB" "<0>ABCABC<1>ABCABCAB" + +"(ABC){2}AB" "ABCAB" +"(ABC){2}AB" "<0>ABC<1>ABCAB" +"(ABC){2}AB" "<0>ABC<1>ABCABCAB" +"(ABC){2}AB" "<0>ABC<1>ABCABCABCAB" + +"(ABC){2,}AB" "ABCAB" +"(ABC){2,}AB" "<0>ABC<1>ABCAB" +"(ABC){2,}AB" "<0>ABCABC<1>ABCAB" +"(ABC){2,}AB" "<0>ABCABCABC<1>ABCAB" + +"X{0,0}ABC" "<0>ABC" +"X{0,1}ABC" "<0>ABC" + +"(?:Hello(!{1,3}) there){1}" "Hello there" +"(?:Hello(!{1,3}) there){1}" "<0>Hello<1>! there" +"(?:Hello(!{1,3}) there){1}" "<0>Hello<1>!! there" +"(?:Hello(!{1,3}) there){1}" "<0>Hello<1>!!! there" +"(?:Hello(!{1,3}) there){1}" "Hello!!!! there" + +# Nongreedy {min,max}? intervals +"(ABC){2,3}?AB" "no matchAB" +"(ABC){2,3}?AB" "ABCAB" +"(ABC){2,3}?AB" "<0>ABC<1>ABCAB" +"(ABC){2,3}?AB" "<0>ABC<1>ABCABCAB" +"(ABC){2,3}?AB" "<0>ABC<1>ABCABCABCAB" +"(ABC){2,3}?AX" "<0>ABCABC<1>ABCAX" +"(ABC){2,3}?AX" "ABC<0>ABCABC<1>ABCAX" + +# Possessive {min,max}+ intervals +"(ABC){2,3}+ABC" "ABCABCABC" +"(ABC){1,2}+ABC" "<0>ABC<1>ABCABC" +"(?:(.)\1){2,5}+." "<0>aabbcc<1>ddex" + + +# Atomic Grouping +"(?>.*)abc" "abcabcabc" # no match. .* consumed entire string. 
+"(?>(abc{2,4}?))(c*)" "<0><1>abcc<2>cccddd" +"(\.\d\d(?>[1-9]?))\d+" "1.625" +"(\.\d\d(?>[1-9]?))\d+" "1<0><1>.6250" + +# Possessive *+ +"(abc)*+a" "abcabcabc" +"(abc)*+a" "<0>abc<1>abcab" +"(a*b)*+a" "<0><1>aaaabaaaa" + +# Possessive ?+ +"c?+ddd" "<0>cddd" +"c?+cddd" "cddd" +"c?cddd" "<0>cddd" + +# Back Reference +"(?:ab(..)cd\1)*" "<0>ab23cd23ab<1>wwcdwwabxxcdyy" +"ab(?:c|(d?))(\1)" "<0>ab<1><2>c" +"ab(?:c|(d?))(\1)" "<0>ab<1>d<2>d" +"ab(?:c|(d?))(\1)" "<0>ab<1><2>e" +"ab(?:c|(d?))(\1)" "<0>ab<1><2>" + +# Back References that hit/don't hit end +"(abcd) \1" z "abcd abc" +"(abcd) \1" Z "<0><1>abcd abcd" +"(abcd) \1" Z "<0><1>abcd abcd " + +# Case Insensitive back references that hit/don't hit end. +"(abcd) \1" zi "abcd abc" +"(abcd) \1" Zi "<0><1>abcd ABCD" +"(abcd) \1" Zi "<0><1>abcd ABCD " + +# Back references that hit/don't hit boundary limits. + +"(abcd) \1" z "abcd abcd " +"(abcd) \1" Z "<0><1>abcd abcd " +"(abcd) \1" Z "<0><1>abcd abcd " + +"(abcd) \1" zi "abcd abcd " +"(abcd) \1" Zi "<0><1>abcd abcd " +"(abcd) \1" Zi "<0><1>abcd abcd " + +# Back reference that fails match near the end of input without actually hitting the end. +"(abcd) \1" ZL "abcd abd" +"(abcd) \1" ZLi "abcd abd" + +# Back reference to a zero-length match. They are always a successful match. +"ab(x?)cd(\1)ef" "<0>ab<1>cd<2>ef" +"ab(x?)cd(\1)ef" i "<0>ab<1>cd<2>ef" + +# Back refs to capture groups that didn't participate in the match. +"ab(?:(c)|(d))\1" "abde" +"ab(?:(c)|(d))\1" "<0>ab<1>cce" +"ab(?:(c)|(d))\1" i "abde" +"ab(?:(c)|(d))\1" i "<0>ab<1>cce" + +# Named back references +"(?abcd)\k" "<0><1>abcdabcd" +"(no)?(?abcd)\k" "<0><2>abcdabcd" + +"(?...)" E " " # backref names are ascii letters & numbers only" +"(?<1a>...)" E " " # backref names must begin with a letter" +"(?.)(?.)" E " " # Repeated names are illegal. + + +# Case Insensitive +"aBc" i "<0>ABC" +"a[^bc]d" i "ABD" +'((((((((((a))))))))))\10' i "<0><1><2><3><4><5><6><7><8><9><10>AA" + +"(?:(?i)a)b" "<0>Ab" +"ab(?i)cd" "<0>abCd" +"ab$cd" "abcd" + +"ssl" i "abc<0>ßlxyz" +"ssl" i "abc<0>ẞlxyz" +"FIND" i "can <0>find ?" # fi ligature, \ufb01 +"find" i "can <0>FIND ?" +"ῧ" i "xxx<0>ῧxxx" # Composed char (match string) decomposes when case-folded (pattern) + +# White space handling +"a b" "ab" +"abc " "abc" +"abc " "<0>abc " +"ab[cd e]z" "<0>ab z" +"ab\ c" "<0>ab c " +"ab c" "<0>ab c " +"ab c" x "ab c " +"ab\ c" x "<0>ab c " + +# +# Pattern Flags +# +"(?u)abc" "<0>abc" +"(?-u)abc" "<0>abc" + +# +# \c escapes (Control-whatever) +# +"\cA" "<0>\u0001" +"\ca" "<0>\u0001" +"\c\x" "<0>\u001cx" + + +#Multi-line mode +'b\s^' m "a\nb\n" +"(?m)^abc$" "abc \n abc\n<0>abc\nabc" +"(?m)^abc$" 2 "abc \n abc\nabc\n<0>abc" +"^abc$" 2 "abc \n abc\nabc\nabc" + +# Empty and full range +"[\u0000-\U0010ffff]+" "<0>abc\u0000\uffff\U00010000\U0010ffffzz" +"[^\u0000-\U0010ffff]" "abc\u0000\uffff\U00010000\U0010ffffzz" +"[^a--a]+" "<0>abc\u0000\uffff\U00010000\U0010ffffzz" + +# Free-spacing mode +"a b c # this is a comment" x "<0>abc " +'^a (?#xxx) (?#yyy) {3}c' x "<0>aaac" +"a b c [x y z]" x "abc " +"a b c [x y z]" x "a b c " +"a b c [x y z]" x "<0>abcxyz" +"a b c [x y z]" x "<0>abcyyz" + +# +# Look Behind +# +"(?<=a)b" "a<0>b" +"(.*)(?<=[bc])" "<0><1>abcd" +"(?<=(abc))def" "<1>abc<0>def" # lookbehind precedes main match. +"(?<=ab|abc)xyz" "abwxyz" # ab matches, but not far enough. 
+"(?<=abc)cde" "abcde" +"(?<=abc|ab)cde" "ab<0>cde" +"(?<=abc|ab)cde" "abc<0>cde" + +"(?<=bc?c?c?)cd" "ab<0>cd" +"(?<=bc?c?c?)cd" "abc<0>cd" +"(?<=bc?c?c?)cd" "abcc<0>cd" +"(?<=bc?c?c?)cd" "abccc<0>cd" +"(?<=bc?c?c?)cd" "abcccccd" +"(?<=bc?c?c?)c+d" "ab<0>cccccd" + +".*(?<=: ?)(\w*)" "<0>1:one 2: two 3:<1>three " + +# +# Named Characters +# +"a\N{LATIN SMALL LETTER B}c" "<0>abc" +"a\N{LATIN SMALL LETTER B}c" i "<0>abc" +"a\N{LATIN SMALL LETTER B}c" i "<0>aBc" +"a\N{LATIN SMALL LETTER B}c" "aBc" + +"\N{FULL STOP}*" "<0>...abc" + +"$" "abc<0>" + +# +# Optimizations of .* at end of patterns +# +"abc.*" "<0>abcdef" +"abc.*$" "<0>abcdef" +"abc(.*)" "<0>abc<1>def" +"abc(.*)" "<0>abc<1>" +"abc.*" "<0>abc\ndef" +"abc.*" s "<0>abc\ndef" +"abc.*$" s "<0>abc\ndef" +"abc.*$" "abc\ndef" +"abc.*$" m "<0>abc\ndef" +"abc.*\Z" m "abc\ndef" +"abc.*\Z" sm "<0>abc\ndef" + +"abc*" "<0>abcccd" +"abc*$" "<0>abccc" +"ab(?:ab[xyz]\s)*" "<0>ababy abx abc" + +"(?:(abc)|a)(?:bc)+" "<0>abc" +"(?:(abc)|a)(?:bc)*" "<0><1>abc" +"^[+\-]?[0-9]*\.?[0-9]*" "<0>123.456" + +"ab.+yz" "<0>abc12345xyzttt" +"ab.+yz" s "<0>abc12345xyzttt" + +"ab.+yz" "abc123\n45xyzttt" +"ab.+yz" s "<0>abc12\n345xyzttt" + +"ab[0-9]+yz" "---abyz+++" +"ab[0-9]+yz" "---<0>ab1yz+++" +"ab[0-9]+yz" "---<0>ab12yz+++" +"ab[0-9]+yz" "---<0>ab123456yz+++" + +"ab([0-9]+|[A-Z]+)yz" "---abyz+++" +"ab([0-9]+|[A-Z]+)yz" "---<0>ab<1>1yz+++" +"ab([0-9]+|[A-Z]+)yz" "---<0>ab<1>12yz+++" +"ab([0-9]+|[A-Z]+)yz" "---<0>ab<1>Ayz+++" +"ab([0-9]+|[A-Z]+)yz" "---<0>ab<1>AByz+++" +"ab([0-9]+|[A-Z]+)yz" "---<0>ab<1>ABCDEyz+++" + +# +# Hex format \x escaping +# +"ab\x63" "<0>abc" +"ab\x09w" "<0>ab\u0009w" +"ab\xabcdc" "<0>ab\u00abcdc" +"ab\x{abcd}c" "<0>ab\uabcdc" +"ab\x{101234}c" "<0>ab\U00101234c" +"abα" "<0>abα" + +# +# Octal Escaping. This conforms to Java conventions, not Perl. +"\0101\00\03\073\0154\01442" "<0>A\u0000\u0003\u003b\u006c\u0064\u0032" +"\0776" "<0>\u003f\u0036" # overflow, the 6 is literal. +"\0376xyz" "<0>\u00fexyz" +"\08" E "<0>\u00008" +"\0" E "x" + +# +# \u Surrogate Pairs +# +"\ud800\udc00" "<0>\U00010000" +"\ud800\udc00*" "<0>\U00010000\U00010000\U00010000\U00010001" +# TODO (Vitess): The next case has invalid UTF-8, so it's not supported right now for testing. It likely works in practice though! +# "\ud800\ud800\udc00" "<0>\ud800\U00010000\U00010000\U00010000\U00010001" +"(\ud800)(\udc00)" "\U00010000" +"\U00010001+" "<0>\U00010001\U00010001\udc01" + +# +# hitEnd with find() +# +"abc" Z "aa<0>abc abcab" +"abc" 2Z "aaabc <0>abcab" +"abc" 3z "aa>abc abcab" + +# +# \ escaping +# +"abc\jkl" "<0>abcjkl" # escape of a non-special letter is just itself. +"abc[ \j]kl" "<0>abcjkl" + +# +# \R all newline sequences. +# +"abc\Rxyz" "<0>abc\u000axyzgh" +"abc\Rxyz" "<0>abc\u000bxyzgh" +"abc\Rxyz" "<0>abc\u000cxyzgh" +"abc\Rxyz" "<0>abc\u000dxyzgh" +"abc\Rxyz" "<0>abc\u0085xyzgh" +"abc\Rxyz" "<0>abc\u2028xyzgh" +"abc\Rxyz" "<0>abc\u2029xyzgh" +"abc\Rxyz" "<0>abc\u000d\u000axyzgh" + +"abc\R\nxyz" "abc\u000d\u000axyzgh" # \R cannot match only the CR from a CR/LF sequence. +"abc\r\nxyz" "<0>abc\u000d\u000axyzgh" + +"abc\Rxyz" "abc\u0009xyz" # Assorted non-matches. +"abc\Rxyz" "abc\u000exyz" +"abc\Rxyz" "abc\u202axyz" + +# \v \V single character new line sequences. 
+ +"abc\vxyz" "<0>abc\u000axyzgh" +"abc\vxyz" "<0>abc\u000bxyzgh" +"abc\vxyz" "<0>abc\u000cxyzgh" +"abc\vxyz" "<0>abc\u000dxyzgh" +"abc\vxyz" "<0>abc\u0085xyzgh" +"abc\vxyz" "<0>abc\u2028xyzgh" +"abc\vxyz" "<0>abc\u2029xyzgh" +"abc\vxyz" "abc\u000d\u000axyzgh" +"abc\vxyz" "abc?xyzgh" + +"abc[\v]xyz" "<0>abc\u000axyzgh" +"abc[\v]xyz" "<0>abc\u000bxyzgh" +"abc[\v]xyz" "<0>abc\u000cxyzgh" +"abc[\v]xyz" "<0>abc\u000dxyzgh" +"abc[\v]xyz" "<0>abc\u0085xyzgh" +"abc[\v]xyz" "<0>abc\u2028xyzgh" +"abc[\v]xyz" "<0>abc\u2029xyzgh" +"abc[\v]xyz" "abc\u000d\u000axyzgh" +"abc[\v]xyz" "abc?xyzgh" + +"abc\Vxyz" "abc\u000axyzgh" +"abc\Vxyz" "abc\u000bxyzgh" +"abc\Vxyz" "abc\u000cxyzgh" +"abc\Vxyz" "abc\u000dxyzgh" +"abc\Vxyz" "abc\u0085xyzgh" +"abc\Vxyz" "abc\u2028xyzgh" +"abc\Vxyz" "abc\u2029xyzgh" +"abc\Vxyz" "abc\u000d\u000axyzgh" +"abc\Vxyz" "<0>abc?xyzgh" + +# \h \H horizontal white space. Defined as gc=space_separator plus ascii tab + +"abc\hxyz" "<0>abc xyzgh" +"abc\Hxyz" "abc xyzgh" +"abc\hxyz" "<0>abc\u2003xyzgh" +"abc\Hxyz" "abc\u2003xyzgh" +"abc\hxyz" "<0>abc\u0009xyzgh" +"abc\Hxyz" "abc\u0009xyzgh" +"abc\hxyz" "abc?xyzgh" +"abc\Hxyz" "<0>abc?xyzgh" + +"abc[\h]xyz" "<0>abc xyzgh" +"abc[\H]xyz" "abc xyzgh" +"abc[\h]xyz" "<0>abc\u2003xyzgh" +"abc[\H]xyz" "abc\u2003xyzgh" +"abc[\h]xyz" "<0>abc\u0009xyzgh" +"abc[\H]xyz" "abc\u0009xyzgh" +"abc[\h]xyz" "abc?xyzgh" +"abc[\H]xyz" "<0>abc?xyzgh" + + +# +# Bug xxxx +# +"(?:\-|(\-?\d+\d\d\d))?(?:\-|\-(\d\d))?(?:\-|\-(\d\d))?(T)?(?:(\d\d):(\d\d):(\d\d)(\.\d+)?)?(?:(?:((?:\+|\-)\d\d):(\d\d))|(Z))?" MG "<0>-1234-21-31T41:51:61.789+71:81" + + +# +# A random, complex, meaningless pattern that should at least compile +# +"(?![^\\G)(?![^|\]\070\ne\{\t\[\053\?\\\x51\a\075\0023-\[&&[|\022-\xEA\00-\u41C2&&[^|a-\xCC&&[^\037\uECB3\u3D9A\x31\|\[^\016\r\{\,\uA29D\034\02[\02-\[|\t\056\uF599\x62\e\<\032\uF0AC\0026\0205Q\|\\\06\0164[|\057-\u7A98&&[\061-g|\|\0276\n\042\011\e\xE8\x64B\04\u6D0EDW^\p{Lower}]]]]?)(?<=[^\n\\\t\u8E13\,\0114\u656E\xA5\]&&[\03-\026|\uF39D\01\{i\u3BC2\u14FE]])(?<=[^|\uAE62\054H\|\}&&^\p{Space}])(?sxx)(?<=[\f\006\a\r\xB4]{1,5})|(?x-xd:^{5}+)()" "<0>abc" + + +# +# Bug 3225 + +"1|9" "<0>1" +"1|9" "<0>9" +"1*|9" "<0>1" +"1*|9" "<0>9" + +"(?:a|ac)d" "<0>acd" +"a|ac" "<0>ac" + +# +# Bug 3320 +# +"(a([^ ]+)){0,} (c)" "<0><1>a<2>b <3>c " +"(a([^ ]+))* (c)" "<0><1>a<2>b <3>c " + +# +# Bug 3436 +# +"(.*?) 
*$" "<0><1>test " + +# +# Bug 4034 +# +"\D" "<0>ABC\u00ffDEF" +"\d" "ABC\u00ffDEF" +"\D" "<0>\u00ffDEF" +"\d" "\u00ffDEF" +"\D" "123<0>\u00ffDEF" +"\D" "<0>\u0100DEF" +"\D" "123<0>\u0100DEF" + +# +#bug 4024, new line sequence handling +# +"(?m)^" "<0>AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" +"(?m)^" 2 "AA\u000d\u000a<0>BB\u000d\u000aCC\u000d\u000a" +"(?m)^" 3 "AA\u000d\u000aBB\u000d\u000a<0>CC\u000d\u000a" +"(?m)^" 4 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" + +"(?m)$" "AA<0>\u000d\u000aBB\u000d\u000aCC\u000d\u000a" +"(?m)$" 2 "AA\u000d\u000aBB<0>\u000d\u000aCC\u000d\u000a" +"(?m)$" 3 "AA\u000d\u000aBB\u000d\u000aCC<0>\u000d\u000a" +"(?m)$" 4 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a<0>" +"(?m)$" 5 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" + +"$" "AA\u000d\u000aBB\u000d\u000aCC<0>\u000d\u000a" +"$" 2 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a<0>" +"$" 3 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" + +"$" "\u000a\u0000a<0>\u000a" +"$" 2 "\u000a\u0000a\u000a<0>" +"$" 3 "\u000a\u0000a\u000a" + +"$" "<0>" +"$" 2 "" + +"$" "<0>\u000a" +"$" 2 "\u000a<0>" +"$" 3 "\u000a" + +"^" "<0>" +"^" 2 "" + +"\Z" "<0>" +"\Z" 2 "" +"\Z" 2 "\u000a<0>" +"\Z" "<0>\u000d\u000a" +"\Z" 2 "\u000d\u000a<0>" + + +# No matching ^ at interior new-lines if not in multi-line mode. +"^" "<0>AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" +"^" 2 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" + +# +# Dot-matches-any mode, and stopping at new-lines if off. +# +"." "<0>123\u000aXYZ" +"." 2 "1<0>23\u000aXYZ" +"." 3 "12<0>3\u000aXYZ" +"." 4 "123\u000a<0>XYZ" # . doesn't match newlines +"." 4 "123\u000b<0>XYZ" +"." 4 "123\u000c<0>XYZ" +"." 4 "123\u000d<0>XYZ" +"." 4 "123\u000d\u000a<0>XYZ" +"." 4 "123\u0085<0>XYZ" +"." 4 "123\u2028<0>XYZ" +"." 4 "123\u2029<0>XYZ" +"." 4s "123<0>\u000aXYZ" # . matches any +"." 4s "123<0>\u000bXYZ" +"." 4s "123<0>\u000cXYZ" +"." 4s "123<0>\u000dXYZ" +"." 4s "123<0>\u000d\u000aXYZ" +"." 4s "123<0>\u0085XYZ" +"." 4s "123<0>\u2028XYZ" +"." 4s "123<0>\u2029XYZ" +".{6}" "123\u000a\u000dXYZ" +".{6}" s "<0>123\u000a\u000dXY" + + +# +# Ranges +# +".*" "abc<0>defghi" +"a" "aaa<0>aaaaaa" +"a" 2 "aaaa<0>aaaaa" +"a" 3 "aaaaa<0>aaaa" +"a" 4 "aaaaaaaaa" +"a" "aaa<0>aaaaaa" + +# +# [set] parsing, systematically run through all of the parser states. +# +# +"[def]+" "abc<0>ddeeffghi" # set-open +"[^def]+" "<0>abcdefghi" +"[:digit:]+" "abc<0>123def" +"[:^digit:]+" "<0>abc123def" +"[\u005edef]+" "abc<0>de^fghi" + +"[]]+" "abc<0>]]][def" # set-open2 +"[^]]+" "<0>abc]]][def" + +"[:Lu:]+" "abc<0>ABCdef" # set-posix +"[:Lu]+" "abc<0>uL::Lu" +"[:^Lu]+" "abc<0>uL:^:Lu" +"[:]+" "abc<0>:::def" +"[:whats this:]" E " " +"[--]+" dE "-------" + +"[[nested]]+" "xyz[<0>nnetsteed]abc" #set-start +"[\x{41}]+" "CB<0>AAZYX" +"[\[\]\\]+" "&*<0>[]\\..." 
+"[*({<]+" "^&<0>{{(<<*)))" + + +"[-def]+" "abc<0>def-ef-dxyz" # set-start-dash +"[abc[--def]]" E " " + +"[x[&def]]+" "abc<0>def&ghi" # set-start-amp +"[&& is bad at start]" E " " + +"[abc" E " " # set-after-lit +"[def]]" "abcdef" +"[def]]" "abcde<0>f]]" + +"[[def][ghi]]+" "abc]<0>defghi[xyz" # set-after-set +"[[def]ghi]+" "abc]<0>defghi[xyz" +"[[[[[[[[[[[abc]" E " " +"[[abc]\p{Lu}]+" "def<0>abcABCxyz" + +"[d-f]+" "abc<0>defghi" # set-after-range +"[d-f[x-z]]+" "abc<0>defxyzzzgw" +"[\s\d]+" "abc<0> 123def" +"[d-f\d]+" "abc<0>def123ghi" +"[d-fr-t]+" "abc<0>defrstuvw" + +"[abc--]" E " " # set-after-op +"[[def]&&]" E " " +"[-abcd---]+" "<0>abc--" #[-abcd]--[-] +"[&abcd&&&ac]+" "b<0>ac&&cad" #[&abcd]&&[&ac] + +"[[abcd]&[ac]]+" "b<0>acacd" # set-set-amp +"[[abcd]&&[ac]]+" "b<0>acacd" +"[[abcd]&&ac]+" "b<0>acacd" +"[[abcd]&ac]+" "<0>bacacd&&&" + +"[abcd&[ac]]+" "<0>bacacd&&&" #set-lit-amp +"[abcd&&[ac]]+" "b<0>acacd" +"[abcd&&ac]+" "b<0>acacd" + +"[[abcd]-[ac]]+" "a<0>bdbdc" # set-set-dash +"[[abcd]--[ac]]+" "a<0>bdbdc" +"[[abcd]--ac]+" "a<0>bdbdc" +"[[abcd]-ac]+" "<0>bacacd---" + +"[a-d--[b-c]]+" "b<0>adadc" # set-range-dash +"[a-d--b-c]+" "b<0>adadc" +"[a-d-[b-c]]+" "<0>bad-adc" +"[a-d-b-c]+" "<0>bad-adc" +"[\w--[b-c]]+" "b<0>adadc" +"[\w--b-c]+" "b<0>adadc" +"[\w-[b-c]]+" "<0>bad-adc" +"[\w-b-c]+" "<0>bad-adc" + +"[a-d&&[b-c]]+" "a<0>bcbcd" # set-range-amp +"[a-d&&b-c]+" "a<0>bcbcd" +"[a-d&[b-c]]+" "<0>abc&bcd" +"[a-d&b-c]+" "<0>abc&bcd" + +"[abcd--bc]+" "b<0>addac" # set-lit-dash +"[abcd--[bc]]+" "b<0>addac" +"[abcd-[bc]]+" "<0>bad--dacxyz" +"[abcd-]+" "<0>bad--dacxyz" + +"[abcd-\s]+" E "xyz<0>abcd --xyz" # set-lit-dash-esc +"[abcd-\N{LATIN SMALL LETTER G}]+" "xyz-<0>abcdefghij-" +"[bcd-\{]+" "a<0>bcdefyz{|}" + +"[\p{Ll}]+" "ABC<0>abc^&*&" # set-escape +"[\P{Ll}]+" "abc<0>ABC^&*&xyz" +"[\N{LATIN SMALL LETTER Q}]+" "mnop<0>qqqrst" +"[\sa]+" "cb<0>a a (*&" +"[\S]+" " <0>hello " +"[\w]+" " <0>hello_world! " +"[\W]+" "a<0> *$%#,hello " +"[\d]+" "abc<0>123def" +"[\D]+" "123<0>abc567" +"[\$\#]+" "123<0>$#$#\\" + +# +# Try each of the Java compatibility properties. +# These are checked here, while normal Unicode properties aren't, because +# these Java compatibility properties are implemented directly by regexp, while other +# properties are handled by ICU's Property and UnicodeSet APIs. +# +# These tests are only to verify that the names are recognized and the +# implementation isn't dead. They are not intended to verify that the +# function definitions are 100% correct. 
+# +"[:InBasic Latin:]+" "ΓΔΕΖΗΘ<0>hello, world.ニヌネノハバパ" +"[:^InBasic Latin:]+" "<0>ΓΔΕΖΗΘhello, world.ニヌネノハバパ" +"\p{InBasicLatin}+" "ΓΔΕΖΗΘ<0>hello, world.ニヌネノハバパ" +"\P{InBasicLatin}+" "<0>ΓΔΕΖΗΘhello, world.ニヌネノハバパ" +"\p{InGreek}+" "<0>ΓΔΕΖΗΘhello, world.ニヌネノハバパ" +"\p{InCombining Marks for Symbols}" "<0>\u20d0" +"\p{Incombiningmarksforsymbols}" "<0>\u20d0" + + +"\p{javaDefined}+" "\uffff<0>abcd\U00045678" +"\p{javaDigit}+" "abc<0>1234xyz" +"\p{javaIdentifierIgnorable}+" "abc<0>\u0000\u000e\u009fxyz" +"\p{javaISOControl}+" "abc<0>\u0000\u000d\u0083xyz" +"\p{javaJavaIdentifierPart}+" "#@!<0>abc123_$;" +"\p{javaJavaIdentifierStart}+" "123\u0301<0>abc$_%^&" +"\p{javaLetter}+" "123<0>abcDEF&*()(" +"\p{javaLetterOrDigit}+" "$%^&*<0>123abcகஙசஜஞ☺♘♚☔☎♬⚄⚡" +"\p{javaLowerCase}+" "ABC<0>def&^%#:=" +"\p{javaMirrored}+" "ab$%<0>(){}[]xyz" +"\p{javaSpaceChar}+" "abc<0> \u00a0\u2028!@#" +"\p{javaSupplementaryCodePoint}+" "abc\uffff<0>\U00010000\U0010ffff\u0000" +"\p{javaTitleCase}+" "abCE<0>Džῌᾨ123" +"\p{javaUnicodeIdentifierStart}+" "123<0>abcⅣ%^&&*" +"\p{javaUnicodeIdentifierPart}+" "%&&^<0>abc123\u0301\u0002..." +"\p{javaUpperCase}+" "abc<0>ABC123" +"\p{javaValidCodePoint}+" "<0>\u0000abc\ud800 unpaired \udfff |\U0010ffff" +"\p{javaWhitespace}+" "abc\u00a0\u2007\u202f<0> \u0009\u001c\u001f\u202842" +"\p{all}+" "<0>123\u0000\U0010ffff" +"\P{all}+" "123\u0000\U0010ffff" + +# [:word:] is implemented directly by regexp. Not a java compat property, but PCRE and others. + +"[:word:]+" ".??$<0>abc123ΓΔΕΖΗ_%%%" +"\P{WORD}+" "<0>.??$abc123ΓΔΕΖΗ_%%%" + +# +# Errors on unrecognized ASCII letter escape sequences. +# +"[abc\Y]+" "<0>abcY" +"[abc\Y]+" eE "<0>abcY" + +"(?:a|b|c|\Y)+" "<0>abcY" +"(?:a|b|c|\Y)+" eE "<0>abcY" + +"\Q\Y\E" e "<0>\\Y" + +# +# Reported problem +# +"[a-\w]" E "x" + +# +# Bug 4045 +# +"A*" "<0>AAAA" +"A*" 2 "AAAA<0>" +"A*" 3 "AAAA" +"A*" 4 "AAAA" +"A*" 5 "AAAA" +"A*" 6 "AAAA" +"A*" "<0>" +"A*" 2 "" +"A*" 3 "" +"A*" 4 "" +"A*" 5 "" + +# +# Bug 4046 +# +"(?m)^" "<0>AA\u000dBB\u000dCC\u000d" +"(?m)^" 2 "AA\u000d<0>BB\u000dCC\u000d" +"(?m)^" 3 "AA\u000dBB\u000d<0>CC\u000d" +"(?m)^" 4 "AA\u000dBB\u000dCC\u000d" +"(?m)^" 5 "AA\u000dBB\u000dCC\u000d" +"(?m)^" 6 "AA\u000dBB\u000dCC\u000d" + +"(?m)^" "<0>AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" +"(?m)^" 2 "AA\u000d\u000a<0>BB\u000d\u000aCC\u000d\u000a" +"(?m)^" 3 "AA\u000d\u000aBB\u000d\u000a<0>CC\u000d\u000a" +"(?m)^" 4 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" + +# +# Bug 4059 +# +"\w+" "<0>イチロー" +"\b....\b." "<0>イチロー?" + + +# +# Bug 4058 ICU Unicode Set patterns have an odd feature - +# A $ as the last character before the close bracket means match +# a \uffff, which means off the end of the string in transliterators. +# Didn't make sense for regular expressions, and is now fixed. +# +"[\$](P|C|D);" "<0>$<1>P;" +"[$](P|C|D);" "<0>$<1>P;" +"[$$](P|C|D);" "<0>$<1>P;" + +# +# bug 4888 Flag settings lost in some cases. +# +"((a){2})|(#)" is "no" +"((a){2})|(#)" is "<0><1>a<2>a#" +"((a){2})|(#)" is "a<0><3>#" + +"((a|b){2})|c" is "<0>c" +"((a|b){2})|c" is "<0>C" +"((a|b){2})|c" s "C" + +# +# bug 5617 ZWJ \u200d shouldn't cause word boundaries +# +".+?\b" "<0> \u0935\u0915\u094D\u200D\u0924\u0947 " +".+?\b" 2 " <0>\u0935\u0915\u094D\u200D\u0924\u0947 " +".+?\b" 3 " \u0935\u0915\u094D\u200D\u0924\u0947 " + +# +# bug 5386 "^.*$" should match empty input +# +"^.*$" "<0>" +"^.*$" m "<0>" +"^.*$" "<0>\n" +"(?s)^.*$" "<0>\n" + +# +# bug 5386 Empty pattern and empty input should match. 
+# +"" "<0>abc" +"" "<0>" + +# +# bug 5386 Range upper and lower bounds can be equal +# +"[a-a]" "<0>a" + +# +# bug 5386 $* should not fail, should match empty string. +# +"$*" "<0>abc" + +# +# bug 5386 \Q ... \E escaping problem +# +"[a-z\Q-$\E]+" "QE<0>abc-def$." + +# More reported 5386 Java comaptibility failures +# +"[^]*abb]*" "<0>kkkk" +"\xa" "huh" # Java would like to be warned. +"^.*$" "<0>" + +# +# bug 5386 Empty left alternation should produce a zero length match. +# +"|a" "<0>a" +"$|ab" "<0>ab" +"$|ba" "ab<0>" + +# +# bug 5386 Java compatibility for set expressions +# +"[a-z&&[cde]]+" "ab<0>cdefg" + +# +# bug 6019 matches() needs to backtrack and check for a longer match if the +# first match(es) found don't match the entire input. +# +"a?|b" "<0>b" +"a?|b" M "<0>b" +"a?|.*?u|stuff|d" M "<0>stuff" +"a?|.*?(u)|stuff|d" M "<0>stuff<1>u" +"a+?" "<0>aaaaaaaaaaaaa" +"a+?" M "<0>aaaaaaaaaaaaa" + +# +# Bug 7724. Expression to validate zip codes. +# +"(?!0{5})(\d{5})(?!-?0{4})(-?\d{4})?" "<0><1>94040<2>-3344" +"(?!0{5})(\d{5})(?!-?0{4})(-?\d{4})?" "94040-0000" +"(?!0{5})(\d{5})(?!-?0{4})(-?\d{4})?" "00000-3344" + +# +# Bug 8666. Assertion failure on match, bad operand to JMP_SAV_X opcode. +# +"((.??)+|A)*" "<0><1><2>AAAAABBBBBCCCCCDDDDEEEEE" + +# +# Bug 8826. Incorrect results with case insensitive matches. +# +"AS(X)" i "aßx" +"AS.*" i "aßx" # Expansion of sharp s can't split between pattern terms. +"ASßS" i "<0>aßß" # All one literal string, does match. +"ASß{1}S" i "aßß" # Pattern with terms, no match. +"aßx" i "<0>assx" +"aßx" i "<0>ASSX" +"aßx" i "<0>aßx" +"ASS(.)" i "<0>aß<1>x" + +# Case Insensitive, probe some corner cases. +"ass+" i "aß" # Second 's' in pattern is qualified, can't combine with first. +"as+" i "aß" +"aßs" i "as" # Can't match half of a ß +"aß+" i "<0>asssssssss" +"aß+" i "<0>assßSssSSSs" +"a(ß?)+" i "<0>assssssss<1>s" +"a(ß?)+" i "<0>a<1>zzzzzzzzs" + +"\U00010400" i "<0>\U00010428" # case folded supplemental code point. + +"sstuff" i "<0>ßtuff" # exercise optimizations on what chars can start a match. +"sstuff" i "s<0>ßtuff" # exercise optimizations on what chars can start a match. +"ßtuff" i "s<0>sstuff" +"ßtuff" i "s<0>Sstuff" + +"a(..)\1" i "<0>A<1>bcBCdef" +"(ß)\1" i "aa<0><1>ssßzz" # Case insensitive back reference +"..(.)\1" i "<0>aa<1>ßss" +"ab(..)\1" i "xx<0>ab<1>ssßss" + +" (ss) ((\1.*)|(.*))" i "<0> <1>ss <2><4>sß" # The back reference 'ss' must not match in 'sß' + +# Bug 9057 +# \u200c and \u200d should be word characters. +# +"\w+" " <0>abc\u200cdef\u200dghi " +"\w+" i " <0>abc\u200cdef\u200dghi " +"[\w]+" " <0>abc\u200cdef\u200dghi " +"[\w]+" i " <0>abc\u200cdef\u200dghi " + +# Bug 9283 +# uregex_open fails for look-behind assertion + case-insensitive + +"(ab)?(?<=ab)cd|ef" i "<0><1>abcd" + +# Bug 9719 Loop breaking on (zero length match){3,} (unlimited upper bound). +# + +"(?:abc){1,}abc" "<0>abcabcabcabcabc" +"(?:2*){2,}?a2\z" "<0>2a2" +"(?:2*){2,}?a2\z" "2a3" +"(?:x?+){3,}+yz" "w<0>yz" +"(2*){2,}?a2\\z" "2a3" +"(2*){2,}?a2\\z" "<0>2<1>a2\\z" +"(2*){2,}?a2\z" "<0>2<1>a2" + + +# Bug 10024 +# Incorrect (unbounded) longest match length with {1, 20} style quantifiers. +# Unbounded match is disallowed in look-behind expressions. +# Max match length is used to limit where to check for look-behind matches. + +"(?<=a{1,5})bc" "aaaa<0>bcdef" +"(?<=(?:aa){3,20})bc" "aaaaaa<0>bcdef" +"(?jkl" +"(?<=a{11})bc" "aaaaaaaaaaa<0>bc" +"(?<=a{11})bc" "aaaaaaaaaabc" +"(?<=a{1,})bc" E "aaaa<0>bcdef" # U_REGEX_LOOK_BEHIND_LIMIT error. 
+"(?<=(?:){11})bc" "<0>bc" # Empty (?:) expression. + +# Bug 10835 +# Match Start Set not being correctly computed for case insensitive patterns. +# (Test here is to dump the compiled pattern & manually check the start set.) + +"(private|secret|confidential|classified|restricted)" i "hmm, <0><1>Classified stuff" +"(private|secret|confidential|classified|restricted)" "hmm, Classified stuff" + +# Bug 10844 + +"^([\w\d:]+)$" "<0><1>DiesIst1Beispiel:text" +"^([\w\d:]+)$" i "<0><1>DiesIst1Beispiel:text" +"^(\w+\d\w+:\w+)$" "<0><1>DiesIst1Beispiel:text" +"^(\w+\d\w+:\w+)$" i "<0><1>DiesIst1Beispiel:text" + +# Bug 11049 +# Edge cases in find() when pattern match begins with set of code points +# and the match begins at the end of the string. + +"A|B|C" "hello <0>A" +"A|B|C" "hello \U00011234" +"A|B|\U00012345" "hello <0>\U00012345" +"A|B|\U00010000" "hello \ud800" + +# Bug 11369 +# Incorrect optimization of patterns with a zero length quantifier {0} + +"(.|b)(|b){0}\$(?#xxx){3}(?>\D*)" "AAAAABBBBBCCCCCDDDDEEEEE" +"(|b)ab(c)" "<0><1>ab<2>c" +"(|b){0}a{3}(D*)" "<0>aaa<2>" +"(|b){0,1}a{3}(D*)" "<0><1>aaa<2>" +"((|b){0})a{3}(D*)" "<0><1>aaa<3>" + +# Bug 11370 +# Max match length computation of look-behind expression gives result that is too big to fit in the +# in the 24 bit operand portion of the compiled code. Expressions should fail to compile +# (Look-behind match length must be bounded. This case is treated as unbounded, an error.) + +"(?pre<1>\ud800post\ud800 fin" +"pre(.)post\1" i "pre\ud800post\ud800\udc00" # case insensiteve backrefs take a different code path +"pre(.)post\1" i "<0>pre<1>\ud800post\ud800 fin" + +# Bug 11554 +# +# Maximum match length computation was assuming UTF-16. +# Used in look-behind matches to constrain how far back to look. + +"(?<=a\x{100000})spam" "***a\x{100000}<0>spam**" +"(?<=aą)spam" "**aą<0>spam**" +"(?<=ąabc)spam" "**ąabc<0>spam**" + +"(?<=a\x{100000})spam" "***a\x{100001}spam**" +"(?<=aą)spam" "**bąspam**" +"(?<=ąabc)spam" "**ąabxspam**" + +# with negative look-behind + +"(?spam**" +"(?spam**" +"(?spam**" + +# Bug #12930 +# +# Minimum Match Length computation, int32_t overflow on an empty set in the pattern. +# The empty set, with no match possible, has a min match length of INT32_MAX. +# Was incremented subsequently. Caused assertion failure on pattern compile. + +"[^\u0000-\U0010ffff]bc?" "bc no match" +"[^\u0000-\U0010ffff]?bc?" "<0>bc has a match" + +# Bug #12160 Hit End behavior after find fails to find. +# To match Java, should be true if find fails to find. +# +"abc" Z "<0>abc abc abc xyz" +"abc" Z2 "abc <0>abc abc xyz" +"abc" Z3 "abc abc <0>abc xyz" +"abc" z4 "abc abc abc xyz" + +# Bug #13844 Verify that non-standard Java property names are recognized. 
+"[\p{IsAlphabetic}]" " <0>A" +"[\P{IsAlphabetic}]" "A<0> " +"[\p{IsIdeographic}]" "A<0>〆" +"[\P{IsIdeographic}]" "〆<0>A" +"[\p{IsLetter}]" " <0>A" +"[\P{IsLetter}]" "A<0> " +"[\p{Letter}]" " <0>A" +"[\p{IsLowercase}]" "A<0>a" +"[\P{IsLowercase}]" "a<0>A" +"[\p{IsUppercase}]" "a<0>A" +"[\P{IsUppercase}]" "A<0>a" +"[\p{IsTitlecase}]" "D<0>Dz" +"[\P{IsTitlecase}]" "Dz<0>D" +"[\p{IsPunctuation}]" " <0>&" +"[\P{IsPunctuation}]" "&<0> " +"[\p{IsControl}]" " <0>\x{82}" +"[\P{IsControl}]" "\x{82}<0> " +"[\p{IsWhite_Space}]" "x<0> " +"[\P{IsWhite_Space}]" " <0>x" +"[\p{IsDigit}]" " <0>4" +"[\P{IsDigit}]" "4<0> " +"[\p{IsHex_Digit}]" " <0>F" +"[\P{IsHex_Digit}]" "F<0> " +"[\p{IsJoin_Control}]" " <0>\x{200d}" +"[\P{IsJoin_Control}]" "\x{200d}<0> " +"[\p{IsNoncharacter_Code_Point}]" "A<0>\x{5fffe}" +"[\p{IsAssigned}]" "\x{10ffff}<0>a" +"[\P{IsAssigned}]" "a<0>\x{10ffff}" + +"[\p{InBasic Latin}]" "〆<0>A" +"[\p{InBasicLatin}]" "〆<0>A" +"[\p{InBasic-Latin}]" "〆<0>A" # ICU accepts '-'; Java does not. +"[\p{InBasic_Latin}]" "〆<0>A" +"[\p{Inbasiclatin}]" "〆<0>A" +"[\p{inbasiclatin}]" E "〆<0>A" # "In" must be cased as shown. Property name part is case insensitive. +"[\p{InCombining_Marks_for_Symbols}]" "a<0>\x{20DD}" # COMBINING ENCLOSING CIRCLE + +"[\p{all}]*" "<0>\x{00}abc\x{10ffff}" +"[\p{javaBadProperty}]" E "whatever" +"[\p{IsBadProperty}]" E "whatever" +"[\p{InBadBlock}]" E "whatever" +"[\p{In}]" E "whatever" +"[\p{Is}]" E "whatever" +"[\p{java}]" "x<0>ꦉ" # Note: "java" is a valid script code. + +"[\p{javaLowerCase}]+" "A<0>a" +"[\p{javaLowerCase}]+" i "<0>Aa" +"[\P{javaLowerCase}]+" "<0>Aa" +"[\P{javaLowerCase}]+" i "Aa" # No Match because case fold of the set happens first, then negation. + # JDK is not case insensitive w named properties, even though + # the insensitive match flag is set. A JDK bug? + +"[a-z]+" i "<0>Aa" # Matches JDK behavior. +"[^a-z]+" i "Aa" # (no match) which is JDK behavior. Case fold first, then negation. + +# Bug 20385. Assertion failure while compiling a negative look-behind expression consisting of a set with +# no contents. Meaning the [set] can never match. There is no syntax to directly express +# an empty set, so generate it by negating (^) a set of all code points. +# Also check empty sets in other contexts. + +"(?abc" + +"(?abc" +"x(?xabc" +"x(?xabc" +"x(?xabc" + +"[^\u0000-\U0010ffff]" "a" +"[^[^\u0000-\U0010ffff]]" "<0>a" + +"This is a string with (?:one |two |three )endings" "<0>This is a string with two endings" + +# Bug ICU-20544. Similar to 20385, above. Assertion failure with a negative look-behind assertion containing +# a set with no contents. Look-behind pattern includes more than just the empty set. + +"(?abc" # note: first 'ⰿ' is \u2c3f, hence empty set. +"(?abc" +"(?<=[^[^]]†)" "abc" # Problem also exists w positive look-behind + +# Bug ICU-20391. Crash in computation of minimum match length with nested look-around patterns. +# +"(?<=(?<=((?=)){0}+)" E "aaa" +"(?<=(?<=((?=)){0}+))" "<0>" +"(?<=c(?<=b((?=a)){1}+))" "aaa" +"abc(?=de(?=f))...g" "<0>abcdefg" +"abc(?=de(?=f))...g" "abcdxfg" + +# Bug ICU-20618 Assertion failure with nested look-around expressions. +# +"(?<=(?<=b?(?=a)))" "hello, world." + +# Bug ICU-20939 +# Incorrect word \b boundaries w UTF-8 input and non-ASCII text +# +"(?w)\b" v2 "äää<0> äää" + +# Bug ICU-21492 Assertion failure with nested look-around expressions. +# +"(?<=(?:(?<=(?:(?<=(?:(?<=)){2})){3})){4}" E "<0>" # orig failure from bug report, w mismatched parens. 
+"(?:(?<=(?:(?<=)){2}))" "<0>" # Simplified case, with a valid pattern. + +# Random debugging, Temporary +# + +# +# Regexps from http://www.regexlib.com +# +"^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" G "<0>G1 1AA" +"^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" G "<0>EH10 2QQ" +"^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" G "<0>SW1 1ZZ" +"^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" "G111 1AA" +"^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" "X10 WW" +"^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" "DDD 5WW" +#"^[\w\-]+(?:\.[\w\-]+)*@(?:[\w\-]+\.)+[a-zA-Z]{2,7}$" dG "<0>joe.tillis@unit.army.mil" # TODO: \w in pattern +#"^[\w-]+(?:\.[\w-]+)*@(?:[\w-]+\.)+[a-zA-Z]{2,7}$" G "<0>jack_rabbit@slims.com" # TODO: \w in pattern +#"^[\w-]+(?:\.[\w-]+)*@(?:[\w-]+\.)+[a-zA-Z]{2,7}$" G "<0>foo99@foo.co.uk" # TODO: \w in pattern +#"^[\w-]+(?:\.[\w-]+)*@(?:[\w-]+\.)+[a-zA-Z]{2,7}$" "find_the_mistake.@foo.org" # TODO: \w in pattern +#"^[\w-]+(?:\.[\w-]+)*@(?:[\w-]+\.)+[a-zA-Z]{2,7}$" ".prefix.@some.net" +"^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$" G "<0>asmith@mactec.com" +"^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$" G "<0>foo12@foo.edu" +"^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$" G "<0>bob.smith@foo.tv" +"^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$" "joe" +"^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$" "@foo.com" +"^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$" "a@a" +"^\d{1,2}\/\d{1,2}\/\d{4}$" G "<0>4/1/2001" +"^\d{1,2}\/\d{1,2}\/\d{4}$" G "<0>12/12/2001" +"^\d{1,2}\/\d{1,2}\/\d{4}$" G "<0>55/5/3434" +"^\d{1,2}\/\d{1,2}\/\d{4}$" "1/1/01" +"^\d{1,2}\/\d{1,2}\/\d{4}$" "12 Jan 01" +"^\d{1,2}\/\d{1,2}\/\d{4}$" "1-1-2001" +"^(?:(?:(?:0?[13578]|1[02])(\/|-|\.)31)\1|(?:(?:0?[1,3-9]|1[0-2])(\/|-|\.)(?:29|30)\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:0?2(\/|-|\.)29\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:(?:0?[1-9])|(?:1[0-2]))(\/|-|\.)(?:0?[1-9]|1\d|2[0-8])\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" G "<0>01.1.02" +"^(?:(?:(?:0?[13578]|1[02])(\/|-|\.)31)\1|(?:(?:0?[1,3-9]|1[0-2])(\/|-|\.)(?:29|30)\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:0?2(\/|-|\.)29\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:(?:0?[1-9])|(?:1[0-2]))(\/|-|\.)(?:0?[1-9]|1\d|2[0-8])\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" G "<0>11-30-2001" +"^(?:(?:(?:0?[13578]|1[02])(\/|-|\.)31)\1|(?:(?:0?[1,3-9]|1[0-2])(\/|-|\.)(?:29|30)\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:0?2(\/|-|\.)29\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:(?:0?[1-9])|(?:1[0-2]))(\/|-|\.)(?:0?[1-9]|1\d|2[0-8])\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" G "<0>2/29/2000" +"^(?:(?:(?:0?[13578]|1[02])(\/|-|\.)31)\1|(?:(?:0?[1,3-9]|1[0-2])(\/|-|\.)(?:29|30)\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:0?2(\/|-|\.)29\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:(?:0?[1-9])|(?:1[0-2]))(\/|-|\.)(?:0?[1-9]|1\d|2[0-8])\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" "02/29/01" 
+"^(?:(?:(?:0?[13578]|1[02])(\/|-|\.)31)\1|(?:(?:0?[1,3-9]|1[0-2])(\/|-|\.)(?:29|30)\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:0?2(\/|-|\.)29\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:(?:0?[1-9])|(?:1[0-2]))(\/|-|\.)(?:0?[1-9]|1\d|2[0-8])\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" "13/01/2002" +"^(?:(?:(?:0?[13578]|1[02])(\/|-|\.)31)\1|(?:(?:0?[1,3-9]|1[0-2])(\/|-|\.)(?:29|30)\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:0?2(\/|-|\.)29\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:(?:0?[1-9])|(?:1[0-2]))(\/|-|\.)(?:0?[1-9]|1\d|2[0-8])\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" "11/00/02" +"^(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])$" G "<0>127.0.0.1" +"^(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])$" G "<0>255.255.255.0" +"^(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])$" G "<0>192.168.0.1" +"^(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])$" "1200.5.4.3" +"^(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])$" "abc.def.ghi.jkl" +"^(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])$" "255.foo.bar.1" +"(AUX|PRN|NUL|COM\d|LPT\d)+\s*$" G "<0>COM1" +"(AUX|PRN|NUL|COM\d|LPT\d)+\s*$" G "<0>AUX" +"(AUX|PRN|NUL|COM\d|LPT\d)+\s*$" G "<0>LPT1" +"(AUX|PRN|NUL|COM\d|LPT\d)+\s*$" "image.jpg" +"(AUX|PRN|NUL|COM\d|LPT\d)+\s*$" "index.html" +"(AUX|PRN|NUL|COM\d|LPT\d)+\s*$" "readme.txt" +"^(?:(?:31(\/|-|\.)(?:0?[13578]|1[02]))\1|(?:(?:29|30)(\/|-|\.)(?:0?[1,3-9]|1[0-2])\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:29(\/|-|\.)0?2\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:0?[1-9]|1\d|2[0-8])(\/|-|\.)(?:(?:0?[1-9])|(?:1[0-2]))\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" G "<0>29/02/1972" +"^(?:(?:31(\/|-|\.)(?:0?[13578]|1[02]))\1|(?:(?:29|30)(\/|-|\.)(?:0?[1,3-9]|1[0-2])\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:29(\/|-|\.)0?2\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:0?[1-9]|1\d|2[0-8])(\/|-|\.)(?:(?:0?[1-9])|(?:1[0-2]))\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" G "<0>5-9-98" 
+"^(?:(?:31(\/|-|\.)(?:0?[13578]|1[02]))\1|(?:(?:29|30)(\/|-|\.)(?:0?[1,3-9]|1[0-2])\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:29(\/|-|\.)0?2\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:0?[1-9]|1\d|2[0-8])(\/|-|\.)(?:(?:0?[1-9])|(?:1[0-2]))\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" G "<0>10-11-2002" +"^(?:(?:31(\/|-|\.)(?:0?[13578]|1[02]))\1|(?:(?:29|30)(\/|-|\.)(?:0?[1,3-9]|1[0-2])\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:29(\/|-|\.)0?2\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:0?[1-9]|1\d|2[0-8])(\/|-|\.)(?:(?:0?[1-9])|(?:1[0-2]))\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" "29/02/2003" +"^(?:(?:31(\/|-|\.)(?:0?[13578]|1[02]))\1|(?:(?:29|30)(\/|-|\.)(?:0?[1,3-9]|1[0-2])\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:29(\/|-|\.)0?2\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:0?[1-9]|1\d|2[0-8])(\/|-|\.)(?:(?:0?[1-9])|(?:1[0-2]))\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" "12/13/2002" +"^(?:(?:31(\/|-|\.)(?:0?[13578]|1[02]))\1|(?:(?:29|30)(\/|-|\.)(?:0?[1,3-9]|1[0-2])\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:29(\/|-|\.)0?2\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:0?[1-9]|1\d|2[0-8])(\/|-|\.)(?:(?:0?[1-9])|(?:1[0-2]))\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" "1-1-1500" +"^(user=([a-z0-9]+,)*(([a-z0-9]+){1});)?(group=([a-z0-9]+,)*(([a-z0-9]+){1});)?(level=[0-9]+;)?$" G "<0>user=foo,bar,quux;group=manager,admin;level=100;" +"^(user=([a-z0-9]+,)*(([a-z0-9]+){1});)?(group=([a-z0-9]+,)*(([a-z0-9]+){1});)?(level=[0-9]+;)?$" G "<0>group=nobody;level=24;" +"^(user=([a-z0-9]+,)*(([a-z0-9]+){1});)?(group=([a-z0-9]+,)*(([a-z0-9]+){1});)?(level=[0-9]+;)?$" "user=foo" +"^(user=([a-z0-9]+,)*(([a-z0-9]+){1});)?(group=([a-z0-9]+,)*(([a-z0-9]+){1});)?(level=[0-9]+;)?$" "blahh" +"^(\(?\+?[0-9]*\)?)?[0-9_\- \(\)]*$" G "<0>(+44)(0)20-12341234" +"^(\(?\+?[0-9]*\)?)?[0-9_\- \(\)]*$" G "<0>02012341234" +"^(\(?\+?[0-9]*\)?)?[0-9_\- \(\)]*$" G "<0>+44 (0) 1234-1234" +"^(\(?\+?[0-9]*\)?)?[0-9_\- \(\)]*$" "(44+)020-12341234" +"^(\(?\+?[0-9]*\)?)?[0-9_\- \(\)]*$" "12341234(+020)" +"\b(\w+)\s+\1\b" G "<0>Tell the the preacher" +"\b(\w+)\s+\1\b" G "<0>some some" +"\b(\w+)\s+\1\b" G "<0>hubba hubba" +"\b(\w+)\s+\1\b" "once an annual report" +"\b(\w+)\s+\1\b" "mandate dated submissions" +"\b(\w+)\s+\1\b" "Hubba hubba" +"(^\+[0-9]{2}|^\+[0-9]{2}\(0\)|^\(\+[0-9]{2}\)\(0\)|^00[0-9]{2}|^0)([0-9]{9}$|[0-9\-\s]{10}$)" G "<0>+31235256677" +"(^\+[0-9]{2}|^\+[0-9]{2}\(0\)|^\(\+[0-9]{2}\)\(0\)|^00[0-9]{2}|^0)([0-9]{9}$|[0-9\-\s]{10}$)" G "<0>+31(0)235256677" +"(^\+[0-9]{2}|^\+[0-9]{2}\(0\)|^\(\+[0-9]{2}\)\(0\)|^00[0-9]{2}|^0)([0-9]{9}$|[0-9\-\s]{10}$)" G "<0>023-5256677" +"(^\+[0-9]{2}|^\+[0-9]{2}\(0\)|^\(\+[0-9]{2}\)\(0\)|^00[0-9]{2}|^0)([0-9]{9}$|[0-9\-\s]{10}$)" "+3123525667788999" +"(^\+[0-9]{2}|^\+[0-9]{2}\(0\)|^\(\+[0-9]{2}\)\(0\)|^00[0-9]{2}|^0)([0-9]{9}$|[0-9\-\s]{10}$)" "3123525667788" +"(^\+[0-9]{2}|^\+[0-9]{2}\(0\)|^\(\+[0-9]{2}\)\(0\)|^00[0-9]{2}|^0)([0-9]{9}$|[0-9\-\s]{10}$)" "232-2566778" +"^[-+]?\d*\.?\d*$" G "<0>123" +"^[-+]?\d*\.?\d*$" G "<0>+3.14159" +"^[-+]?\d*\.?\d*$" G "<0>-3.14159" +"^[-+]?\d*\.?\d*$" "abc" +"^[-+]?\d*\.?\d*$" "3.4.5" +"^[-+]?\d*\.?\d*$" "$99.95" +"^\$?([1-9]{1}[0-9]{0,2}(\,[0-9]{3})*(\.[0-9]{0,2})?|[1-9]{1}[0-9]{0,}(\.[0-9]{0,2})?|0(\.[0-9]{0,2})?|(\.[0-9]{1,2})?)$" G "<0>$1,234.50" +"^\$?([1-9]{1}[0-9]{0,2}(\,[0-9]{3})*(\.[0-9]{0,2})?|[1-9]{1}[0-9]{0,}(\.[0-9]{0,2})?|0(\.[0-9]{0,2})?|(\.[0-9]{1,2})?)$" G "<0>$0.70" 
+"^\$?([1-9]{1}[0-9]{0,2}(\,[0-9]{3})*(\.[0-9]{0,2})?|[1-9]{1}[0-9]{0,}(\.[0-9]{0,2})?|0(\.[0-9]{0,2})?|(\.[0-9]{1,2})?)$" G "<0>.7" +"^\$?([1-9]{1}[0-9]{0,2}(\,[0-9]{3})*(\.[0-9]{0,2})?|[1-9]{1}[0-9]{0,}(\.[0-9]{0,2})?|0(\.[0-9]{0,2})?|(\.[0-9]{1,2})?)$" "$0,123.50" +"^\$?([1-9]{1}[0-9]{0,2}(\,[0-9]{3})*(\.[0-9]{0,2})?|[1-9]{1}[0-9]{0,}(\.[0-9]{0,2})?|0(\.[0-9]{0,2})?|(\.[0-9]{1,2})?)$" "$00.5" +"^[A-Z]{2}[0-9]{6}[A-DFM]{1}$" G "<0>AB123456D" +"^[A-Z]{2}[0-9]{6}[A-DFM]{1}$" G "<0>AB123456F" +"^[A-Z]{2}[0-9]{6}[A-DFM]{1}$" G "<0>AB123456M" +"^[A-Z]{2}[0-9]{6}[A-DFM]{1}$" "AB123456E" +"^[A-Z]{2}[0-9]{6}[A-DFM]{1}$" "ab123456d" +#"(http|ftp|https):\/\/[\w]+(.[\w]+)([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?" G "<0>http://regxlib.com/Default.aspx" # TODO: \w in pattern +#"(http|ftp|https):\/\/[\w]+(.[\w]+)([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?" G "<0>http://electronics.cnet.com/electronics/0-6342366-8-8994967-1.html" # TODO: \w in pattern +#"(http|ftp|https):\/\/[\w]+(.[\w]+)([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?" "www.yahoo.com" # TODO: \w in pattern +"^[0-9]{4}\s{0,1}[a-zA-Z]{2}$" G "<0>2034AK" +"^[0-9]{4}\s{0,1}[a-zA-Z]{2}$" G "<0>2034 AK" +"^[0-9]{4}\s{0,1}[a-zA-Z]{2}$" G "<0>2034 ak" +"^[0-9]{4}\s{0,1}[a-zA-Z]{2}$" "2034 AK" +"^[0-9]{4}\s{0,1}[a-zA-Z]{2}$" "321321 AKSSAA" +"((\d{2})|(\d))\/((\d{2})|(\d))\/((\d{4})|(\d{2}))" G "<0>4/5/91" +"((\d{2})|(\d))\/((\d{2})|(\d))\/((\d{4})|(\d{2}))" G "<0>04/5/1991" +"((\d{2})|(\d))\/((\d{2})|(\d))\/((\d{4})|(\d{2}))" G "<0>4/05/89" +"((\d{2})|(\d))\/((\d{2})|(\d))\/((\d{4})|(\d{2}))" "4/5/1" +#"(^|\s|\()((([1-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-]((2[0-9]){1}|(3[01]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:])|(^|\s|\()((([0-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-](([11-31]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:|$|\>])){1}){1}){1}){1}" G "<0>01/01/2001 " #TODO - \s in pattern. 
+"(^|\s|\()((([1-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-]((2[0-9]){1}|(3[01]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:])|(^|\s|\()((([0-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-](([11-31]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:|$|\>])){1}){1}){1}){1}" G "<0>01-01-2001:" +"(^|\s|\()((([1-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-]((2[0-9]){1}|(3[01]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:])|(^|\s|\()((([0-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-](([11-31]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:|$|\>])){1}){1}){1}){1}" G "<0>(1-1-01)" +"(^|\s|\()((([1-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-]((2[0-9]){1}|(3[01]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:])|(^|\s|\()((([0-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-](([11-31]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:|$|\>])){1}){1}){1}){1}" "13/1/2001" +"(^|\s|\()((([1-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-]((2[0-9]){1}|(3[01]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:])|(^|\s|\()((([0-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-](([11-31]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:|$|\>])){1}){1}){1}){1}" "1-32-2001" +"(^|\s|\()((([1-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-]((2[0-9]){1}|(3[01]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:])|(^|\s|\()((([0-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-](([11-31]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:|$|\>])){1}){1}){1}){1}" "1-1-1801" +"^\d{3}\s?\d{3}$" G "<0>400 099" +"^\d{3}\s?\d{3}$" G "<0>400099" +"^\d{3}\s?\d{3}$" G "<0>400050" +"^\d{3}\s?\d{3}$" "2345678" +"^\d{3}\s?\d{3}$" "12345" +"^\d{3}\s?\d{3}$" "asdf" +"^\D?(\d{3})\D?\D?(\d{3})\D?(\d{4})$" G "<0>(111) 222-3333" +"^\D?(\d{3})\D?\D?(\d{3})\D?(\d{4})$" G "<0>1112223333" +"^\D?(\d{3})\D?\D?(\d{3})\D?(\d{4})$" G "<0>111-222-3333" +"^\D?(\d{3})\D?\D?(\d{3})\D?(\d{4})$" "11122223333" +"^\D?(\d{3})\D?\D?(\d{3})\D?(\d{4})$" "11112223333" +"^\D?(\d{3})\D?\D?(\d{3})\D?(\d{4})$" "11122233333" +"^#?([a-f]|[A-F]|[0-9]){3}(([a-f]|[A-F]|[0-9]){3})?$" G "<0>#00ccff" +"^#?([a-f]|[A-F]|[0-9]){3}(([a-f]|[A-F]|[0-9]){3})?$" G "<0>#039" +"^#?([a-f]|[A-F]|[0-9]){3}(([a-f]|[A-F]|[0-9]){3})?$" G "<0>ffffcc" +"^#?([a-f]|[A-F]|[0-9]){3}(([a-f]|[A-F]|[0-9]){3})?$" "blue" +"^#?([a-f]|[A-F]|[0-9]){3}(([a-f]|[A-F]|[0-9]){3})?$" "0x000000" +"^#?([a-f]|[A-F]|[0-9]){3}(([a-f]|[A-F]|[0-9]){3})?$" "#ff000" +"^([0-9a-fA-F][0-9a-fA-F]:){5}([0-9a-fA-F][0-9a-fA-F])$" G "<0>01:23:45:67:89:ab" +"^([0-9a-fA-F][0-9a-fA-F]:){5}([0-9a-fA-F][0-9a-fA-F])$" G "<0>01:23:45:67:89:AB" +"^([0-9a-fA-F][0-9a-fA-F]:){5}([0-9a-fA-F][0-9a-fA-F])$" G "<0>fE:dC:bA:98:76:54" +"^([0-9a-fA-F][0-9a-fA-F]:){5}([0-9a-fA-F][0-9a-fA-F])$" "01:23:45:67:89:ab:cd" +"^([0-9a-fA-F][0-9a-fA-F]:){5}([0-9a-fA-F][0-9a-fA-F])$" "01:23:45:67:89:Az" +"^([0-9a-fA-F][0-9a-fA-F]:){5}([0-9a-fA-F][0-9a-fA-F])$" "01:23:45:56:" +"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$" G "<0>http://www.blah.com/~joe" +"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$" G 
"<0>ftp://ftp.blah.co.uk:2828/blah%20blah.gif" +"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$" G "<0>https://blah.gov/blah-blah.as" +"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$" "www.blah.com" +"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$" "http://www.blah.com/I have spaces!" +"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$" "ftp://blah_underscore/[nope]" +"^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2})$|^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2}\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$" G "<0>12/01/2002" +"^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2})$|^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2}\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$" G "<0>12/01/2002 12:32:10" +"^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2})$|^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2}\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$" "32/12/2002" +"^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2})$|^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2}\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$" "12/13/2001" +"^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2})$|^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2}\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$" "12/02/06" +"^[0-9](\.[0-9]+)?$" G "<0>1.2345" +"^[0-9](\.[0-9]+)?$" G "<0>0.00001" +"^[0-9](\.[0-9]+)?$" G "<0>7" +"^[0-9](\.[0-9]+)?$" "12.2" +"^[0-9](\.[0-9]+)?$" "1.10.1" +"^[0-9](\.[0-9]+)?$" "15.98" +"^(?:[mM]{1,3})?(?:(?:[cC][dDmM])|(?:[dD]?(?:[cC]{1,3})?))?[lL]?(([xX])(?:\2{1,2}|[lL]|[cC])?)?((([iI])((\5{1,2})|[vV]|[xX]|[lL])?)|([vV]?([iI]{1,3})?))?$" G "<0>III" +"^(?:[mM]{1,3})?(?:(?:[cC][dDmM])|(?:[dD]?(?:[cC]{1,3})?))?[lL]?(([xX])(?:\2{1,2}|[lL]|[cC])?)?((([iI])((\5{1,2})|[vV]|[xX]|[lL])?)|([vV]?([iI]{1,3})?))?$" G "<0>xiv" +"^(?:[mM]{1,3})?(?:(?:[cC][dDmM])|(?:[dD]?(?:[cC]{1,3})?))?[lL]?(([xX])(?:\2{1,2}|[lL]|[cC])?)?((([iI])((\5{1,2})|[vV]|[xX]|[lL])?)|([vV]?([iI]{1,3})?))?$" G "<0>MCMLXLIX" +"^(?:[mM]{1,3})?(?:(?:[cC][dDmM])|(?:[dD]?(?:[cC]{1,3})?))?[lL]?(([xX])(?:\2{1,2}|[lL]|[cC])?)?((([iI])((\5{1,2})|[vV]|[xX]|[lL])?)|([vV]?([iI]{1,3})?))?$" "iiV" +"^(?:[mM]{1,3})?(?:(?:[cC][dDmM])|(?:[dD]?(?:[cC]{1,3})?))?[lL]?(([xX])(?:\2{1,2}|[lL]|[cC])?)?((([iI])((\5{1,2})|[vV]|[xX]|[lL])?)|([vV]?([iI]{1,3})?))?$" "MCCM" +"^(?:[mM]{1,3})?(?:(?:[cC][dDmM])|(?:[dD]?(?:[cC]{1,3})?))?[lL]?(([xX])(?:\2{1,2}|[lL]|[cC])?)?((([iI])((\5{1,2})|[vV]|[xX]|[lL])?)|([vV]?([iI]{1,3})?))?$" "XXXX" +"^[-+]?[0-9]+[.]?[0-9]*([eE][-+]?[0-9]+)?$" G "<0>123" +"^[-+]?[0-9]+[.]?[0-9]*([eE][-+]?[0-9]+)?$" G "<0>-123.35" +"^[-+]?[0-9]+[.]?[0-9]*([eE][-+]?[0-9]+)?$" G "<0>-123.35e-2" +"^[-+]?[0-9]+[.]?[0-9]*([eE][-+]?[0-9]+)?$" "abc" +"^[-+]?[0-9]+[.]?[0-9]*([eE][-+]?[0-9]+)?$" "123.32e" +"^[-+]?[0-9]+[.]?[0-9]*([eE][-+]?[0-9]+)?$" "123.32.3" +"^[a-zA-Z]+(([\'\,\.\- ][a-zA-Z ])?[a-zA-Z]*)*$" G "<0>T.F. 
Johnson" +"^[a-zA-Z]+(([\'\,\.\- ][a-zA-Z ])?[a-zA-Z]*)*$" G "<0>John O'Neil" +"^[a-zA-Z]+(([\'\,\.\- ][a-zA-Z ])?[a-zA-Z]*)*$" G "<0>Mary-Kate Johnson" +"^[a-zA-Z]+(([\'\,\.\- ][a-zA-Z ])?[a-zA-Z]*)*$" "sam_johnson" +"^[a-zA-Z]+(([\'\,\.\- ][a-zA-Z ])?[a-zA-Z]*)*$" "Joe--Bob Jones" +"^[a-zA-Z]+(([\'\,\.\- ][a-zA-Z ])?[a-zA-Z]*)*$" "dfjsd0rd" +"^(20|21|22|23|[0-1]\d)[0-5]\d$" G "<0>1200" +"^(20|21|22|23|[0-1]\d)[0-5]\d$" G "<0>1645" +"^(20|21|22|23|[0-1]\d)[0-5]\d$" G "<0>2359" +"^(20|21|22|23|[0-1]\d)[0-5]\d$" "2400" +"^(20|21|22|23|[0-1]\d)[0-5]\d$" "asbc" +"^(20|21|22|23|[0-1]\d)[0-5]\d$" "12:45" +/<[^>]*\n?.*=("|')?(.*\.jpg)("|')?.*\n?[^<]*>/ G '<0>' +/<[^>]*\n?.*=("|')?(.*\.jpg)("|')?.*\n?[^<]*>/ G "<0>" +/<[^>]*\n?.*=("|')?(.*\.jpg)("|')?.*\n?[^<]*>/ G "<0>" +/<[^>]*\n?.*=("|')?(.*\.jpg)("|')?.*\n?[^<]*>/ "= img.jpg" +/<[^>]*\n?.*=("|')?(.*\.jpg)("|')?.*\n?[^<]*>/ "img.jpg" +"^(\d{5}-\d{4}|\d{5})$|^([a-zA-Z]\d[a-zA-Z] \d[a-zA-Z]\d)$" G "<0>78754" +"^(\d{5}-\d{4}|\d{5})$|^([a-zA-Z]\d[a-zA-Z] \d[a-zA-Z]\d)$" G "<0>78754-1234" +"^(\d{5}-\d{4}|\d{5})$|^([a-zA-Z]\d[a-zA-Z] \d[a-zA-Z]\d)$" G "<0>G3H 6A3" +"^(\d{5}-\d{4}|\d{5})$|^([a-zA-Z]\d[a-zA-Z] \d[a-zA-Z]\d)$" "78754-12aA" +"^(\d{5}-\d{4}|\d{5})$|^([a-zA-Z]\d[a-zA-Z] \d[a-zA-Z]\d)$" "7875A" +"^(\d{5}-\d{4}|\d{5})$|^([a-zA-Z]\d[a-zA-Z] \d[a-zA-Z]\d)$" "g3h6a3" +#"^([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))$" G "<0>bob@somewhere.com" # TODO: \w in pattern +#"^([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))$" G "<0>bob.jones@[1.1.1.1]" +#"^([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))$" G "<0>bob@a.b.c.d.info" # TODO: \w in pattern +#"^([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))$" "bob@com" # TODO: \w in pattern +#"^([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))$" "bob.jones@some.where" # TODO: \w in pattern +#"^([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))$" "bob@1.1.1.123" # TODO: \w in pattern +#"^(([-\w \.]+)|(""[-\w \.]+"") )?<([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))>$" G "<0>" # TODO: \w in pattern +#"^(([-\w \.]+)|(""[-\w \.]+"") )?<([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))>$" G "<0>bob A. jones " # TODO: \w in pattern +#"^(([-\w \.]+)|(""[-\w \.]+"") )?<([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))>$" G "<0>bob A. jones " # TODO: \w in pattern +#"^(([-\w \.]+)|(""[-\w \.]+"") )?<([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))>$" "ab@cd.ef" # TODO: \w in pattern +#"^(([-\w \.]+)|(""[-\w \.]+"") )?<([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))>$" ""bob A. jones " # TODO: \w in pattern +#"^(([-\w \.]+)|(""[-\w \.]+"") )?<([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))>$" "bob A. 
jones " # TODO: \w in pattern +"^[A-Za-z]{1,2}[0-9A-Za-z]{1,2}[ ]?[0-9]{0,1}[A-Za-z]{2}$" G "<0>SW112LE" +"^[A-Za-z]{1,2}[0-9A-Za-z]{1,2}[ ]?[0-9]{0,1}[A-Za-z]{2}$" G "<0>SW11 2LE" +"^[A-Za-z]{1,2}[0-9A-Za-z]{1,2}[ ]?[0-9]{0,1}[A-Za-z]{2}$" G "<0>CR05LE" +"^[A-Za-z]{1,2}[0-9A-Za-z]{1,2}[ ]?[0-9]{0,1}[A-Za-z]{2}$" "12CR0LE" +"^[A-Za-z]{1,2}[0-9A-Za-z]{1,2}[ ]?[0-9]{0,1}[A-Za-z]{2}$" "12CR 0LE" +"^[A-Za-z]{1,2}[0-9A-Za-z]{1,2}[ ]?[0-9]{0,1}[A-Za-z]{2}$" "SWLE05" +"20\d{2}(-|\/)((0[1-9])|(1[0-2]))(-|\/)((0[1-9])|([1-2][0-9])|(3[0-1]))(T|\s)(([0-1][0-9])|(2[0-3])):([0-5][0-9]):([0-5][0-9])" G "<0>2099-12-31T23:59:59" +"20\d{2}(-|\/)((0[1-9])|(1[0-2]))(-|\/)((0[1-9])|([1-2][0-9])|(3[0-1]))(T|\s)(([0-1][0-9])|(2[0-3])):([0-5][0-9]):([0-5][0-9])" G "<0>2002/02/09 16:30:00" +"20\d{2}(-|\/)((0[1-9])|(1[0-2]))(-|\/)((0[1-9])|([1-2][0-9])|(3[0-1]))(T|\s)(([0-1][0-9])|(2[0-3])):([0-5][0-9]):([0-5][0-9])" G "<0>2000-01-01T00:00:00" +"20\d{2}(-|\/)((0[1-9])|(1[0-2]))(-|\/)((0[1-9])|([1-2][0-9])|(3[0-1]))(T|\s)(([0-1][0-9])|(2[0-3])):([0-5][0-9]):([0-5][0-9])" "2000-13-31T00:00:00" +"20\d{2}(-|\/)((0[1-9])|(1[0-2]))(-|\/)((0[1-9])|([1-2][0-9])|(3[0-1]))(T|\s)(([0-1][0-9])|(2[0-3])):([0-5][0-9]):([0-5][0-9])" "2002/02/33 24:00:00" +"20\d{2}(-|\/)((0[1-9])|(1[0-2]))(-|\/)((0[1-9])|([1-2][0-9])|(3[0-1]))(T|\s)(([0-1][0-9])|(2[0-3])):([0-5][0-9]):([0-5][0-9])" "2000-01-01 60:00:00" +"^((?:4\d{3})|(?:5[1-5]\d{2})|(?:6011)|(?:3[68]\d{2})|(?:30[012345]\d))[ -]?(\d{4})[ -]?(\d{4})[ -]?(\d{4}|3[4,7]\d{13})$" G "<0>6011567812345678" +"^((?:4\d{3})|(?:5[1-5]\d{2})|(?:6011)|(?:3[68]\d{2})|(?:30[012345]\d))[ -]?(\d{4})[ -]?(\d{4})[ -]?(\d{4}|3[4,7]\d{13})$" G "<0>6011 5678 1234 5678" +"^((?:4\d{3})|(?:5[1-5]\d{2})|(?:6011)|(?:3[68]\d{2})|(?:30[012345]\d))[ -]?(\d{4})[ -]?(\d{4})[ -]?(\d{4}|3[4,7]\d{13})$" G "<0>6011-5678-1234-5678" +"^((?:4\d{3})|(?:5[1-5]\d{2})|(?:6011)|(?:3[68]\d{2})|(?:30[012345]\d))[ -]?(\d{4})[ -]?(\d{4})[ -]?(\d{4}|3[4,7]\d{13})$" "1234567890123456" +"^((((0[13578])|(1[02]))[\/]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\/]?(([0-2][0-9])|(30)))|(02[\/]?[0-2][0-9]))[\/]?\d{4}$" G "<0>01/01/2001" +"^((((0[13578])|(1[02]))[\/]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\/]?(([0-2][0-9])|(30)))|(02[\/]?[0-2][0-9]))[\/]?\d{4}$" G "<0>02/29/2002" +"^((((0[13578])|(1[02]))[\/]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\/]?(([0-2][0-9])|(30)))|(02[\/]?[0-2][0-9]))[\/]?\d{4}$" G "<0>12/31/2002" +"^((((0[13578])|(1[02]))[\/]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\/]?(([0-2][0-9])|(30)))|(02[\/]?[0-2][0-9]))[\/]?\d{4}$" "1/1/02" +"^((((0[13578])|(1[02]))[\/]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\/]?(([0-2][0-9])|(30)))|(02[\/]?[0-2][0-9]))[\/]?\d{4}$" "02/30/2002" +"^((((0[13578])|(1[02]))[\/]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\/]?(([0-2][0-9])|(30)))|(02[\/]?[0-2][0-9]))[\/]?\d{4}$" "1/25/2002" +#"^(?=[^\&])(?:(?[^:/?#]+):)?(?://(?[^/?#]*))?(?[^?#]*)(?:\?(?[^#]*))?(?:#(?.*))?" G "<0>http://regexlib.com/REDetails.aspx?regexp_id=x#Details" # out of context, can't work stand-alone +#"^(?=[^\&])(?:(?[^:/?#]+):)?(?://(?[^/?#]*))?(?[^?#]*)(?:\?(?[^#]*))?(?:#(?.*))?" "&" # out of context, can't work stand-alone +"^[-+]?\d+(\.\d+)?$" G "<0>123" +"^[-+]?\d+(\.\d+)?$" G "<0>-123.45" +"^[-+]?\d+(\.\d+)?$" G "<0>+123.56" +"^[-+]?\d+(\.\d+)?$" "123x" +"^[-+]?\d+(\.\d+)?$" ".123" +"^[-+]?\d+(\.\d+)?$" "-123." 
+"^(\d{4}[- ]){3}\d{4}|\d{16}$" G "<0>1234-1234-1234-1234" +"^(\d{4}[- ]){3}\d{4}|\d{16}$" G "<0>1234 1234 1234 1234" +"^(\d{4}[- ]){3}\d{4}|\d{16}$" G "<0>1234123412341234" +"^(\d{4}[- ]){3}\d{4}|\d{16}$" "Visa" +"^(\d{4}[- ]){3}\d{4}|\d{16}$" "1234" +"^(\d{4}[- ]){3}\d{4}|\d{16}$" "123-1234-12345" +"^((4\d{3})|(5[1-5]\d{2})|(6011))-?\d{4}-?\d{4}-?\d{4}|3[4,7]\d{13}$" G "<0>6011-1111-1111-1111" +"^((4\d{3})|(5[1-5]\d{2})|(6011))-?\d{4}-?\d{4}-?\d{4}|3[4,7]\d{13}$" G "<0>5423-1111-1111-1111" +"^((4\d{3})|(5[1-5]\d{2})|(6011))-?\d{4}-?\d{4}-?\d{4}|3[4,7]\d{13}$" G "<0>341111111111111" +"^((4\d{3})|(5[1-5]\d{2})|(6011))-?\d{4}-?\d{4}-?\d{4}|3[4,7]\d{13}$" "4111-111-111-111" +"^((4\d{3})|(5[1-5]\d{2})|(6011))-?\d{4}-?\d{4}-?\d{4}|3[4,7]\d{13}$" "3411-1111-1111-111" +"^((4\d{3})|(5[1-5]\d{2})|(6011))-?\d{4}-?\d{4}-?\d{4}|3[4,7]\d{13}$" "Visa" +"^[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}$" G "<0>4D28C5AD-6482-41CD-B84E-4573F384BB5C" +"^[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}$" G "<0>B1E1282C-A35C-4D5A-BF8B-7A3A51D9E388" +"^[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}$" G "91036A4A-A0F4-43F0-8CD" +"^[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}$" "{B1E1282C-A35C-4D3A-BF8B-7A3A51D9E388}" +"^[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}$" "AAAAAAAAAAAAAAAAA" +"^[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}$" "B;E1282C-A35C-4D3A-BF8B-7A3A51D9E38" +"(^(4|5)\d{3}-?\d{4}-?\d{4}-?\d{4}|(4|5)\d{15})|(^(6011)-?\d{4}-?\d{4}-?\d{4}|(6011)-?\d{12})|(^((3\d{3}))-\d{6}-\d{5}|^((3\d{14})))" G "<0>4111-1234-1234-1234" +"(^(4|5)\d{3}-?\d{4}-?\d{4}-?\d{4}|(4|5)\d{15})|(^(6011)-?\d{4}-?\d{4}-?\d{4}|(6011)-?\d{12})|(^((3\d{3}))-\d{6}-\d{5}|^((3\d{14})))" G "<0>6011123412341234" +"(^(4|5)\d{3}-?\d{4}-?\d{4}-?\d{4}|(4|5)\d{15})|(^(6011)-?\d{4}-?\d{4}-?\d{4}|(6011)-?\d{12})|(^((3\d{3}))-\d{6}-\d{5}|^((3\d{14})))" G "<0>3711-123456-12345" +"(^(4|5)\d{3}-?\d{4}-?\d{4}-?\d{4}|(4|5)\d{15})|(^(6011)-?\d{4}-?\d{4}-?\d{4}|(6011)-?\d{12})|(^((3\d{3}))-\d{6}-\d{5}|^((3\d{14})))" "1234567890123456" +"(^(4|5)\d{3}-?\d{4}-?\d{4}-?\d{4}|(4|5)\d{15})|(^(6011)-?\d{4}-?\d{4}-?\d{4}|(6011)-?\d{12})|(^((3\d{3}))-\d{6}-\d{5}|^((3\d{14})))" "4111-123-1234-1234" +"(^(4|5)\d{3}-?\d{4}-?\d{4}-?\d{4}|(4|5)\d{15})|(^(6011)-?\d{4}-?\d{4}-?\d{4}|(6011)-?\d{12})|(^((3\d{3}))-\d{6}-\d{5}|^((3\d{14})))" "412-1234-1234-1234" +#'\[link="(?((.|\n)*?))"\](?((.|\n)*?))\[\/link\]' G '<0>[link="http://www.yahoo.com"]Yahoo[/link]' #named capture +#'\[link="(?((.|\n)*?))"\](?((.|\n)*?))\[\/link\]' "[link]http://www.yahoo.com[/link]" #named capture +#'\[link="(?((.|\n)*?))"\](?((.|\n)*?))\[\/link\]' "[link=http://www.yahoo.com]Yahoo[/link]" #named capture +"^[a-zA-Z0-9]+$" G "<0>10a" +"^[a-zA-Z0-9]+$" G "<0>ABC" +"^[a-zA-Z0-9]+$" G "<0>A3fg" +"^[a-zA-Z0-9]+$" "45.3" +"^[a-zA-Z0-9]+$" "this or that" +"^[a-zA-Z0-9]+$" "$23" +"((\(\d{3}\) ?)|(\d{3}-))?\d{3}-\d{4}" G "<0>(123) 456-7890" +"((\(\d{3}\) ?)|(\d{3}-))?\d{3}-\d{4}" G "<0>123-456-7890" +"((\(\d{3}\) ?)|(\d{3}-))?\d{3}-\d{4}" "1234567890" +"^[a-zA-Z]\w{3,14}$" G "<0>abcd" +"^[a-zA-Z]\w{3,14}$" G "<0>aBc45DSD_sdf" +"^[a-zA-Z]\w{3,14}$" G "<0>password" +"^[a-zA-Z]\w{3,14}$" "afv" +"^[a-zA-Z]\w{3,14}$" "1234" +"^[a-zA-Z]\w{3,14}$" "reallylongpassword" +"^[A-Z]{1,2}[1-9][0-9]?[A-Z]? [0-9][A-Z]{2,}|GIR 0AA$" G "<0>G1 1AA " +"^[A-Z]{1,2}[1-9][0-9]?[A-Z]? [0-9][A-Z]{2,}|GIR 0AA$" G "<0>GIR 0AA" +"^[A-Z]{1,2}[1-9][0-9]?[A-Z]? [0-9][A-Z]{2,}|GIR 0AA$" G "<0>SW1 1ZZ" +"^[A-Z]{1,2}[1-9][0-9]?[A-Z]? 
[0-9][A-Z]{2,}|GIR 0AA$" "BT01 3RT" +"^[A-Z]{1,2}[1-9][0-9]?[A-Z]? [0-9][A-Z]{2,}|GIR 0AA$" "G111 1AA" +"^0[23489]{1}(\-)?[^0\D]{1}\d{6}$" G "<0>03-6106666" +"^0[23489]{1}(\-)?[^0\D]{1}\d{6}$" G "<0>036106666" +"^0[23489]{1}(\-)?[^0\D]{1}\d{6}$" G "<0>02-5523344" +"^0[23489]{1}(\-)?[^0\D]{1}\d{6}$" "00-6106666" +"^0[23489]{1}(\-)?[^0\D]{1}\d{6}$" "03-0106666" +"^0[23489]{1}(\-)?[^0\D]{1}\d{6}$" "02-55812346" +"^0(5[012345678]|6[47]){1}(\-)?[^0\D]{1}\d{5}$" G "<0>050-346634" +"^0(5[012345678]|6[47]){1}(\-)?[^0\D]{1}\d{5}$" G "<0>058633633" +"^0(5[012345678]|6[47]){1}(\-)?[^0\D]{1}\d{5}$" G "<0>064-228226" +"^0(5[012345678]|6[47]){1}(\-)?[^0\D]{1}\d{5}$" "059-336622" +"^0(5[012345678]|6[47]){1}(\-)?[^0\D]{1}\d{5}$" "064-022663" +"^0(5[012345678]|6[47]){1}(\-)?[^0\D]{1}\d{5}$" "0545454545" +"^([A-Z]{1,2}[0-9]{1,2}|[A-Z]{3}|[A-Z]{1,2}[0-9][A-Z])( |-)[0-9][A-Z]{2}" G "<0>AA11 1AA" +"^([A-Z]{1,2}[0-9]{1,2}|[A-Z]{3}|[A-Z]{1,2}[0-9][A-Z])( |-)[0-9][A-Z]{2}" G "<0>AA1A 1AA" +"^([A-Z]{1,2}[0-9]{1,2}|[A-Z]{3}|[A-Z]{1,2}[0-9][A-Z])( |-)[0-9][A-Z]{2}" G "<0>A11-1AA" +"^([A-Z]{1,2}[0-9]{1,2}|[A-Z]{3}|[A-Z]{1,2}[0-9][A-Z])( |-)[0-9][A-Z]{2}" "111 AAA" +"^([A-Z]{1,2}[0-9]{1,2}|[A-Z]{3}|[A-Z]{1,2}[0-9][A-Z])( |-)[0-9][A-Z]{2}" "1AAA 1AA" +"^([A-Z]{1,2}[0-9]{1,2}|[A-Z]{3}|[A-Z]{1,2}[0-9][A-Z])( |-)[0-9][A-Z]{2}" "A1AA 1AA" +"@{2}((\S)+)@{2}" G "<0>@@test@@" +"@{2}((\S)+)@{2}" G "<0>@@name@@" +"@{2}((\S)+)@{2}" G "<0>@@2342@@" +"@{2}((\S)+)@{2}" "@test@" +"@{2}((\S)+)@{2}" "@@na me@@" +"@{2}((\S)+)@{2}" "@@ name@@" +"([0-1][0-9]|2[0-3]):[0-5][0-9]" G "<0>00:00" +"([0-1][0-9]|2[0-3]):[0-5][0-9]" G "<0>13:59" +"([0-1][0-9]|2[0-3]):[0-5][0-9]" G "<0>23:59" +"([0-1][0-9]|2[0-3]):[0-5][0-9]" "24:00" +"([0-1][0-9]|2[0-3]):[0-5][0-9]" "23:60" +"^[+-]?([0-9]*\.?[0-9]+|[0-9]+\.?[0-9]*)([eE][+-]?[0-9]+)?$" G "<0>23" +"^[+-]?([0-9]*\.?[0-9]+|[0-9]+\.?[0-9]*)([eE][+-]?[0-9]+)?$" G "<0>-17.e23" +"^[+-]?([0-9]*\.?[0-9]+|[0-9]+\.?[0-9]*)([eE][+-]?[0-9]+)?$" G "<0>+.23e+2" +"^[+-]?([0-9]*\.?[0-9]+|[0-9]+\.?[0-9]*)([eE][+-]?[0-9]+)?$" "+.e2" +"^[+-]?([0-9]*\.?[0-9]+|[0-9]+\.?[0-9]*)([eE][+-]?[0-9]+)?$" "23.17.5" +"^[+-]?([0-9]*\.?[0-9]+|[0-9]+\.?[0-9]*)([eE][+-]?[0-9]+)?$" "10e2.0" +"^([1-zA-Z0-1@.\s ]{1,255})$" G "<0>email@email.com" +"^([1-zA-Z0-1@.\s ]{1,255})$" G "<0>My Name" +"^([1-zA-Z0-1@.\s ]{1,255})$" G "<0>asdf12df" +"^([1-zA-Z0-1@.\s ]{1,255})$" "‘,\*&$<>" +"^([1-zA-Z0-1@.\s ]{1,255})$" "1001' string" +"^((0[1-9])|(1[0-2]))\/(\d{4})$" G "<0>12/2002" +"^((0[1-9])|(1[0-2]))\/(\d{4})$" G "<0>11/1900" +"^((0[1-9])|(1[0-2]))\/(\d{4})$" G "<0>02/1977" +"^((0[1-9])|(1[0-2]))\/(\d{4})$" "1/1977" +"^((0[1-9])|(1[0-2]))\/(\d{4})$" "00/000" +"^((0[1-9])|(1[0-2]))\/(\d{4})$" "15/2002" +"^\(\d{1,2}(\s\d{1,2}){1,2}\)\s(\d{1,2}(\s\d{1,2}){1,2})((-(\d{1,4})){0,1})$" G "<0>(0 34 56) 34 56 67" +"^\(\d{1,2}(\s\d{1,2}){1,2}\)\s(\d{1,2}(\s\d{1,2}){1,2})((-(\d{1,4})){0,1})$" G "<0>(03 45) 5 67 67" +"^\(\d{1,2}(\s\d{1,2}){1,2}\)\s(\d{1,2}(\s\d{1,2}){1,2})((-(\d{1,4})){0,1})$" G "<0>(0 45) 2 33 45-45" +"^\(\d{1,2}(\s\d{1,2}){1,2}\)\s(\d{1,2}(\s\d{1,2}){1,2})((-(\d{1,4})){0,1})$" "(2345) 34 34" +"^\(\d{1,2}(\s\d{1,2}){1,2}\)\s(\d{1,2}(\s\d{1,2}){1,2})((-(\d{1,4})){0,1})$" "(0 56) 456 456" +"^\(\d{1,2}(\s\d{1,2}){1,2}\)\s(\d{1,2}(\s\d{1,2}){1,2})((-(\d{1,4})){0,1})$" "(3 45) 2 34-45678" +"(?:\d|I{1,3})?\s?\w{2,}\.?\s*\d{1,}\:\d{1,}-?,?\d{0,2}(?:,\d{0,2}){0,2}" G "<0>Genesis 3:3-4,6" +"(?:\d|I{1,3})?\s?\w{2,}\.?\s*\d{1,}\:\d{1,}-?,?\d{0,2}(?:,\d{0,2}){0,2}" G "<0>II Sam 2:11,2" 
+"(?:\d|I{1,3})?\s?\w{2,}\.?\s*\d{1,}\:\d{1,}-?,?\d{0,2}(?:,\d{0,2}){0,2}" G "<0>2 Tim 3:16" +"(?:\d|I{1,3})?\s?\w{2,}\.?\s*\d{1,}\:\d{1,}-?,?\d{0,2}(?:,\d{0,2}){0,2}" "Genesis chap 3, verse 3" +"(?:\d|I{1,3})?\s?\w{2,}\.?\s*\d{1,}\:\d{1,}-?,?\d{0,2}(?:,\d{0,2}){0,2}" "2nd Samuel 2" +"(\[[Ii][Mm][Gg]\])(\S+?)(\[\/[Ii][Mm][Gg]\])" G "<0>[IMG]http://bleh.jpg[/IMG]" +"(\[[Ii][Mm][Gg]\])(\S+?)(\[\/[Ii][Mm][Gg]\])" G "<0>[ImG]bleh[/imG]" +"(\[[Ii][Mm][Gg]\])(\S+?)(\[\/[Ii][Mm][Gg]\])" G "<0>[img]ftp://login:pass@bleh.gif[/img]" +"(\[[Ii][Mm][Gg]\])(\S+?)(\[\/[Ii][Mm][Gg]\])" '' +"^([0-9]{1,2})[./-]+([0-9]{1,2})[./-]+([0-9]{2}|[0-9]{4})$" G "<0>10/03/1979" +"^([0-9]{1,2})[./-]+([0-9]{1,2})[./-]+([0-9]{2}|[0-9]{4})$" G "<0>1-1-02" +"^([0-9]{1,2})[./-]+([0-9]{1,2})[./-]+([0-9]{2}|[0-9]{4})$" G "<0>01.1.2003" +"^([0-9]{1,2})[./-]+([0-9]{1,2})[./-]+([0-9]{2}|[0-9]{4})$" "10/03/197" +"^([0-9]{1,2})[./-]+([0-9]{1,2})[./-]+([0-9]{2}|[0-9]{4})$" "01-02-003" +"^([0-9]{1,2})[./-]+([0-9]{1,2})[./-]+([0-9]{2}|[0-9]{4})$" "01 02 03" +#"^(?(^00000(|-0000))|(\d{5}(|-\d{4})))$" G "<0>12345" # No Conditionals? +#"^(?(^00000(|-0000))|(\d{5}(|-\d{4})))$" G "<0>12345-6789" # No Conditionals? +#"^(?(^00000(|-0000))|(\d{5}(|-\d{4})))$" "00000" # No Conditionals? +#"^(?(^00000(|-0000))|(\d{5}(|-\d{4})))$" "00000-0000" # No Conditionals? +#"^(?(^00000(|-0000))|(\d{5}(|-\d{4})))$" "a4650-465s" # No Conditionals? +"^((0?[1-9])|((1|2)[0-9])|30|31)$" G "<0>01" +"^((0?[1-9])|((1|2)[0-9])|30|31)$" G "<0>12" +"^((0?[1-9])|((1|2)[0-9])|30|31)$" G "<0>31" +"^((0?[1-9])|((1|2)[0-9])|30|31)$" "123" +"^((0?[1-9])|((1|2)[0-9])|30|31)$" "32" +"^((0?[1-9])|((1|2)[0-9])|30|31)$" "abc" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?(\d{3}([\s\-./\\])?\d{4}|[a-zA-Z0-9]{7})$" G "<0>1.222.333.1234" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?(\d{3}([\s\-./\\])?\d{4}|[a-zA-Z0-9]{7})$" G "<0>1-223-123-1232" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?(\d{3}([\s\-./\\])?\d{4}|[a-zA-Z0-9]{7})$" G "<0>12223334444" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?(\d{3}([\s\-./\\])?\d{4}|[a-zA-Z0-9]{7})$" "1.1.123123.123" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?(\d{3}([\s\-./\\])?\d{4}|[a-zA-Z0-9]{7})$" "12-1322-112-31" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?(\d{3}([\s\-./\\])?\d{4}|[a-zA-Z0-9]{7})$" "11231321131" +"^([A-PR-UWYZ0-9][A-HK-Y0-9][AEHMNPRTVXY0-9]?[ABEHMNPRVWXY0-9]? {1,2}[0-9][ABD-HJLN-UW-Z]{2}|GIR 0AA)$" G "<0>DN3 6GB" +"^([A-PR-UWYZ0-9][A-HK-Y0-9][AEHMNPRTVXY0-9]?[ABEHMNPRVWXY0-9]? {1,2}[0-9][ABD-HJLN-UW-Z]{2}|GIR 0AA)$" G "<0>SW42 4RG" +"^([A-PR-UWYZ0-9][A-HK-Y0-9][AEHMNPRTVXY0-9]?[ABEHMNPRVWXY0-9]? {1,2}[0-9][ABD-HJLN-UW-Z]{2}|GIR 0AA)$" G "<0>GIR 0AA" +"^([A-PR-UWYZ0-9][A-HK-Y0-9][AEHMNPRTVXY0-9]?[ABEHMNPRVWXY0-9]? {1,2}[0-9][ABD-HJLN-UW-Z]{2}|GIR 0AA)$" "SEW4 5TY" +"^([A-PR-UWYZ0-9][A-HK-Y0-9][AEHMNPRTVXY0-9]?[ABEHMNPRVWXY0-9]? {1,2}[0-9][ABD-HJLN-UW-Z]{2}|GIR 0AA)$" "AA2C 4FG" +"^([A-PR-UWYZ0-9][A-HK-Y0-9][AEHMNPRTVXY0-9]?[ABEHMNPRVWXY0-9]? 
{1,2}[0-9][ABD-HJLN-UW-Z]{2}|GIR 0AA)$" "AA2 4CV" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z]).{4,8}$" G "<0>asD1" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z]).{4,8}$" G "<0>asDF1234" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z]).{4,8}$" G "<0>ASPgo123" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z]).{4,8}$" "asdf" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z]).{4,8}$" "1234" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z]).{4,8}$" "ASDF12345" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?([0-9]{3}([\s\-./\\])?[0-9]{4}|[a-zA-Z0-9]{7}|([0-9]{3}[-][a-zA-Z0-9]{4}))" G "<0>1.222.333.1234" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?([0-9]{3}([\s\-./\\])?[0-9]{4}|[a-zA-Z0-9]{7}|([0-9]{3}[-][a-zA-Z0-9]{4}))" G "<0>1-223-123-1232" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?([0-9]{3}([\s\-./\\])?[0-9]{4}|[a-zA-Z0-9]{7}|([0-9]{3}[-][a-zA-Z0-9]{4}))" G "<0>1-888-425-DELL" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?([0-9]{3}([\s\-./\\])?[0-9]{4}|[a-zA-Z0-9]{7}|([0-9]{3}[-][a-zA-Z0-9]{4}))" "1.1.123123.123" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?([0-9]{3}([\s\-./\\])?[0-9]{4}|[a-zA-Z0-9]{7}|([0-9]{3}[-][a-zA-Z0-9]{4}))" "12-1322-112-31" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?([0-9]{3}([\s\-./\\])?[0-9]{4}|[a-zA-Z0-9]{7}|([0-9]{3}[-][a-zA-Z0-9]{4}))" "1-800-CALL-DEL" +"^(([0]?[1-9]|1[0-2])(:)([0-5][0-9]))$" G "<0>09:00" +"^(([0]?[1-9]|1[0-2])(:)([0-5][0-9]))$" G "<0>9:00" +"^(([0]?[1-9]|1[0-2])(:)([0-5][0-9]))$" G "<0>11:35" +"^(([0]?[1-9]|1[0-2])(:)([0-5][0-9]))$" "13:00" +"^(([0]?[1-9]|1[0-2])(:)([0-5][0-9]))$" "9.00" +"^(([0]?[1-9]|1[0-2])(:)([0-5][0-9]))$" "6:60" +"^([1-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])$" G "<0>1" +"^([1-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])$" G "<0>108" +"^([1-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])$" G "<0>255" +"^([1-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])$" "01" +"^([1-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])$" "256" +"^((((0[13578])|([13578])|(1[02]))[\/](([1-9])|([0-2][0-9])|(3[01])))|(((0[469])|([469])|(11))[\/](([1-9])|([0-2][0-9])|(30)))|((2|02)[\/](([1-9])|([0-2][0-9]))))[\/]\d{4}$|^\d{4}$" G "<0>01/01/2001" +"^((((0[13578])|([13578])|(1[02]))[\/](([1-9])|([0-2][0-9])|(3[01])))|(((0[469])|([469])|(11))[\/](([1-9])|([0-2][0-9])|(30)))|((2|02)[\/](([1-9])|([0-2][0-9]))))[\/]\d{4}$|^\d{4}$" G "<0>1/01/2001" +"^((((0[13578])|([13578])|(1[02]))[\/](([1-9])|([0-2][0-9])|(3[01])))|(((0[469])|([469])|(11))[\/](([1-9])|([0-2][0-9])|(30)))|((2|02)[\/](([1-9])|([0-2][0-9]))))[\/]\d{4}$|^\d{4}$" G "<0>2002" +"^((((0[13578])|([13578])|(1[02]))[\/](([1-9])|([0-2][0-9])|(3[01])))|(((0[469])|([469])|(11))[\/](([1-9])|([0-2][0-9])|(30)))|((2|02)[\/](([1-9])|([0-2][0-9]))))[\/]\d{4}$|^\d{4}$" "2/30/2002" +"^((((0[13578])|([13578])|(1[02]))[\/](([1-9])|([0-2][0-9])|(3[01])))|(((0[469])|([469])|(11))[\/](([1-9])|([0-2][0-9])|(30)))|((2|02)[\/](([1-9])|([0-2][0-9]))))[\/]\d{4}$|^\d{4}$" "13/23/2002" +"^((((0[13578])|([13578])|(1[02]))[\/](([1-9])|([0-2][0-9])|(3[01])))|(((0[469])|([469])|(11))[\/](([1-9])|([0-2][0-9])|(30)))|((2|02)[\/](([1-9])|([0-2][0-9]))))[\/]\d{4}$|^\d{4}$" "12345" +"^[A-Za-z]{2}[0-9]{6}[A-Za-z]{1}$" G "<0>SP939393H" +"^[A-Za-z]{2}[0-9]{6}[A-Za-z]{1}$" G "<0>PX123456D" +"^[A-Za-z]{2}[0-9]{6}[A-Za-z]{1}$" G "<0>SW355667G" +"^[A-Za-z]{2}[0-9]{6}[A-Za-z]{1}$" "12SP9393H" +"^[A-Za-z]{2}[0-9]{6}[A-Za-z]{1}$" "S3P93930D" +"^[A-Za-z]{2}[0-9]{6}[A-Za-z]{1}$" "11223344SP00ddSS" +"(^0[78][2347][0-9]{7})" G "<0>0834128458" +"(^0[78][2347][0-9]{7})" G "<0>0749526308" +"(^0[78][2347][0-9]{7})" "0861212308" +"(^0[78][2347][0-9]{7})" 
"0892549851" +"^([A-HJ-TP-Z]{1}\d{4}[A-Z]{3}|[a-z]{1}\d{4}[a-hj-tp-z]{3})$" G "<0>C1406HHA" +"^([A-HJ-TP-Z]{1}\d{4}[A-Z]{3}|[a-z]{1}\d{4}[a-hj-tp-z]{3})$" G "<0>A4126AAB" +"^([A-HJ-TP-Z]{1}\d{4}[A-Z]{3}|[a-z]{1}\d{4}[a-hj-tp-z]{3})$" G "<0>c1406hha" +"^([A-HJ-TP-Z]{1}\d{4}[A-Z]{3}|[a-z]{1}\d{4}[a-hj-tp-z]{3})$" "c1406HHA" +"^([A-HJ-TP-Z]{1}\d{4}[A-Z]{3}|[a-z]{1}\d{4}[a-hj-tp-z]{3})$" "4126" +"^([A-HJ-TP-Z]{1}\d{4}[A-Z]{3}|[a-z]{1}\d{4}[a-hj-tp-z]{3})$" "C1406hha" +"^(((25[0-5]|2[0-4][0-9]|19[0-1]|19[3-9]|18[0-9]|17[0-1]|17[3-9]|1[0-6][0-9]|1[1-9]|[2-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))|(192\.(25[0-5]|2[0-4][0-9]|16[0-7]|169|1[0-5][0-9]|1[7-9][0-9]|[1-9][0-9]|[0-9]))|(172\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|1[0-5]|3[2-9]|[4-9][0-9]|[0-9])))\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$" G "<0>66.129.71.120" +"^(((25[0-5]|2[0-4][0-9]|19[0-1]|19[3-9]|18[0-9]|17[0-1]|17[3-9]|1[0-6][0-9]|1[1-9]|[2-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))|(192\.(25[0-5]|2[0-4][0-9]|16[0-7]|169|1[0-5][0-9]|1[7-9][0-9]|[1-9][0-9]|[0-9]))|(172\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|1[0-5]|3[2-9]|[4-9][0-9]|[0-9])))\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$" G "<0>207.46.230.218" +"^(((25[0-5]|2[0-4][0-9]|19[0-1]|19[3-9]|18[0-9]|17[0-1]|17[3-9]|1[0-6][0-9]|1[1-9]|[2-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))|(192\.(25[0-5]|2[0-4][0-9]|16[0-7]|169|1[0-5][0-9]|1[7-9][0-9]|[1-9][0-9]|[0-9]))|(172\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|1[0-5]|3[2-9]|[4-9][0-9]|[0-9])))\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$" G "<0>64.58.76.225" +"^(((25[0-5]|2[0-4][0-9]|19[0-1]|19[3-9]|18[0-9]|17[0-1]|17[3-9]|1[0-6][0-9]|1[1-9]|[2-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))|(192\.(25[0-5]|2[0-4][0-9]|16[0-7]|169|1[0-5][0-9]|1[7-9][0-9]|[1-9][0-9]|[0-9]))|(172\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|1[0-5]|3[2-9]|[4-9][0-9]|[0-9])))\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$" "10.0.5.4" +"^(((25[0-5]|2[0-4][0-9]|19[0-1]|19[3-9]|18[0-9]|17[0-1]|17[3-9]|1[0-6][0-9]|1[1-9]|[2-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))|(192\.(25[0-5]|2[0-4][0-9]|16[0-7]|169|1[0-5][0-9]|1[7-9][0-9]|[1-9][0-9]|[0-9]))|(172\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|1[0-5]|3[2-9]|[4-9][0-9]|[0-9])))\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$" "192.168.0.1" +"^(((25[0-5]|2[0-4][0-9]|19[0-1]|19[3-9]|18[0-9]|17[0-1]|17[3-9]|1[0-6][0-9]|1[1-9]|[2-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))|(192\.(25[0-5]|2[0-4][0-9]|16[0-7]|169|1[0-5][0-9]|1[7-9][0-9]|[1-9][0-9]|[0-9]))|(172\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|1[0-5]|3[2-9]|[4-9][0-9]|[0-9])))\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$" "my ip address" +"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" G "<0>foo@foo.com" +"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" G "<0>foo@foo-foo.com.au" +"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" G "<0>foo@foo.foo.info" 
+"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" "foo@.com" +"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" "foo@foo..com" +"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" "foo@me@.com" +"/\*[\d\D]*?\*/" G "<0>/* my comment */" +"/\*[\d\D]*?\*/" G "<0>/* my multiline comment */" +"/\*[\d\D]*?\*/" G "<0>/* my nested comment */" +"/\*[\d\D]*?\*/" "*/ anything here /*" +"/\*[\d\D]*?\*/" "anything between 2 separate comments" +"/\*[\d\D]*?\*/" "\* *\\" +"/\*[\p{N}\P{N}]*?\*/" G "<0>/* my comment */" +"/\*[\p{N}\P{N}]*?\*/" G "<0>/* my multiline comment */" +"/\*[\p{N}\P{N}]*?\*/" G "<0>/* my nested comment */" +"/\*[\p{N}\P{N}]*?\*/" "*/ anything here /*" +"/\*[\p{N}\P{N}]*?\*/" "anything between 2 separate comments" +"/\*[\p{N}\P{N}]*?\*/" "\* *\\" +"((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((\d{4})|(\d{2}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((\d{4}|\d{2})))" G "<0>1/31/2002" +"((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((\d{4})|(\d{2}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((\d{4}|\d{2})))" G "<0>04-30-02" +"((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((\d{4})|(\d{2}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((\d{4}|\d{2})))" G "<0>12-01/2002" +"((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((\d{4})|(\d{2}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((\d{4}|\d{2})))" "2/31/2002" +"((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((\d{4})|(\d{2}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((\d{4}|\d{2})))" "13/0/02" +"((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((\d{4})|(\d{2}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((\d{4}|\d{2})))" "Jan 1, 2001" +'^(([^<>;()\[\]\\.,;:@"]+(\.[^<>()\[\]\\.,;:@"]+)*)|(".+"))@((([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))\.)*(([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))$' G "<0>blah@[10.0.0.1]" +'^(([^<>;()\[\]\\.,;:@"]+(\.[^<>()\[\]\\.,;:@"]+)*)|(".+"))@((([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))\.)*(([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))$' G "<0>a@b.c" +'^(([^<>;()\[\]\\.,;:@"]+(\.[^<>()\[\]\\.,;:@"]+)*)|(".+"))@((([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))\.)*(([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))$' "non@match@." 
+"^\d{9}[\d|X]$" G "<0>1234123412" +"^\d{9}[\d|X]$" G "<0>123412341X" +"^\d{9}[\d|X]$" "not an isbn" +"^\d{9}(\d|X)$" G "<0>1234123412" +"^\d{9}(\d|X)$" G "<0>123412341X" +"^\d{9}(\d|X)$" "not an isbn" +"^(([1-9])|(0[1-9])|(1[0-2]))\/(([0-9])|([0-2][0-9])|(3[0-1]))\/(([0-9][0-9])|([1-2][0,9][0-9][0-9]))$" G "<0>01/01/2001" +"^(([1-9])|(0[1-9])|(1[0-2]))\/(([0-9])|([0-2][0-9])|(3[0-1]))\/(([0-9][0-9])|([1-2][0,9][0-9][0-9]))$" G "<0>1/1/1999" +"^(([1-9])|(0[1-9])|(1[0-2]))\/(([0-9])|([0-2][0-9])|(3[0-1]))\/(([0-9][0-9])|([1-2][0,9][0-9][0-9]))$" G "<0>10/20/2080" +"^(([1-9])|(0[1-9])|(1[0-2]))\/(([0-9])|([0-2][0-9])|(3[0-1]))\/(([0-9][0-9])|([1-2][0,9][0-9][0-9]))$" "13/01/2001" +"^(([1-9])|(0[1-9])|(1[0-2]))\/(([0-9])|([0-2][0-9])|(3[0-1]))\/(([0-9][0-9])|([1-2][0,9][0-9][0-9]))$" "1/1/1800" +"^(([1-9])|(0[1-9])|(1[0-2]))\/(([0-9])|([0-2][0-9])|(3[0-1]))\/(([0-9][0-9])|([1-2][0,9][0-9][0-9]))$" "10/32/2080" +"^\d*\.?((25)|(50)|(5)|(75)|(0)|(00))?$" G "<0>0.25" +"^\d*\.?((25)|(50)|(5)|(75)|(0)|(00))?$" G "<0>.75" +"^\d*\.?((25)|(50)|(5)|(75)|(0)|(00))?$" G "<0>123.50" +"^\d*\.?((25)|(50)|(5)|(75)|(0)|(00))?$" ".77" +"^\d*\.?((25)|(50)|(5)|(75)|(0)|(00))?$" "1.435" +"^(s-|S-){0,1}[0-9]{3}\s?[0-9]{2}$" G "<0>12345" +"^(s-|S-){0,1}[0-9]{3}\s?[0-9]{2}$" G "<0>932 68" +"^(s-|S-){0,1}[0-9]{3}\s?[0-9]{2}$" G "<0>S-621 46" +"^(s-|S-){0,1}[0-9]{3}\s?[0-9]{2}$" "5367" +"^(s-|S-){0,1}[0-9]{3}\s?[0-9]{2}$" "425611" +"^(s-|S-){0,1}[0-9]{3}\s?[0-9]{2}$" "31 545" +"^\d{5}(-\d{4})?$" G "<0>48222" +"^\d{5}(-\d{4})?$" G "<0>48222-1746" +"^\d{5}(-\d{4})?$" "4632" +"^\d{5}(-\d{4})?$" "Blake" +"^\d{5}(-\d{4})?$" "37333-32" +'^(?!^(PRN|AUX|CLOCK\$|NUL|CON|COM\d|LPT\d|\..*)(\..+)?$)[^\x00-\x1f\\?*:\";|/]+$' G "<0>test.txt" +'^(?!^(PRN|AUX|CLOCK\$|NUL|CON|COM\d|LPT\d|\..*)(\..+)?$)[^\x00-\x1f\\?*:\";|/]+$' G "<0>test.jpg.txt" +'^(?!^(PRN|AUX|CLOCK\$|NUL|CON|COM\d|LPT\d|\..*)(\..+)?$)[^\x00-\x1f\\?*:\";|/]+$' G "<0>a&b c.bmp" +'^(?!^(PRN|AUX|CLOCK\$|NUL|CON|COM\d|LPT\d|\..*)(\..+)?$)[^\x00-\x1f\\?*:\";|/]+$' "CON" +'^(?!^(PRN|AUX|CLOCK\$|NUL|CON|COM\d|LPT\d|\..*)(\..+)?$)[^\x00-\x1f\\?*:\";|/]+$' ".pdf" +'^(?!^(PRN|AUX|CLOCK\$|NUL|CON|COM\d|LPT\d|\..*)(\..+)?$)[^\x00-\x1f\\?*:\";|/]+$' "test:2.pdf" +"^(\d{1,3}'(\d{3}')*\d{3}(\.\d{1,3})?|\d{1,3}(\.\d{3})?)$" G "<0>1'235.140" +"^(\d{1,3}'(\d{3}')*\d{3}(\.\d{1,3})?|\d{1,3}(\.\d{3})?)$" G "<0>1'222'333.120" +"^(\d{1,3}'(\d{3}')*\d{3}(\.\d{1,3})?|\d{1,3}(\.\d{3})?)$" G "<0>456" +"^(\d{1,3}'(\d{3}')*\d{3}(\.\d{1,3})?|\d{1,3}(\.\d{3})?)$" "1234.500" +"^(\d{1,3}'(\d{3}')*\d{3}(\.\d{1,3})?|\d{1,3}(\.\d{3})?)$" "78'45.123" +"^(\d{1,3}'(\d{3}')*\d{3}(\.\d{1,3})?|\d{1,3}(\.\d{3})?)$" "123,0012" +"^[a-zA-Z][0-9][a-zA-Z]\s?[0-9][a-zA-Z][0-9]$" G "<0>T2p 3c7" +"^[a-zA-Z][0-9][a-zA-Z]\s?[0-9][a-zA-Z][0-9]$" G "<0>T3P3c7" +"^[a-zA-Z][0-9][a-zA-Z]\s?[0-9][a-zA-Z][0-9]$" G "<0>T2P 3C7" +"^[a-zA-Z][0-9][a-zA-Z]\s?[0-9][a-zA-Z][0-9]$" "123456" +"^[a-zA-Z][0-9][a-zA-Z]\s?[0-9][a-zA-Z][0-9]$" "3C7T2P" +"^[a-zA-Z][0-9][a-zA-Z]\s?[0-9][a-zA-Z][0-9]$" "11T21RWW" +"^\$[0-9]+(\.[0-9][0-9])?$" G "<0>$1.50" +"^\$[0-9]+(\.[0-9][0-9])?$" G "<0>$49" +"^\$[0-9]+(\.[0-9][0-9])?$" G "<0>$0.50" +"^\$[0-9]+(\.[0-9][0-9])?$" "1.5" +"^\$[0-9]+(\.[0-9][0-9])?$" "$1.333" +"^\$[0-9]+(\.[0-9][0-9])?$" "this $5.12 fails" +"\b((25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\.){3}(25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\b" G "<0>217.6.9.89" +"\b((25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\.){3}(25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\b" G "<0>0.0.0.0" +"\b((25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\.){3}(25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\b" G 
"<0>255.255.255.255" +"\b((25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\.){3}(25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\b" "256.0.0.0" +"\b((25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\.){3}(25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\b" "0978.3.3.3" +"\b((25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\.){3}(25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\b" "65.4t.54.3" +"((mailto\:|(news|(ht|f)tp(s?))\://){1}\S+)" G "<0>http://www.aspemporium.com" +"((mailto\:|(news|(ht|f)tp(s?))\://){1}\S+)" G "<0>mailto:dominionx@hotmail.com" +"((mailto\:|(news|(ht|f)tp(s?))\://){1}\S+)" G "<0>ftp://ftp.test.com" +"((mailto\:|(news|(ht|f)tp(s?))\://){1}\S+)" "www.aspemporium.com" +"((mailto\:|(news|(ht|f)tp(s?))\://){1}\S+)" "dominionx@hotmail.com" +"((mailto\:|(news|(ht|f)tp(s?))\://){1}\S+)" "bloggs" +"\(([0-9]{2}|0{1}((x|[0-9]){2}[0-9]{2}))\)\s*[0-9]{3,4}[- ]*[0-9]{4}" G "<0>(12) 123 1234" +"\(([0-9]{2}|0{1}((x|[0-9]){2}[0-9]{2}))\)\s*[0-9]{3,4}[- ]*[0-9]{4}" G "<0>(01512) 123 1234" +"\(([0-9]{2}|0{1}((x|[0-9]){2}[0-9]{2}))\)\s*[0-9]{3,4}[- ]*[0-9]{4}" G "<0>(0xx12) 1234 1234" +"\(([0-9]{2}|0{1}((x|[0-9]){2}[0-9]{2}))\)\s*[0-9]{3,4}[- ]*[0-9]{4}" "12 123 1234" +"\(([0-9]{2}|0{1}((x|[0-9]){2}[0-9]{2}))\)\s*[0-9]{3,4}[- ]*[0-9]{4}" "(012) 123/1234" +"\(([0-9]{2}|0{1}((x|[0-9]){2}[0-9]{2}))\)\s*[0-9]{3,4}[- ]*[0-9]{4}" "(012) 123 12345" +"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" G "<0>bob-smith@foo.com" +"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" G "<0>bob.smith@foo.com" +"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" G "<0>bob_smith@foo.com" +"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" "-smith@foo.com" +"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" ".smith@foo.com" +"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" "smith@foo_com" +"^(?=.*\d).{4,8}$" G "<0>1234" +"^(?=.*\d).{4,8}$" G "<0>asdf1234" +"^(?=.*\d).{4,8}$" G "<0>asp123" +"^(?=.*\d).{4,8}$" "asdf" +"^(?=.*\d).{4,8}$" "asdf12345" +"^(?=.*\d).{4,8}$" "password" +"[^A-Za-z0-9_@\.]|@{2,}|\.{5,}" G "<0>user name" +"[^A-Za-z0-9_@\.]|@{2,}|\.{5,}" G "<0>user#name" +"[^A-Za-z0-9_@\.]|@{2,}|\.{5,}" G "<0>....." +"[^A-Za-z0-9_@\.]|@{2,}|\.{5,}" "User_Name1" +"[^A-Za-z0-9_@\.]|@{2,}|\.{5,}" "username@foo.com" +"[^A-Za-z0-9_@\.]|@{2,}|\.{5,}" "user.name@mail.foo.com" +"^100$|^[0-9]{1,2}$|^[0-9]{1,2}\,[0-9]{1,3}$" G "<0>12,654" +"^100$|^[0-9]{1,2}$|^[0-9]{1,2}\,[0-9]{1,3}$" G "<0>1,987" +"^100$|^[0-9]{1,2}$|^[0-9]{1,2}\,[0-9]{1,3}$" "128,2" +"^100$|^[0-9]{1,2}$|^[0-9]{1,2}\,[0-9]{1,3}$" "12," +"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*[^\.\,\)\(\s]$" G "<0>https://www.restrictd.com/~myhome/" +"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*[^\.\,\)\(\s]$" "http://www.krumedia.com." 
+"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*[^\.\,\)\(\s]$" "(http://www.krumedia.com)" +"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*[^\.\,\)\(\s]$" "http://www.krumedia.com," +"(\d{1,3},(\d{3},)*\d{3}(\.\d{1,3})?|\d{1,3}(\.\d{3})?)$" G "<0>2&651.50" +"(\d{1,3},(\d{3},)*\d{3}(\.\d{1,3})?|\d{1,3}(\.\d{3})?)$" G "<0>987.895" +"(\d{1,3},(\d{3},)*\d{3}(\.\d{1,3})?|\d{1,3}(\.\d{3})?)$" "25$%787*" +"\$[0-9]?[0-9]?[0-9]?((\,[0-9][0-9][0-9])*)?(\.[0-9][0-9]?)?$" G "<0>$1,456,983.00" +"\$[0-9]?[0-9]?[0-9]?((\,[0-9][0-9][0-9])*)?(\.[0-9][0-9]?)?$" G "<0>$1,700.07" +"\$[0-9]?[0-9]?[0-9]?((\,[0-9][0-9][0-9])*)?(\.[0-9][0-9]?)?$" G "<0>$68,944.23" +"\$[0-9]?[0-9]?[0-9]?((\,[0-9][0-9][0-9])*)?(\.[0-9][0-9]?)?$" "$20,86.93" +"\$[0-9]?[0-9]?[0-9]?((\,[0-9][0-9][0-9])*)?(\.[0-9][0-9]?)?$" "$1098.84" +"\$[0-9]?[0-9]?[0-9]?((\,[0-9][0-9][0-9])*)?(\.[0-9][0-9]?)?$" "$150." +"\$[0-9]?[0-9]?[0-9]?((\,[0-9][0-9][0-9])*)?(\.[0-9][0-9])?$" G "<0>$28,009,987.88" +"\$[0-9]?[0-9]?[0-9]?((\,[0-9][0-9][0-9])*)?(\.[0-9][0-9])?$" G "<0>$23,099.05" +"\$[0-9]?[0-9]?[0-9]?((\,[0-9][0-9][0-9])*)?(\.[0-9][0-9])?$" G "<0>$.88" +"\$[0-9]?[0-9]?[0-9]?((\,[0-9][0-9][0-9])*)?(\.[0-9][0-9])?$" "$234,5.99" +"^((((31\/(0?[13578]|1[02]))|((29|30)\/(0?[1,3-9]|1[0-2])))\/(1[6-9]|[2-9]\d)?\d{2})|(29\/0?2\/(((1[6-9]|[2-9]\d)?(0[48]|[2468][048]|[13579][26])|((16|[2468][048]|[3579][26])00))))|(0?[1-9]|1\d|2[0-8])\/((0?[1-9])|(1[0-2]))\/((1[6-9]|[2-9]\d)?\d{2})) (20|21|22|23|[0-1]?\d):[0-5]?\d:[0-5]?\d$" G "<0>29/02/2004 20:15:27" +"^((((31\/(0?[13578]|1[02]))|((29|30)\/(0?[1,3-9]|1[0-2])))\/(1[6-9]|[2-9]\d)?\d{2})|(29\/0?2\/(((1[6-9]|[2-9]\d)?(0[48]|[2468][048]|[13579][26])|((16|[2468][048]|[3579][26])00))))|(0?[1-9]|1\d|2[0-8])\/((0?[1-9])|(1[0-2]))\/((1[6-9]|[2-9]\d)?\d{2})) (20|21|22|23|[0-1]?\d):[0-5]?\d:[0-5]?\d$" G "<0>29/2/04 8:9:5" +"^((((31\/(0?[13578]|1[02]))|((29|30)\/(0?[1,3-9]|1[0-2])))\/(1[6-9]|[2-9]\d)?\d{2})|(29\/0?2\/(((1[6-9]|[2-9]\d)?(0[48]|[2468][048]|[13579][26])|((16|[2468][048]|[3579][26])00))))|(0?[1-9]|1\d|2[0-8])\/((0?[1-9])|(1[0-2]))\/((1[6-9]|[2-9]\d)?\d{2})) (20|21|22|23|[0-1]?\d):[0-5]?\d:[0-5]?\d$" G "<0>31/3/2004 9:20:17" +"^((((31\/(0?[13578]|1[02]))|((29|30)\/(0?[1,3-9]|1[0-2])))\/(1[6-9]|[2-9]\d)?\d{2})|(29\/0?2\/(((1[6-9]|[2-9]\d)?(0[48]|[2468][048]|[13579][26])|((16|[2468][048]|[3579][26])00))))|(0?[1-9]|1\d|2[0-8])\/((0?[1-9])|(1[0-2]))\/((1[6-9]|[2-9]\d)?\d{2})) (20|21|22|23|[0-1]?\d):[0-5]?\d:[0-5]?\d$" "29/02/2003 20:15:15" +"^((((31\/(0?[13578]|1[02]))|((29|30)\/(0?[1,3-9]|1[0-2])))\/(1[6-9]|[2-9]\d)?\d{2})|(29\/0?2\/(((1[6-9]|[2-9]\d)?(0[48]|[2468][048]|[13579][26])|((16|[2468][048]|[3579][26])00))))|(0?[1-9]|1\d|2[0-8])\/((0?[1-9])|(1[0-2]))\/((1[6-9]|[2-9]\d)?\d{2})) (20|21|22|23|[0-1]?\d):[0-5]?\d:[0-5]?\d$" "2/29/04 20:15:15" +"^((((31\/(0?[13578]|1[02]))|((29|30)\/(0?[1,3-9]|1[0-2])))\/(1[6-9]|[2-9]\d)?\d{2})|(29\/0?2\/(((1[6-9]|[2-9]\d)?(0[48]|[2468][048]|[13579][26])|((16|[2468][048]|[3579][26])00))))|(0?[1-9]|1\d|2[0-8])\/((0?[1-9])|(1[0-2]))\/((1[6-9]|[2-9]\d)?\d{2})) (20|21|22|23|[0-1]?\d):[0-5]?\d:[0-5]?\d$" "31/3/4 9:20:17" +"^([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})$" G "<0>something@someserver.com" +"^([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})$" G "<0>firstname.lastname@mailserver.domain.com" +"^([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})$" G "<0>username-something@some-server.nl" 
+"^([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})$" "username@someserver.domain.c" +"^([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})$" "somename@server.domain-com" +"^([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})$" "someone@something.se_eo" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)|(^([0-9]|[1][0-9]|[2][0-3])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)" G "<0>8am" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)|(^([0-9]|[1][0-9]|[2][0-3])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)" G "<0>8 am" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)|(^([0-9]|[1][0-9]|[2][0-3])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)" G "<0>8:00 am" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)|(^([0-9]|[1][0-9]|[2][0-3])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)" "8a" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)|(^([0-9]|[1][0-9]|[2][0-3])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)" "8 a" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)|(^([0-9]|[1][0-9]|[2][0-3])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)" "8:00 a" +"^([0-9]{2})?(\([0-9]{2})\)([0-9]{3}|[0-9]{4})-[0-9]{4}$" G "<0>55(21)123-4567" +"^([0-9]{2})?(\([0-9]{2})\)([0-9]{3}|[0-9]{4})-[0-9]{4}$" G "<0>(11)1234-5678" +"^([0-9]{2})?(\([0-9]{2})\)([0-9]{3}|[0-9]{4})-[0-9]{4}$" G "<0>55(71)4562-2234" +"^([0-9]{2})?(\([0-9]{2})\)([0-9]{3}|[0-9]{4})-[0-9]{4}$" "3434-3432" +"^([0-9]{2})?(\([0-9]{2})\)([0-9]{3}|[0-9]{4})-[0-9]{4}$" "4(23)232-3232" +"^([0-9]{2})?(\([0-9]{2})\)([0-9]{3}|[0-9]{4})-[0-9]{4}$" "55(2)232-232" +"^((([0]?[1-9]|1[0-2])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?))$" G "<0>1:01 AM" +"^((([0]?[1-9]|1[0-2])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?))$" G "<0>23:52:01" +"^((([0]?[1-9]|1[0-2])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?))$" G "<0>03.24.36 AM" +"^((([0]?[1-9]|1[0-2])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?))$" "19:31 AM" +"^((([0]?[1-9]|1[0-2])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?))$" "9:9 PM" +"^((([0]?[1-9]|1[0-2])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?))$" "25:60:61" +"^\d{0,2}(\.\d{1,2})?$" G "<0>99.99" +"^\d{0,2}(\.\d{1,2})?$" G "<0>99" +"^\d{0,2}(\.\d{1,2})?$" G "<0>.99" +"^\d{0,2}(\.\d{1,2})?$" "999.999" +"^\d{0,2}(\.\d{1,2})?$" "999" +"^\d{0,2}(\.\d{1,2})?$" ".999" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{4,8}$" G "<0>1agdA*$#" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{4,8}$" G "<0>1agdA*$#" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{4,8}$" G "<0>1agdA*$#" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{4,8}$" "wyrn%@*&$# f" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{4,8}$" "mbndkfh782" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{4,8}$" "BNfhjdhfjd&*)%#$)" +"^([a-zA-Z0-9][-a-zA-Z0-9]*[a-zA-Z0-9]\.)+([a-zA-Z0-9]{3,5})$" G "<0>freshmeat.net" +"^([a-zA-Z0-9][-a-zA-Z0-9]*[a-zA-Z0-9]\.)+([a-zA-Z0-9]{3,5})$" G "<0>123.com" 
+"^([a-zA-Z0-9][-a-zA-Z0-9]*[a-zA-Z0-9]\.)+([a-zA-Z0-9]{3,5})$" G "<0>TempLate-toolkKt.orG" +"^([a-zA-Z0-9][-a-zA-Z0-9]*[a-zA-Z0-9]\.)+([a-zA-Z0-9]{3,5})$" "-dog.com" +"^([a-zA-Z0-9][-a-zA-Z0-9]*[a-zA-Z0-9]\.)+([a-zA-Z0-9]{3,5})$" "?boy.net" +"^([a-zA-Z0-9][-a-zA-Z0-9]*[a-zA-Z0-9]\.)+([a-zA-Z0-9]{3,5})$" "this.domain" +"^[^']*$" G "<0>asljas" +"^[^']*$" G "<0>%/&89uhuhadjkh" +"^[^']*$" G '<0>"hi there!"' +"^[^']*$" "'hi there!'" +"^[^']*$" "It's 9 o'clock" +"^[^']*$" "'''''" +"(^\(\)$|^\(((\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\),)*(\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\)){1}\)))$" G "<0>((24,((1,2,3),(3,4,5))))" +"(^\(\)$|^\(((\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\),)*(\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\)){1}\)))$" G "<0>((1,((2,3,4),(4,5,6),(96,34,26))),(12,((1,3,4),(4,5,6),(7,8,9))))" +"(^\(\)$|^\(((\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\),)*(\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\)){1}\)))$" G "<0>()" +"(^\(\)$|^\(((\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\),)*(\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\)){1}\)))$" "(24,((1,2,3),(3,4,5)))" +"(^\(\)$|^\(((\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\),)*(\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\)){1}\)))$" "( )" +"(^\(\)$|^\(((\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\),)*(\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\)){1}\)))$" "((23,(12,3,4),(4,5,6)))" +"^[a-zA-Z0-9\s .\-_']+$" G "<0>dony d'gsa" +"^[a-zA-Z0-9\s .\-_']+$" "^[a-zA-Z0-9\s.\-_']+$" +"^[_a-zA-Z0-9-]+(\.[_a-zA-Z0-9-]+)*@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)*\.(([0-9]{1,3})|([a-zA-Z]{2,3})|(aero|coop|info|museum|name))$" G "<0>example@example.com" +"^[_a-zA-Z0-9-]+(\.[_a-zA-Z0-9-]+)*@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)*\.(([0-9]{1,3})|([a-zA-Z]{2,3})|(aero|coop|info|museum|name))$" G "<0>foo@bar.info" +"^[_a-zA-Z0-9-]+(\.[_a-zA-Z0-9-]+)*@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)*\.(([0-9]{1,3})|([a-zA-Z]{2,3})|(aero|coop|info|museum|name))$" G "<0>blah@127.0.0.1" +"^[_a-zA-Z0-9-]+(\.[_a-zA-Z0-9-]+)*@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)*\.(([0-9]{1,3})|([a-zA-Z]{2,3})|(aero|coop|info|museum|name))$" "broken@@example.com" +"^[_a-zA-Z0-9-]+(\.[_a-zA-Z0-9-]+)*@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)*\.(([0-9]{1,3})|([a-zA-Z]{2,3})|(aero|coop|info|museum|name))$" "foo@bar.infp" +"^[_a-zA-Z0-9-]+(\.[_a-zA-Z0-9-]+)*@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)*\.(([0-9]{1,3})|([a-zA-Z]{2,3})|(aero|coop|info|museum|name))$" "blah@.nospam.biz" +"^\d{5}(-\d{3})?$" G "<0>13165-000" +"^\d{5}(-\d{3})?$" G "<0>38175-000" +"^\d{5}(-\d{3})?$" G "<0>81470-276" +"^\d{5}(-\d{3})?$" "13165-00" +"^\d{5}(-\d{3})?$" "38175-abc" +"^\d{5}(-\d{3})?$" "81470-2763" +"^\$(\d{1,3}(\,\d{3})*|(\d+))(\.\d{2})?$" G "<0>$0.84" +"^\$(\d{1,3}(\,\d{3})*|(\d+))(\.\d{2})?$" G "<0>$123458" +"^\$(\d{1,3}(\,\d{3})*|(\d+))(\.\d{2})?$" G "<0>$1,234,567.89" +"^\$(\d{1,3}(\,\d{3})*|(\d+))(\.\d{2})?$" "$12,3456.01" +"^\$(\d{1,3}(\,\d{3})*|(\d+))(\.\d{2})?$" "12345" +"^\$(\d{1,3}(\,\d{3})*|(\d+))(\.\d{2})?$" "$1.234" +"([A-Z]:\\[^/:\*\?<>\|]+\.\w{2,6})|(\\{2}[^/:\*\?<>\|]+\.\w{2,6})" G "<0>C:\\temp\\this allows spaces\\web.config" +"([A-Z]:\\[^/:\*\?<>\|]+\.\w{2,6})|(\\{2}[^/:\*\?<>\|]+\.\w{2,6})" G "<0>\\\\Andromeda\\share\\file name.123" 
+"([A-Z]:\\[^/:\*\?<>\|]+\.\w{2,6})|(\\{2}[^/:\*\?<>\|]+\.\w{2,6})" "tz:\temp\ fi*le?na:m.doc" +"([A-Z]:\\[^/:\*\?<>\|]+\.\w{2,6})|(\\{2}[^/:\*\?<>\|]+\.\w{2,6})" "\\Andromeda\share\filename.a" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])$)|(^([0-9]|[1][0-9]|[2][0-3])$)" G "<0>10:35" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])$)|(^([0-9]|[1][0-9]|[2][0-3])$)" G "<0>9:20" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])$)|(^([0-9]|[1][0-9]|[2][0-3])$)" G "<0>23" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])$)|(^([0-9]|[1][0-9]|[2][0-3])$)" "24:00" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])$)|(^([0-9]|[1][0-9]|[2][0-3])$)" "20 PM" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])$)|(^([0-9]|[1][0-9]|[2][0-3])$)" "20:15 PM" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(\.[0-9][0-9])?$" G "<0>$3,023,123.34" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(\.[0-9][0-9])?$" G "<0>9,876,453" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(\.[0-9][0-9])?$" G "<0>123456.78" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(\.[0-9][0-9])?$" "4,33,234.34" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(\.[0-9][0-9])?$" "$1.234" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(\.[0-9][0-9])?$" "abc" +"^\$?\d+(\.(\d{2}))?$" G "<0>$2.43" +"^\$?\d+(\.(\d{2}))?$" G "<0>2.02" +"^\$?\d+(\.(\d{2}))?$" G "<0>$2112" +"^\$?\d+(\.(\d{2}))?$" "2.1" +"^\$?\d+(\.(\d{2}))?$" "$.14" +"^\$?\d+(\.(\d{2}))?$" "$2,222.12" +/("[^"]*")|('[^\r]*)(\r\n)?/ G '<0>"my string"' +/("[^"]*")|('[^\r]*)(\r\n)?/ G '<0>"a string with \u0027 in it"' +/("[^"]*")|('[^\r]*)(\r\n)?/ G "<0>' comment" +/("[^"]*")|('[^\r]*)(\r\n)?/ /asd "/ +"^[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}$" G "<0>BFDB4D31-3E35-4DAB-AFCA-5E6E5C8F61EA" +"^[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}$" G "<0>BFDB4d31-3e35-4dab-afca-5e6e5c8f61ea" +"^[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}$" "qqqBFDB4D31-3E35-4DAB-AFCA-5E6E5C8F61EA" +"^[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}$" "BFDB4D31-3E-4DAB-AFCA-5E6E5C8F61EA" +"^[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}$" "BFDB4D31-3E35-4DAB-AF" +"^\d{2}(\x2e)(\d{3})(-\d{3})?$" G "<0>12.345-678" +"^\d{2}(\x2e)(\d{3})(-\d{3})?$" G "<0>23.345-123" +"^\d{2}(\x2e)(\d{3})(-\d{3})?$" G "<0>99.999" +"^\d{2}(\x2e)(\d{3})(-\d{3})?$" "41222-222" +"^\d{2}(\x2e)(\d{3})(-\d{3})?$" "3.444-233" +"^\d{2}(\x2e)(\d{3})(-\d{3})?$" "43.324444" +"^\d{2}(\u002e)(\d{3})(-\d{3})?$" G "<0>12.345-678" +"^\d{2}(\u002e)(\d{3})(-\d{3})?$" G "<0>23.345-123" +"^\d{2}(\u002e)(\d{3})(-\d{3})?$" G "<0>99.999" +"^\d{2}(\u002e)(\d{3})(-\d{3})?$" "41222-222" +"^\d{2}(\u002e)(\d{3})(-\d{3})?$" "3.444-233" +"^\d{2}(\u002e)(\d{3})(-\d{3})?$" "43.324444" +#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" G "<0>c:\file.txt" # TODO: debug +#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" G "<0>c:\folder\sub folder\file.txt" # TODO: debug +#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" G "<0>\\network\folder\file.txt" # TODO: debug +"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" "C:" +"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" "C:\file.xls" +"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" "folder.txt" +"^[a-zA-Z0-9]+([a-zA-Z0-9\-\.]+)?\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>my.domain.com" +"^[a-zA-Z0-9]+([a-zA-Z0-9\-\.]+)?\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>regexlib.com" 
+"^[a-zA-Z0-9]+([a-zA-Z0-9\-\.]+)?\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>big-reg.com" +"^[a-zA-Z0-9]+([a-zA-Z0-9\-\.]+)?\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" ".mydomain.com" +"^[a-zA-Z0-9]+([a-zA-Z0-9\-\.]+)?\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "regexlib.comm" +"^[a-zA-Z0-9]+([a-zA-Z0-9\-\.]+)?\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "-bigreg.com" +"^\d{4}[\-\/\s]?((((0[13578])|(1[02]))[\-\/\s]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\-\/\s]?(([0-2][0-9])|(30)))|(02[\-\/\s]?[0-2][0-9]))$" G "<0>0001-12-31" +"^\d{4}[\-\/\s ]?((((0[13578])|(1[02]))[\-\/\s ]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\-\/\s ]?(([0-2][0-9])|(30)))|(02[\-\/\s ]?[0-2][0-9]))$" G "<0>9999 09 30" +"^\d{4}[\-\/\s]?((((0[13578])|(1[02]))[\-\/\s]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\-\/\s]?(([0-2][0-9])|(30)))|(02[\-\/\s]?[0-2][0-9]))$" G "<0>2002/03/03" +"^\d{4}[\-\/\s]?((((0[13578])|(1[02]))[\-\/\s]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\-\/\s]?(([0-2][0-9])|(30)))|(02[\-\/\s]?[0-2][0-9]))$" "0001\\02\\30" +"^\d{4}[\-\/\s]?((((0[13578])|(1[02]))[\-\/\s]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\-\/\s]?(([0-2][0-9])|(30)))|(02[\-\/\s]?[0-2][0-9]))$" "9999.15.01" +"^\d{4}[\-\/\s]?((((0[13578])|(1[02]))[\-\/\s]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\-\/\s]?(([0-2][0-9])|(30)))|(02[\-\/\s]?[0-2][0-9]))$" "2002/3/3" +"^http\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(/\S*)?$" G "<0>http://psychopop.org" +"^http\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(/\S*)?$" G "<0>http://www.edsroom.com/newUser.asp" +"^http\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(/\S*)?$" G "<0>http://unpleasant.jarrin.net/markov/inde" +"^http\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(/\S*)?$" "ftp://psychopop.org" +"^http\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(/\S*)?$" "http://www.edsroom/" +"^http\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(/\S*)?$" "http://un/pleasant.jarrin.net/markov/index.asp" +"^( [1-9]|[1-9]|0[1-9]|10|11|12)[0-5]\d$" G "<0>1145" +"^( [1-9]|[1-9]|0[1-9]|10|11|12)[0-5]\d$" G "<0>933" +"^( [1-9]|[1-9]|0[1-9]|10|11|12)[0-5]\d$" G "<0> 801" +"^( [1-9]|[1-9]|0[1-9]|10|11|12)[0-5]\d$" "0000" +"^( [1-9]|[1-9]|0[1-9]|10|11|12)[0-5]\d$" "1330" +"^( [1-9]|[1-9]|0[1-9]|10|11|12)[0-5]\d$" "8:30" +"^\d{1,2}\/\d{2,4}$" G "<0>9/02" +"^\d{1,2}\/\d{2,4}$" G "<0>09/2002" +"^\d{1,2}\/\d{2,4}$" G "<0>09/02" +"^\d{1,2}\/\d{2,4}$" "Fall 2002" +"^\d{1,2}\/\d{2,4}$" "Sept 2002" +"^(|(0[1-9])|(1[0-2]))\/((0[1-9])|(1\d)|(2\d)|(3[0-1]))\/((\d{4}))$" G "<0>01/01/2001" +"^(|(0[1-9])|(1[0-2]))\/((0[1-9])|(1\d)|(2\d)|(3[0-1]))\/((\d{4}))$" G "<0>02/30/2001" +"^(|(0[1-9])|(1[0-2]))\/((0[1-9])|(1\d)|(2\d)|(3[0-1]))\/((\d{4}))$" G "<0>12/31/2002" +"^(|(0[1-9])|(1[0-2]))\/((0[1-9])|(1\d)|(2\d)|(3[0-1]))\/((\d{4}))$" "1/1/02" +"^(|(0[1-9])|(1[0-2]))\/((0[1-9])|(1\d)|(2\d)|(3[0-1]))\/((\d{4}))$" "1/1/2002" +"^(|(0[1-9])|(1[0-2]))\/((0[1-9])|(1\d)|(2\d)|(3[0-1]))\/((\d{4}))$" "1/25/2002" +"^(1?(-?\d{3})-?)?(\d{3})(-?\d{4})$" G "<0>15615552323" +"^(1?(-?\d{3})-?)?(\d{3})(-?\d{4})$" G "<0>1-561-555-1212" +"^(1?(-?\d{3})-?)?(\d{3})(-?\d{4})$" G "<0>5613333" +"^(1?(-?\d{3})-?)?(\d{3})(-?\d{4})$" "1-555-5555" +"^(1?(-?\d{3})-?)?(\d{3})(-?\d{4})$" "15553333" +"^(1?(-?\d{3})-?)?(\d{3})(-?\d{4})$" "0-561-555-1212" +'<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>' G '<0>' +'<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>' G '<0>" # TODO: \w in pattern +'<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>' '' # TODO: \w in pattern +'<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>' "The dirty brown fox stank like" 
+"^(1|01|2|02|3|03|4|04|5|05|6|06|7|07|8|08|9|09|10|11|12{1,2}):(([0-5]{1}[0-9]{1}\s{0,1})([AM|PM|am|pm]{2,2}))\W{0}$" G "<0>1:00 AM" +"^(1|01|2|02|3|03|4|04|5|05|6|06|7|07|8|08|9|09|10|11|12{1,2}):(([0-5]{1}[0-9]{1}\s{0,1})([AM|PM|am|pm]{2,2}))\W{0}$" G "<0>12:00 PM" +"^(1|01|2|02|3|03|4|04|5|05|6|06|7|07|8|08|9|09|10|11|12{1,2}):(([0-5]{1}[0-9]{1}\s{0,1})([AM|PM|am|pm]{2,2}))\W{0}$" G "<0>1:00am" +"^(1|01|2|02|3|03|4|04|5|05|6|06|7|07|8|08|9|09|10|11|12{1,2}):(([0-5]{1}[0-9]{1}\s{0,1})([AM|PM|am|pm]{2,2}))\W{0}$" "24:00" +"^\d*$" G "<0>123" +"^\d*$" G "<0>000" +"^\d*$" G "<0>43" +"^\d*$" "asbc" +"^\d*$" "-34" +"^\d*$" "3.1415" +"^[-+]?\d*$" G "<0>123" +"^[-+]?\d*$" G "<0>-123" +"^[-+]?\d*$" G "<0>+123" +"^[-+]?\d*$" "abc" +"^[-+]?\d*$" "3.14159" +"^[-+]?\d*$" "-3.14159" +"^\d*\.?\d*$" G "<0>123" +"^\d*\.?\d*$" G "<0>3.14159" +"^\d*\.?\d*$" G "<0>.234" +"^\d*\.?\d*$" "abc" +"^\d*\.?\d*$" "-3.14159" +"^\d*\.?\d*$" "3.4.2" +"^((\d{5}-\d{4})|(\d{5})|([A-Z]\d[A-Z]\s\d[A-Z]\d))$" G "<0>44240" +"^((\d{5}-\d{4})|(\d{5})|([A-Z]\d[A-Z]\s\d[A-Z]\d))$" G "<0>44240-5555" +"^((\d{5}-\d{4})|(\d{5})|([A-Z]\d[A-Z]\s\d[A-Z]\d))$" G "<0>T2P 3C7" +"^((\d{5}-\d{4})|(\d{5})|([A-Z]\d[A-Z]\s\d[A-Z]\d))$" "44240ddd" +"^((\d{5}-\d{4})|(\d{5})|([A-Z]\d[A-Z]\s\d[A-Z]\d))$" "t44240-55" +"^((\d{5}-\d{4})|(\d{5})|([A-Z]\d[A-Z]\s\d[A-Z]\d))$" "t2p3c7" +"^[\\(]{0,1}([0-9]){3}[\\)]{0,1}[ ]?([^0-1]){1}([0-9]){2}[ ]?[-]?[ ]?([0-9]){4}[ ]*((x){0,1}([0-9]){1,5}){0,1}$" G "<0>(910)456-7890" +"^[\\(]{0,1}([0-9]){3}[\\)]{0,1}[ ]?([^0-1]){1}([0-9]){2}[ ]?[-]?[ ]?([0-9]){4}[ ]*((x){0,1}([0-9]){1,5}){0,1}$" G "<0>(910)456-8970 x12" +"^[\\(]{0,1}([0-9]){3}[\\)]{0,1}[ ]?([^0-1]){1}([0-9]){2}[ ]?[-]?[ ]?([0-9]){4}[ ]*((x){0,1}([0-9]){1,5}){0,1}$" G "<0>(910)456-8970 1211" +"^[\\(]{0,1}([0-9]){3}[\\)]{0,1}[ ]?([^0-1]){1}([0-9]){2}[ ]?[-]?[ ]?([0-9]){4}[ ]*((x){0,1}([0-9]){1,5}){0,1}$" "(910) 156-7890" +"^[\\(]{0,1}([0-9]){3}[\\)]{0,1}[ ]?([^0-1]){1}([0-9]){2}[ ]?[-]?[ ]?([0-9]){4}[ ]*((x){0,1}([0-9]){1,5}){0,1}$" "(910) 056-7890" +"^[\\(]{0,1}([0-9]){3}[\\)]{0,1}[ ]?([^0-1]){1}([0-9]){2}[ ]?[-]?[ ]?([0-9]){4}[ ]*((x){0,1}([0-9]){1,5}){0,1}$" "(910) 556-7890 x" +"^((0?[1-9]|[12][1-9]|3[01])\.(0?[13578]|1[02])\.20[0-9]{2}|(0?[1-9]|[12][1-9]|30)\.(0?[13456789]|1[012])\.20[0-9]{2}|(0?[1-9]|1[1-9]|2[0-8])\.(0?[123456789]|1[012])\.20[0-9]{2}|(0?[1-9]|[12][1-9])\.(0?[123456789]|1[012])\.20(00|04|08|12|16|20|24|28|32|36|40|44|48|52|56|60|64|68|72|76|80|84|88|92|96))$" G "<0>31.01.2002" +"^((0?[1-9]|[12][1-9]|3[01])\.(0?[13578]|1[02])\.20[0-9]{2}|(0?[1-9]|[12][1-9]|30)\.(0?[13456789]|1[012])\.20[0-9]{2}|(0?[1-9]|1[1-9]|2[0-8])\.(0?[123456789]|1[012])\.20[0-9]{2}|(0?[1-9]|[12][1-9])\.(0?[123456789]|1[012])\.20(00|04|08|12|16|20|24|28|32|36|40|44|48|52|56|60|64|68|72|76|80|84|88|92|96))$" G "<0>29.2.2004" +"^((0?[1-9]|[12][1-9]|3[01])\.(0?[13578]|1[02])\.20[0-9]{2}|(0?[1-9]|[12][1-9]|30)\.(0?[13456789]|1[012])\.20[0-9]{2}|(0?[1-9]|1[1-9]|2[0-8])\.(0?[123456789]|1[012])\.20[0-9]{2}|(0?[1-9]|[12][1-9])\.(0?[123456789]|1[012])\.20(00|04|08|12|16|20|24|28|32|36|40|44|48|52|56|60|64|68|72|76|80|84|88|92|96))$" G "<0>09.02.2005" +"^((0?[1-9]|[12][1-9]|3[01])\.(0?[13578]|1[02])\.20[0-9]{2}|(0?[1-9]|[12][1-9]|30)\.(0?[13456789]|1[012])\.20[0-9]{2}|(0?[1-9]|1[1-9]|2[0-8])\.(0?[123456789]|1[012])\.20[0-9]{2}|(0?[1-9]|[12][1-9])\.(0?[123456789]|1[012])\.20(00|04|08|12|16|20|24|28|32|36|40|44|48|52|56|60|64|68|72|76|80|84|88|92|96))$" "31.11.2002" 
+"^((0?[1-9]|[12][1-9]|3[01])\.(0?[13578]|1[02])\.20[0-9]{2}|(0?[1-9]|[12][1-9]|30)\.(0?[13456789]|1[012])\.20[0-9]{2}|(0?[1-9]|1[1-9]|2[0-8])\.(0?[123456789]|1[012])\.20[0-9]{2}|(0?[1-9]|[12][1-9])\.(0?[123456789]|1[012])\.20(00|04|08|12|16|20|24|28|32|36|40|44|48|52|56|60|64|68|72|76|80|84|88|92|96))$" "29.2.2002" +"^((0?[1-9]|[12][1-9]|3[01])\.(0?[13578]|1[02])\.20[0-9]{2}|(0?[1-9]|[12][1-9]|30)\.(0?[13456789]|1[012])\.20[0-9]{2}|(0?[1-9]|1[1-9]|2[0-8])\.(0?[123456789]|1[012])\.20[0-9]{2}|(0?[1-9]|[12][1-9])\.(0?[123456789]|1[012])\.20(00|04|08|12|16|20|24|28|32|36|40|44|48|52|56|60|64|68|72|76|80|84|88|92|96))$" "33.06.2000" +"^(0[1-9]|1[0-2])\/((0[1-9]|2\d)|3[0-1])\/(19\d\d|200[0-3])$" G "<0>12/31/2003" +"^(0[1-9]|1[0-2])\/((0[1-9]|2\d)|3[0-1])\/(19\d\d|200[0-3])$" G "<0>01/01/1900" +"^(0[1-9]|1[0-2])\/((0[1-9]|2\d)|3[0-1])\/(19\d\d|200[0-3])$" G "<0>11/31/2002" +"^(0[1-9]|1[0-2])\/((0[1-9]|2\d)|3[0-1])\/(19\d\d|200[0-3])$" "1/1/2002" +"^(0[1-9]|1[0-2])\/((0[1-9]|2\d)|3[0-1])\/(19\d\d|200[0-3])$" "01/01/02" +"^(0[1-9]|1[0-2])\/((0[1-9]|2\d)|3[0-1])\/(19\d\d|200[0-3])$" "01/01/2004" +"^((((([13578])|(1[0-2]))[\-\/\s]?(([1-9])|([1-2][0-9])|(3[01])))|((([469])|(11))[\-\/\s]?(([1-9])|([1-2][0-9])|(30)))|(2[\-\/\s]?(([1-9])|([1-2][0-9]))))[\-\/\s]?\d{4})(\s((([1-9])|(1[02]))\:([0-5][0-9])((\s)|(\:([0-5][0-9])\s))([AM|PM|am|pm]{2,2})))?$" G "<0>3/3/2003" +"^((((([13578])|(1[0-2]))[\-\/\s]?(([1-9])|([1-2][0-9])|(3[01])))|((([469])|(11))[\-\/\s]?(([1-9])|([1-2][0-9])|(30)))|(2[\-\/\s]?(([1-9])|([1-2][0-9]))))[\-\/\s]?\d{4})(\s((([1-9])|(1[02]))\:([0-5][0-9])((\s)|(\:([0-5][0-9])\s))([AM|PM|am|pm]{2,2})))?$" G "<0>3/3/2002 3:33 pm" +"^((((([13578])|(1[0-2]))[\-\/\s]?(([1-9])|([1-2][0-9])|(3[01])))|((([469])|(11))[\-\/\s]?(([1-9])|([1-2][0-9])|(30)))|(2[\-\/\s]?(([1-9])|([1-2][0-9]))))[\-\/\s]?\d{4})(\s((([1-9])|(1[02]))\:([0-5][0-9])((\s)|(\:([0-5][0-9])\s))([AM|PM|am|pm]{2,2})))?$" G "<0>3/3/2003 3:33:33 am" +"^((((([13578])|(1[0-2]))[\-\/\s]?(([1-9])|([1-2][0-9])|(3[01])))|((([469])|(11))[\-\/\s]?(([1-9])|([1-2][0-9])|(30)))|(2[\-\/\s]?(([1-9])|([1-2][0-9]))))[\-\/\s]?\d{4})(\s((([1-9])|(1[02]))\:([0-5][0-9])((\s)|(\:([0-5][0-9])\s))([AM|PM|am|pm]{2,2})))?$" "13/1/2002" +"^((((([13578])|(1[0-2]))[\-\/\s]?(([1-9])|([1-2][0-9])|(3[01])))|((([469])|(11))[\-\/\s]?(([1-9])|([1-2][0-9])|(30)))|(2[\-\/\s]?(([1-9])|([1-2][0-9]))))[\-\/\s]?\d{4})(\s((([1-9])|(1[02]))\:([0-5][0-9])((\s)|(\:([0-5][0-9])\s))([AM|PM|am|pm]{2,2})))?$" "3/3/2002 3:33" +"^((((([13578])|(1[0-2]))[\-\/\s]?(([1-9])|([1-2][0-9])|(3[01])))|((([469])|(11))[\-\/\s]?(([1-9])|([1-2][0-9])|(30)))|(2[\-\/\s]?(([1-9])|([1-2][0-9]))))[\-\/\s]?\d{4})(\s((([1-9])|(1[02]))\:([0-5][0-9])((\s)|(\:([0-5][0-9])\s))([AM|PM|am|pm]{2,2})))?$" "31/3/2002" +"([a-zA-Z]:(\\w+)*\\[a-zA-Z0_9]+)?.xls" G "<0>E:\DyAGT\SD01A_specV2.xls" +"([a-zA-Z]:(\\w+)*\\[a-zA-Z0_9]+)?.xls" "E:\DyAGT\SD01A_specV2.txt" +"(((0[13578]|10|12)([-./])(0[1-9]|[12][0-9]|3[01])([-./])(\d{4}))|((0[469]|11)([-./])([0][1-9]|[12][0-9]|30)([-./])(\d{4}))|((2)([-./])(0[1-9]|1[0-9]|2[0-8])([-./])(\d{4}))|((2)(\.|-|\/)(29)([-./])([02468][048]00))|((2)([-./])(29)([-./])([13579][26]00))|((2)([-./])(29)([-./])([0-9][0-9][0][48]))|((2)([-./])(29)([-./])([0-9][0-9][2468][048]))|((2)([-./])(29)([-./])([0-9][0-9][13579][26])))" G "<0>02/29/2084" 
+"(((0[13578]|10|12)([-./])(0[1-9]|[12][0-9]|3[01])([-./])(\d{4}))|((0[469]|11)([-./])([0][1-9]|[12][0-9]|30)([-./])(\d{4}))|((2)([-./])(0[1-9]|1[0-9]|2[0-8])([-./])(\d{4}))|((2)(\.|-|\/)(29)([-./])([02468][048]00))|((2)([-./])(29)([-./])([13579][26]00))|((2)([-./])(29)([-./])([0-9][0-9][0][48]))|((2)([-./])(29)([-./])([0-9][0-9][2468][048]))|((2)([-./])(29)([-./])([0-9][0-9][13579][26])))" G "<0>01/31/2000" +"(((0[13578]|10|12)([-./])(0[1-9]|[12][0-9]|3[01])([-./])(\d{4}))|((0[469]|11)([-./])([0][1-9]|[12][0-9]|30)([-./])(\d{4}))|((2)([-./])(0[1-9]|1[0-9]|2[0-8])([-./])(\d{4}))|((2)(\.|-|\/)(29)([-./])([02468][048]00))|((2)([-./])(29)([-./])([13579][26]00))|((2)([-./])(29)([-./])([0-9][0-9][0][48]))|((2)([-./])(29)([-./])([0-9][0-9][2468][048]))|((2)([-./])(29)([-./])([0-9][0-9][13579][26])))" G "<0>11/30/2000" +"(((0[13578]|10|12)([-./])(0[1-9]|[12][0-9]|3[01])([-./])(\d{4}))|((0[469]|11)([-./])([0][1-9]|[12][0-9]|30)([-./])(\d{4}))|((2)([-./])(0[1-9]|1[0-9]|2[0-8])([-./])(\d{4}))|((2)(\.|-|\/)(29)([-./])([02468][048]00))|((2)([-./])(29)([-./])([13579][26]00))|((2)([-./])(29)([-./])([0-9][0-9][0][48]))|((2)([-./])(29)([-./])([0-9][0-9][2468][048]))|((2)([-./])(29)([-./])([0-9][0-9][13579][26])))" "02/29/2083" +"(((0[13578]|10|12)([-./])(0[1-9]|[12][0-9]|3[01])([-./])(\d{4}))|((0[469]|11)([-./])([0][1-9]|[12][0-9]|30)([-./])(\d{4}))|((2)([-./])(0[1-9]|1[0-9]|2[0-8])([-./])(\d{4}))|((2)(\.|-|\/)(29)([-./])([02468][048]00))|((2)([-./])(29)([-./])([13579][26]00))|((2)([-./])(29)([-./])([0-9][0-9][0][48]))|((2)([-./])(29)([-./])([0-9][0-9][2468][048]))|((2)([-./])(29)([-./])([0-9][0-9][13579][26])))" "11/31/2000" +"(((0[13578]|10|12)([-./])(0[1-9]|[12][0-9]|3[01])([-./])(\d{4}))|((0[469]|11)([-./])([0][1-9]|[12][0-9]|30)([-./])(\d{4}))|((2)([-./])(0[1-9]|1[0-9]|2[0-8])([-./])(\d{4}))|((2)(\.|-|\/)(29)([-./])([02468][048]00))|((2)([-./])(29)([-./])([13579][26]00))|((2)([-./])(29)([-./])([0-9][0-9][0][48]))|((2)([-./])(29)([-./])([0-9][0-9][2468][048]))|((2)([-./])(29)([-./])([0-9][0-9][13579][26])))" "01/32/2000" +"^[a-zA-Z0-9\s .\-]+$" G "<0>2222 Mock St." # TODO: \s in patterns not implemented +"^[a-zA-Z0-9\s .\-]+$" G "<0>1 A St." 
+"^[a-zA-Z0-9\s .\-]+$" G "<0>555-1212" +"^[a-zA-Z0-9\s.\-]+$" "[A Street]" +"^[a-zA-Z0-9\s.\-]+$" "(3 A St.)" +"^[a-zA-Z0-9\s.\-]+$" "{34 C Ave.}" +"^[a-zA-Z0-9\s.\-]+$" "Last.*?(\d+.?\d*)" +"^[a-zA-Z0-9\s .\-]+$" G " Last1-(123)-123-1234" +"^([0-9]( |-)?)?(\(?[0-9]{3}\)?|[0-9]{3})( |-)?([0-9]{3}( |-)?[0-9]{4}|[a-zA-Z0-9]{7})$" G "<0>123 123 1234" +"^([0-9]( |-)?)?(\(?[0-9]{3}\)?|[0-9]{3})( |-)?([0-9]{3}( |-)?[0-9]{4}|[a-zA-Z0-9]{7})$" G "<0>1-800-ALPHNUM" +"^([0-9]( |-)?)?(\(?[0-9]{3}\)?|[0-9]{3})( |-)?([0-9]{3}( |-)?[0-9]{4}|[a-zA-Z0-9]{7})$" "1.123.123.1234" +"^([0-9]( |-)?)?(\(?[0-9]{3}\)?|[0-9]{3})( |-)?([0-9]{3}( |-)?[0-9]{4}|[a-zA-Z0-9]{7})$" "(123)-1234-123" +"^([0-9]( |-)?)?(\(?[0-9]{3}\)?|[0-9]{3})( |-)?([0-9]{3}( |-)?[0-9]{4}|[a-zA-Z0-9]{7})$" "123-1234" +"^([0-1][0-9]|[2][0-3]):([0-5][0-9])$" G "<0>02:04" +"^([0-1][0-9]|[2][0-3]):([0-5][0-9])$" G "<0>16:56" +"^([0-1][0-9]|[2][0-3]):([0-5][0-9])$" G "<0>23:59" +"^([0-1][0-9]|[2][0-3]):([0-5][0-9])$" "02:00 PM" +"^([0-1][0-9]|[2][0-3]):([0-5][0-9])$" "PM2:00" +"^([0-1][0-9]|[2][0-3]):([0-5][0-9])$" "24:00" +"^[0,1]?\d{1}\/(([0-2]?\d{1})|([3][0,1]{1}))\/(([1]{1}[9]{1}[9]{1}\d{1})|([2-9]{1}\d{3}))$" G "<0>01/01/1990" +"^[0,1]?\d{1}\/(([0-2]?\d{1})|([3][0,1]{1}))\/(([1]{1}[9]{1}[9]{1}\d{1})|([2-9]{1}\d{3}))$" G "<0>12/12/9999" +"^[0,1]?\d{1}\/(([0-2]?\d{1})|([3][0,1]{1}))\/(([1]{1}[9]{1}[9]{1}\d{1})|([2-9]{1}\d{3}))$" G "<0>3/28/2001" +"^[0,1]?\d{1}\/(([0-2]?\d{1})|([3][0,1]{1}))\/(([1]{1}[9]{1}[9]{1}\d{1})|([2-9]{1}\d{3}))$" "3-8-01" +"^[0,1]?\d{1}\/(([0-2]?\d{1})|([3][0,1]{1}))\/(([1]{1}[9]{1}[9]{1}\d{1})|([2-9]{1}\d{3}))$" "13/32/1001" +"^[0,1]?\d{1}\/(([0-2]?\d{1})|([3][0,1]{1}))\/(([1]{1}[9]{1}[9]{1}\d{1})|([2-9]{1}\d{3}))$" "03/32/1989" +"((\(\d{3}\)?)|(\d{3}))([\s \-./]?)(\d{3})([\s \-./]?)(\d{4})" G "<0>1.2123644567" +"((\(\d{3}\)?)|(\d{3}))([\s \-./]?)(\d{3})([\s \-./]?)(\d{4})" G "<0>0-234.567/8912" +"((\(\d{3}\)?)|(\d{3}))([\s \-./]?)(\d{3})([\s \-./]?)(\d{4})" G "<0>1-(212)-123 4567" +"((\(\d{3}\)?)|(\d{3}))([\s \-./]?)(\d{3})([\s \-./]?)(\d{4})" "0-212364345" +"((\(\d{3}\)?)|(\d{3}))([\s \-./]?)(\d{3})([\s \-./]?)(\d{4})" "1212-364,4321" +"((\(\d{3}\)?)|(\d{3}))([\s \-./]?)(\d{3})([\s \-./]?)(\d{4})" "0212\345/6789" +"^([0-9]{6}[\s \-]{1}[0-9]{12}|[0-9]{18})$" G "<0>000000 000000000000" +"^([0-9]{6}[\s \-]{1}[0-9]{12}|[0-9]{18})$" G "<0>000000-000000000000" +"^([0-9]{6}[\s \-]{1}[0-9]{12}|[0-9]{18})$" G "<0>000000000000000000" +"^([0-9]{6}[\s \-]{1}[0-9]{12}|[0-9]{18})$" "000000_000000000000" +"^(([1-9])|(0[1-9])|(1[0-2]))\/((0[1-9])|([1-31]))\/((\d{2})|(\d{4}))$" G "<0>01/01/2001" +"^(([1-9])|(0[1-9])|(1[0-2]))\/((0[1-9])|([1-31]))\/((\d{2})|(\d{4}))$" G "<0>1/1/2001" +"^(([1-9])|(0[1-9])|(1[0-2]))\/((0[1-9])|([1-31]))\/((\d{2})|(\d{4}))$" G "<0>01/1/01" +"^(([1-9])|(0[1-9])|(1[0-2]))\/((0[1-9])|([1-31]))\/((\d{2})|(\d{4}))$" "13/01/2001" +"^(([1-9])|(0[1-9])|(1[0-2]))\/((0[1-9])|([1-31]))\/((\d{2})|(\d{4}))$" "1/2/100" +"^(([1-9])|(0[1-9])|(1[0-2]))\/((0[1-9])|([1-31]))\/((\d{2})|(\d{4}))$" "09/32/2001" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(.[0-9][0-9])?$" G "<0>$3,023,123.34" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(.[0-9][0-9])?$" G "<0>9,876,453" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(.[0-9][0-9])?$" G "<0>123456.78" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(.[0-9][0-9])?$" "4,33,234.34" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(.[0-9][0-9])?$" "$1.234" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(.[0-9][0-9])?$" "abc" +"^\d{5}$|^\d{5}-\d{4}$" G "<0>55555-5555" 
+"^\d{5}$|^\d{5}-\d{4}$" G "<0>34564-3342" +"^\d{5}$|^\d{5}-\d{4}$" G "<0>90210" +"^\d{5}$|^\d{5}-\d{4}$" "434454444" +"^\d{5}$|^\d{5}-\d{4}$" "645-32-2345" +"^\d{5}$|^\d{5}-\d{4}$" "abc" +"^\d{3}-\d{2}-\d{4}$" G "<0>333-22-4444" +"^\d{3}-\d{2}-\d{4}$" G "<0>123-45-6789" +"^\d{3}-\d{2}-\d{4}$" "123456789" +"^\d{3}-\d{2}-\d{4}$" "SSN" +"^[2-9]\d{2}-\d{3}-\d{4}$" G "<0>800-555-5555" +"^[2-9]\d{2}-\d{3}-\d{4}$" G "<0>333-444-5555" +"^[2-9]\d{2}-\d{3}-\d{4}$" G "<0>212-666-1234" +"^[2-9]\d{2}-\d{3}-\d{4}$" "000-000-0000" +"^[2-9]\d{2}-\d{3}-\d{4}$" "123-456-7890" +"^[2-9]\d{2}-\d{3}-\d{4}$" "2126661234" +"^\d{5}-\d{4}|\d{5}|[A-Z]\d[A-Z] \d[A-Z]\d$" G "<0>44240" +"^\d{5}-\d{4}|\d{5}|[A-Z]\d[A-Z] \d[A-Z]\d$" G "<0>44240-5555" +"^\d{5}-\d{4}|\d{5}|[A-Z]\d[A-Z] \d[A-Z]\d$" G "<0>G3H 6A3" +"^\d{5}-\d{4}|\d{5}|[A-Z]\d[A-Z] \d[A-Z]\d$" "Ohio" +"^\d{5}-\d{4}|\d{5}|[A-Z]\d[A-Z] \d[A-Z]\d$" "abc" +"^\d{5}-\d{4}|\d{5}|[A-Z]\d[A-Z] \d[A-Z]\d$" "g3h6a3" +"[0-9]{4}\s*[a-zA-Z]{2}" G "<0>1054 WD" +"[0-9]{4}\s*[a-zA-Z]{2}" G "<0>1054WD" +"[0-9]{4}\s*[a-zA-Z]{2}" G "<0>1054 wd" +"[0-9]{4}\s*[a-zA-Z]{2}" "10543" +"(^1300\d{6}$)|(^1800|1900|1902\d{6}$)|(^0[2|3|7|8]{1}[0-9]{8}$)|(^13\d{4}$)|(^04\d{2,3}\d{6}$)" G "<0>0732105432" +"(^1300\d{6}$)|(^1800|1900|1902\d{6}$)|(^0[2|3|7|8]{1}[0-9]{8}$)|(^13\d{4}$)|(^04\d{2,3}\d{6}$)" G "<0>1300333444" +"(^1300\d{6}$)|(^1800|1900|1902\d{6}$)|(^0[2|3|7|8]{1}[0-9]{8}$)|(^13\d{4}$)|(^04\d{2,3}\d{6}$)" G "<0>131313" +"(^1300\d{6}$)|(^1800|1900|1902\d{6}$)|(^0[2|3|7|8]{1}[0-9]{8}$)|(^13\d{4}$)|(^04\d{2,3}\d{6}$)" "32105432" +"(^1300\d{6}$)|(^1800|1900|1902\d{6}$)|(^0[2|3|7|8]{1}[0-9]{8}$)|(^13\d{4}$)|(^04\d{2,3}\d{6}$)" "13000456" +"^((https?|ftp)\://((\[?(\d{1,3}\.){3}\d{1,3}\]?)|(([\-a-zA-Z0-9]+\.)+[a-zA-Z]{2,4}))(\:\d+)?(/[\-a-zA-Z0-9._?,'+\&%$#=~\\]+)*/?)$" G "<0>http://207.68.172.254/home.ashx" +"^((https?|ftp)\://((\[?(\d{1,3}\.){3}\d{1,3}\]?)|(([\-a-zA-Z0-9]+\.)+[a-zA-Z]{2,4}))(\:\d+)?(/[\-a-zA-Z0-9._?,'+\&%$#=~\\]+)*/?)$" G "<0>ftp://ftp.netscape.com/" +"^((https?|ftp)\://((\[?(\d{1,3}\.){3}\d{1,3}\]?)|(([\-a-zA-Z0-9]+\.)+[a-zA-Z]{2,4}))(\:\d+)?(/[\-a-zA-Z0-9._?,'+\&%$#=~\\]+)*/?)$" G "<0>https://www.brinkster.com/login.asp" +"^((https?|ftp)\://((\[?(\d{1,3}\.){3}\d{1,3}\]?)|(([\-a-zA-Z0-9]+\.)+[a-zA-Z]{2,4}))(\:\d+)?(/[\-a-zA-Z0-9._?,'+\&%$#=~\\]+)*/?)$" "htp://mistake.com/" +"^((https?|ftp)\://((\[?(\d{1,3}\.){3}\d{1,3}\]?)|(([\-a-zA-Z0-9]+\.)+[a-zA-Z]{2,4}))(\:\d+)?(/[\-a-zA-Z0-9._?,'+\&%$#=~\\]+)*/?)$" "http://www_address.com/" +"^((https?|ftp)\://((\[?(\d{1,3}\.){3}\d{1,3}\]?)|(([\-a-zA-Z0-9]+\.)+[a-zA-Z]{2,4}))(\:\d+)?(/[\-a-zA-Z0-9._?,'+\&%$#=~\\]+)*/?)$" "ftp://www.files.com/file with spaces.txt" +"([0-9]{4})-([0-9]{1,2})-([0-9]{1,2})" G "<0>2002-11-03" +"([0-9]{4})-([0-9]{1,2})-([0-9]{1,2})" G "<0>2007-17-08" +"([0-9]{4})-([0-9]{1,2})-([0-9]{1,2})" G "<0>9999-99-99" +"([0-9]{4})-([0-9]{1,2})-([0-9]{1,2})" "2002/17/18" +"([0-9]{4})-([0-9]{1,2})-([0-9]{1,2})" "2002.18.45" +"([0-9]{4})-([0-9]{1,2})-([0-9]{1,2})" "18.45.2002" +"^\$?(\d{1,3}(\,\d{3})*|(\d+))(\.\d{0,2})?$" G "<0>$0,234.50" +"^\$?(\d{1,3}(\,\d{3})*|(\d+))(\.\d{0,2})?$" G "<0>0234.5" +"^\$?(\d{1,3}(\,\d{3})*|(\d+))(\.\d{0,2})?$" G "<0>0,234." 
+"^\$?(\d{1,3}(\,\d{3})*|(\d+))(\.\d{0,2})?$" "$1,23,50" +"^\$?(\d{1,3}(\,\d{3})*|(\d+))(\.\d{0,2})?$" "$123.123" +"(^\d{5}-\d{3}|^\d{2}.\d{3}-\d{3}|\d{8})" G "<0>12.345-678" +"(^\d{5}-\d{3}|^\d{2}.\d{3}-\d{3}|\d{8})" G "<0>12345-678" +"(^\d{5}-\d{3}|^\d{2}.\d{3}-\d{3}|\d{8})" G "<0>12345678" +"(^\d{5}-\d{3}|^\d{2}.\d{3}-\d{3}|\d{8})" "12.345678" +"(^\d{5}-\d{3}|^\d{2}.\d{3}-\d{3}|\d{8})" "12345-1" +"(^\d{5}-\d{3}|^\d{2}.\d{3}-\d{3}|\d{8})" "123" +'^([a-zA-Z]\:|\\)\\([^\\]+\\)*[^\/:*?"<>|]+\.htm(l)?$' G "<0>x:\\test\\testing.htm" +'^([a-zA-Z]\:|\\)\\([^\\]+\\)*[^\/:*?"<>|]+\.htm(l)?$' G "<0>x:\\test\\test#$ ing.html" +'^([a-zA-Z]\:|\\)\\([^\\]+\\)*[^\/:*?"<>|]+\.htm(l)?$' G "<0>\\\\test\testing.html" +'^([a-zA-Z]\:|\\)\\([^\\]+\\)*[^\/:*?"<>|]+\.htm(l)?$' "x:\test\test/ing.htm" +'^([a-zA-Z]\:|\\)\\([^\\]+\\)*[^\/:*?"<>|]+\.htm(l)?$' "x:\test\test*.htm" +'^([a-zA-Z]\:|\\)\\([^\\]+\\)*[^\/:*?"<>|]+\.htm(l)?$' "\\test?<.htm" +"^[1-9]{1}[0-9]{3}$" G "<0>1234" +"^[1-9]{1}[0-9]{3}$" "123" +"^[1-9]{1}[0-9]{3}$" "123A" +"^[A-Z]{1}( |-)?[1-9]{1}[0-9]{3}$" G "<0>A-1234" +"^[A-Z]{1}( |-)?[1-9]{1}[0-9]{3}$" G "<0>A 1234" +"^[A-Z]{1}( |-)?[1-9]{1}[0-9]{3}$" G "<0>A1234" +"^[A-Z]{1}( |-)?[1-9]{1}[0-9]{3}$" "AA-1234" +"^[A-Z]{1}( |-)?[1-9]{1}[0-9]{3}$" "A12345" +"^(F-)?[0-9]{5}$" G "<0>12345" +"^(F-)?[0-9]{5}$" G "<0>F-12345" +"^(F-)?[0-9]{5}$" "F12345" +"^(F-)?[0-9]{5}$" "F-123456" +"^(F-)?[0-9]{5}$" "123456" +"^(V-|I-)?[0-9]{4}$" G "<0>1234" +"^(V-|I-)?[0-9]{4}$" G "<0>V-1234" +"^(V-|I-)?[0-9]{4}$" "12345" +"^[1-9]{1}[0-9]{3} ?[A-Z]{2}$" G "<0>1234 AB" +"^[1-9]{1}[0-9]{3} ?[A-Z]{2}$" G "<0>1234AB" +"^[1-9]{1}[0-9]{3} ?[A-Z]{2}$" "123AB" +"^[1-9]{1}[0-9]{3} ?[A-Z]{2}$" "1234AAA" +"^([1-9]{2}|[0-9][1-9]|[1-9][0-9])[0-9]{3}$" G "<0>12345" +"^([1-9]{2}|[0-9][1-9]|[1-9][0-9])[0-9]{3}$" G "<0>10234" +"^([1-9]{2}|[0-9][1-9]|[1-9][0-9])[0-9]{3}$" G "<0>01234" +"^([1-9]{2}|[0-9][1-9]|[1-9][0-9])[0-9]{3}$" "00123" +"^(/w|/W|[^<>+?$%\{}\&])+$" G "<0>John Doe Sr." +"^(/w|/W|[^<>+?$%\{}\&])+$" G "<0>100 Elm St., Suite 25" +"^(/w|/W|[^<>+?$%\{}\&])+$" G "<0>Valerie's Gift Shop" +"^(/w|/W|[^<>+?$%\{}\&])+$" "

Hey

" +/<[a-zA-Z][^>]*\son\w+=(\w+|'[^']*'|"[^"]*")[^>]*>/ G '<0>' +/<[a-zA-Z][^>]*\son\w+=(\w+|'[^']*'|"[^"]*")[^>]*>/ '' +"(?!^0*$)(?!^0*\.0*$)^\d{1,5}(\.\d{1,3})?$" G "<0>1" +"(?!^0*$)(?!^0*\.0*$)^\d{1,5}(\.\d{1,3})?$" G "<0>12345.123" +"(?!^0*$)(?!^0*\.0*$)^\d{1,5}(\.\d{1,3})?$" G "<0>0.5" +"(?!^0*$)(?!^0*\.0*$)^\d{1,5}(\.\d{1,3})?$" "0" +"(?!^0*$)(?!^0*\.0*$)^\d{1,5}(\.\d{1,3})?$" "0.0" +"(?!^0*$)(?!^0*\.0*$)^\d{1,5}(\.\d{1,3})?$" "123456.1234" +"^.+@[^\.].*\.[a-z]{2,}$" G "<0>whatever@somewhere.museum" +"^.+@[^\.].*\.[a-z]{2,}$" G "<0>foreignchars@myforeigncharsdomain.nu" +"^.+@[^\.].*\.[a-z]{2,}$" G "<0>me+mysomething@mydomain.com" +"^.+@[^\.].*\.[a-z]{2,}$" "a@b.c" +"^.+@[^\.].*\.[a-z]{2,}$" "me@.my.com" +"^.+@[^\.].*\.[a-z]{2,}$" "a@b.comFOREIGNCHAR" +"^(\d{5}-\d{4}|\d{5})$" G "<0>12345" +"^(\d{5}-\d{4}|\d{5})$" G "<0>12345-1234" +"^(\d{5}-\d{4}|\d{5})$" "12345-12345" +"^(\d{5}-\d{4}|\d{5})$" "123" +"^(\d{5}-\d{4}|\d{5})$" "12345-abcd" +"^(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])$" G "<0>0.0.0.0" +"^(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])$" G "<0>255.255.255.02" +"^(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])$" G "<0>192.168.0.136" +"^(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])$" "256.1.3.4" +"^(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])$" "023.44.33.22" +"^(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])$" "10.57.98.23." +"]*[^/])>" G '<0>' +"]*[^/])>" '' +"" G "<0>" +"" G "<0>" +"" "this is a comment" +"" G "<0>" +"" G "<0>" +"" "this is a comment" +/<\u002f?(\w+)(\s+\w+=(\w+|"[^"]*"|'[^']*'))*>/ G "<0>" +/<\u002f?(\w+)(\s+\w+=(\w+|"[^"]*"|'[^']*'))*>/ G '<0>' +/<\u002f?(\w+)(\s+\w+=(\w+|"[^"]*"|'[^']*'))*>/ G "<0>" +/<\u002f?(\w+)(\s+\w+=(\w+|"[^"]*"|'[^']*'))*>/ "No Tag Here ..." +"(\{\\f\d*)\\([^;]+;)" G "<0>{\\f0\\Some Font names here;" +"(\{\\f\d*)\\([^;]+;)" G "<0>{\\f1\\fswiss\\fcharset0\\fprq2{\\*\\panose 020b0604020202020204}Arial;" +"(\{\\f\d*)\\([^;]+;)" G "{\\f" +"(\{\\f\d*)\\([^;]+;)" "{f0fs20 some text}" +#"" G '<0>space' # TODO: Can't quote this pattern with the test syntax! 
+#"" "this is not a tag" +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0-1]\d|[2][0-3])(\:[0-5]\d){1,2})?$" G "<0>12/30/2002" +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0-1]\d|[2][0-3])(\:[0-5]\d){1,2})?$" G "<0>01/12/1998 13:30" +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0-1]\d|[2][0-3])(\:[0-5]\d){1,2})?$" G "<0>01/28/2002 22:35:00" +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0-1]\d|[2][0-3])(\:[0-5]\d){1,2})?$" "13/30/2002" +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0-1]\d|[2][0-3])(\:[0-5]\d){1,2})?$" "01/12/1998 24:30" +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0-1]\d|[2][0-3])(\:[0-5]\d){1,2})?$" "01/28/2002 22:35:64" +#"((?(^[A-Z0-9-;=]*:))(?(.*)))" G "<0>BEGIN:" #named capture +#"((?(^[A-Z0-9-;=]*:))(?(.*)))" G "<0>TEL;WORK;VOICE:" #named capture +#"((?(^[A-Z0-9-;=]*:))(?(.*)))" G "<0>TEL:" #named capture +#"((?(^[A-Z0-9-;=]*:))(?(.*)))" "begin:" #named capture +#"((?(^[A-Z0-9-;=]*:))(?(.*)))" "TEL;PREF;" #named capture +'^]*)>(.*?(?=<\/a>))<\/a>$' G '<0>
my external link' +'^]*)>(.*?(?=<\/a>))<\/a>$' G ']*)>(.*?(?=<\/a>))<\/a>$' 'my internal link' +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0]\d|[1][0-2])(\:[0-5]\d){1,2})*\s*([aApP][mM]{0,2})?$" G "<0>12/31/2002" +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0]\d|[1][0-2])(\:[0-5]\d){1,2})*\s*([aApP][mM]{0,2})?$" G "<0>12/31/2002 08:00" +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0]\d|[1][0-2])(\:[0-5]\d){1,2})*\s*([aApP][mM]{0,2})?$" G "<0>12/31/2002 08:00 AM" +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0]\d|[1][0-2])(\:[0-5]\d){1,2})*\s*([aApP][mM]{0,2})?$" "12/31/02" +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0]\d|[1][0-2])(\:[0-5]\d){1,2})*\s*([aApP][mM]{0,2})?$" "12/31/2002 14:00" +"
(?:\s*([^<]+)
\s*)+
" G "<0>
string1
string2
string3
" +"
(?:\s*([^<]+)
\s*)+
" ".." +"^((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((19)([2-9])(\d{1})|(20)([01])(\d{1})|([8901])(\d{1}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((19)([2-9])(\d{1})|(20)([01])(\d{1})|([8901])(\d{1})))$" G "<0>1/2/03" +"^((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((19)([2-9])(\d{1})|(20)([01])(\d{1})|([8901])(\d{1}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((19)([2-9])(\d{1})|(20)([01])(\d{1})|([8901])(\d{1})))$" G "<0>2/30/1999" +"^((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((19)([2-9])(\d{1})|(20)([01])(\d{1})|([8901])(\d{1}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((19)([2-9])(\d{1})|(20)([01])(\d{1})|([8901])(\d{1})))$" G "<0>03/04/19" +"^((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((19)([2-9])(\d{1})|(20)([01])(\d{1})|([8901])(\d{1}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((19)([2-9])(\d{1})|(20)([01])(\d{1})|([8901])(\d{1})))$" "3/4/2020" +"^((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((19)([2-9])(\d{1})|(20)([01])(\d{1})|([8901])(\d{1}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((19)([2-9])(\d{1})|(20)([01])(\d{1})|([8901])(\d{1})))$" "3/4/1919" +']*))*|/?>' G '<0>' +']*))*|/?>' G "<0>" +']*))*|/?>' G "<0>
" +']*))*|/?>' "this is a test..." +"^ *(1[0-2]|[1-9]):[0-5][0-9] *(a|p|A|P)(m|M) *$" G "<0>12:00am" +"^ *(1[0-2]|[1-9]):[0-5][0-9] *(a|p|A|P)(m|M) *$" G "<0>1:00 PM" +"^ *(1[0-2]|[1-9]):[0-5][0-9] *(a|p|A|P)(m|M) *$" G "<0> 12:59 pm" +"^ *(1[0-2]|[1-9]):[0-5][0-9] *(a|p|A|P)(m|M) *$" "0:00" +"^ *(1[0-2]|[1-9]):[0-5][0-9] *(a|p|A|P)(m|M) *$" "0:01 am" +"^ *(1[0-2]|[1-9]):[0-5][0-9] *(a|p|A|P)(m|M) *$" "13:00 pm" +"\({1}[0-9]{3}\){1}\-{1}[0-9]{3}\-{1}[0-9]{4}" G "<0>(111)-111-1111" +"\({1}[0-9]{3}\){1}\-{1}[0-9]{3}\-{1}[0-9]{4}" "11111111111" +"[^abc]" G "<0>def" +"[^abc]" "abc" +"^(([0]?[1-9]|[1][0-2])[\/|\-|\.]([0-2]\d|[3][0-1]|[1-9])[\/|\-|\.]([2][0])?\d{2}\s+((([0][0-9]|[1][0-2]|[0-9])[\:|\-|\.]([0-5]\d)\s*([aApP][mM])?)|(([0-1][0-9]|[2][0-3]|[0-9])[\:|\-|\.]([0-5]\d))))$" G "<0>01/01/2002 04:42" +"^(([0]?[1-9]|[1][0-2])[\/|\-|\.]([0-2]\d|[3][0-1]|[1-9])[\/|\-|\.]([2][0])?\d{2}\s+((([0][0-9]|[1][0-2]|[0-9])[\:|\-|\.]([0-5]\d)\s*([aApP][mM])?)|(([0-1][0-9]|[2][0-3]|[0-9])[\:|\-|\.]([0-5]\d))))$" G "<0>5-12-02 04:42 AM" +"^(([0]?[1-9]|[1][0-2])[\/|\-|\.]([0-2]\d|[3][0-1]|[1-9])[\/|\-|\.]([2][0])?\d{2}\s+((([0][0-9]|[1][0-2]|[0-9])[\:|\-|\.]([0-5]\d)\s*([aApP][mM])?)|(([0-1][0-9]|[2][0-3]|[0-9])[\:|\-|\.]([0-5]\d))))$" G "<0>01.01/02 04-42aM" +"^(([0]?[1-9]|[1][0-2])[\/|\-|\.]([0-2]\d|[3][0-1]|[1-9])[\/|\-|\.]([2][0])?\d{2}\s+((([0][0-9]|[1][0-2]|[0-9])[\:|\-|\.]([0-5]\d)\s*([aApP][mM])?)|(([0-1][0-9]|[2][0-3]|[0-9])[\:|\-|\.]([0-5]\d))))$" "01-12-1999 4:50PM" +"^(([0]?[1-9]|[1][0-2])[\/|\-|\.]([0-2]\d|[3][0-1]|[1-9])[\/|\-|\.]([2][0])?\d{2}\s+((([0][0-9]|[1][0-2]|[0-9])[\:|\-|\.]([0-5]\d)\s*([aApP][mM])?)|(([0-1][0-9]|[2][0-3]|[0-9])[\:|\-|\.]([0-5]\d))))$" "01-12-2002 15:10PM" +"^(([0]?[1-9]|[1][0-2])[\/|\-|\.]([0-2]\d|[3][0-1]|[1-9])[\/|\-|\.]([2][0])?\d{2}\s+((([0][0-9]|[1][0-2]|[0-9])[\:|\-|\.]([0-5]\d)\s*([aApP][mM])?)|(([0-1][0-9]|[2][0-3]|[0-9])[\:|\-|\.]([0-5]\d))))$" "01-12-002 8:20PM" +"^([1][12]|[0]?[1-9])[\/-]([3][01]|[12]\d|[0]?[1-9])[\/-](\d{4}|\d{2})$" G "<0>11-02-02" +"^([1][12]|[0]?[1-9])[\/-]([3][01]|[12]\d|[0]?[1-9])[\/-](\d{4}|\d{2})$" G "<0>1-25-2002" +"^([1][12]|[0]?[1-9])[\/-]([3][01]|[12]\d|[0]?[1-9])[\/-](\d{4}|\d{2})$" G "<0>01/25/2002" +"^([1][12]|[0]?[1-9])[\/-]([3][01]|[12]\d|[0]?[1-9])[\/-](\d{4}|\d{2})$" "13-02-02" +"^([1][12]|[0]?[1-9])[\/-]([3][01]|[12]\d|[0]?[1-9])[\/-](\d{4}|\d{2})$" "11.02.02" +"^([1][12]|[0]?[1-9])[\/-]([3][01]|[12]\d|[0]?[1-9])[\/-](\d{4}|\d{2})$" "11/32/2002" +"(([0-1][0-9])|([2][0-3])):([0-5][0-9]):([0-5][0-9])" G "<0>09:30:00" +"(([0-1][0-9])|([2][0-3])):([0-5][0-9]):([0-5][0-9])" G "<0>17:45:20" +"(([0-1][0-9])|([2][0-3])):([0-5][0-9]):([0-5][0-9])" G "<0>23:59:59" +"(([0-1][0-9])|([2][0-3])):([0-5][0-9]):([0-5][0-9])" "24:00:00" +"(((0[1-9]|[12][0-9]|3[01])([-./])(0[13578]|10|12)([-./])(\d{4}))|(([0][1-9]|[12][0-9]|30)([-./])(0[469]|11)([-./])(\d{4}))|((0[1-9]|1[0-9]|2[0-8])([-./])(02)([-./])(\d{4}))|((29)(\.|-|\/)(02)([-./])([02468][048]00))|((29)([-./])(02)([-./])([13579][26]00))|((29)([-./])(02)([-./])([0-9][0-9][0][48]))|((29)([-./])(02)([-./])([0-9][0-9][2468][048]))|((29)([-./])(02)([-./])([0-9][0-9][13579][26])))" G "<0>29/02/2000" 
+"(((0[1-9]|[12][0-9]|3[01])([-./])(0[13578]|10|12)([-./])(\d{4}))|(([0][1-9]|[12][0-9]|30)([-./])(0[469]|11)([-./])(\d{4}))|((0[1-9]|1[0-9]|2[0-8])([-./])(02)([-./])(\d{4}))|((29)(\.|-|\/)(02)([-./])([02468][048]00))|((29)([-./])(02)([-./])([13579][26]00))|((29)([-./])(02)([-./])([0-9][0-9][0][48]))|((29)([-./])(02)([-./])([0-9][0-9][2468][048]))|((29)([-./])(02)([-./])([0-9][0-9][13579][26])))" G "<0>31/01/2000" +"(((0[1-9]|[12][0-9]|3[01])([-./])(0[13578]|10|12)([-./])(\d{4}))|(([0][1-9]|[12][0-9]|30)([-./])(0[469]|11)([-./])(\d{4}))|((0[1-9]|1[0-9]|2[0-8])([-./])(02)([-./])(\d{4}))|((29)(\.|-|\/)(02)([-./])([02468][048]00))|((29)([-./])(02)([-./])([13579][26]00))|((29)([-./])(02)([-./])([0-9][0-9][0][48]))|((29)([-./])(02)([-./])([0-9][0-9][2468][048]))|((29)([-./])(02)([-./])([0-9][0-9][13579][26])))" G "<0>30-01-2000" +"(((0[1-9]|[12][0-9]|3[01])([-./])(0[13578]|10|12)([-./])(\d{4}))|(([0][1-9]|[12][0-9]|30)([-./])(0[469]|11)([-./])(\d{4}))|((0[1-9]|1[0-9]|2[0-8])([-./])(02)([-./])(\d{4}))|((29)(\.|-|\/)(02)([-./])([02468][048]00))|((29)([-./])(02)([-./])([13579][26]00))|((29)([-./])(02)([-./])([0-9][0-9][0][48]))|((29)([-./])(02)([-./])([0-9][0-9][2468][048]))|((29)([-./])(02)([-./])([0-9][0-9][13579][26])))" "29/02/2002" +"(((0[1-9]|[12][0-9]|3[01])([-./])(0[13578]|10|12)([-./])(\d{4}))|(([0][1-9]|[12][0-9]|30)([-./])(0[469]|11)([-./])(\d{4}))|((0[1-9]|1[0-9]|2[0-8])([-./])(02)([-./])(\d{4}))|((29)(\.|-|\/)(02)([-./])([02468][048]00))|((29)([-./])(02)([-./])([13579][26]00))|((29)([-./])(02)([-./])([0-9][0-9][0][48]))|((29)([-./])(02)([-./])([0-9][0-9][2468][048]))|((29)([-./])(02)([-./])([0-9][0-9][13579][26])))" "32/01/2002" +"(((0[1-9]|[12][0-9]|3[01])([-./])(0[13578]|10|12)([-./])(\d{4}))|(([0][1-9]|[12][0-9]|30)([-./])(0[469]|11)([-./])(\d{4}))|((0[1-9]|1[0-9]|2[0-8])([-./])(02)([-./])(\d{4}))|((29)(\.|-|\/)(02)([-./])([02468][048]00))|((29)([-./])(02)([-./])([13579][26]00))|((29)([-./])(02)([-./])([0-9][0-9][0][48]))|((29)([-./])(02)([-./])([0-9][0-9][2468][048]))|((29)([-./])(02)([-./])([0-9][0-9][13579][26])))" "10/2/2002" +"^0[1-6]{1}(([0-9]{2}){4})|((\s[0-9]{2}){4})|((-[0-9]{2}){4})$" G "<0>01 46 70 89 12" +"^0[1-6]{1}(([0-9]{2}){4})|((\s[0-9]{2}){4})|((-[0-9]{2}){4})$" G "<0>01-46-70-89-12" +"^0[1-6]{1}(([0-9]{2}){4})|((\s[0-9]{2}){4})|((-[0-9]{2}){4})$" G "<0>0146708912" +"^0[1-6]{1}(([0-9]{2}){4})|((\s[0-9]{2}){4})|((-[0-9]{2}){4})$" "01-46708912" +"^0[1-6]{1}(([0-9]{2}){4})|((\s[0-9]{2}){4})|((-[0-9]{2}){4})$" "01 46708912" +"^0[1-6]{1}(([0-9]{2}){4})|((\s[0-9]{2}){4})|((-[0-9]{2}){4})$" "+33235256677" +"^[0-9A-Za-z_ ]+(.[jJ][pP][gG]|.[gG][iI][fF])$" G "<0>good.gif" +"^[0-9A-Za-z_ ]+(.[jJ][pP][gG]|.[gG][iI][fF])$" G "<0>go d.GIf" +"^[0-9A-Za-z_ ]+(.[jJ][pP][gG]|.[gG][iI][fF])$" G "<0>goo_d.jPg" +"^[0-9A-Za-z_ ]+(.[jJ][pP][gG]|.[gG][iI][fF])$" "junk" +"^[0-9A-Za-z_ ]+(.[jJ][pP][gG]|.[gG][iI][fF])$" "bad.bad.gif" +"^[0-9A-Za-z_ ]+(.[jJ][pP][gG]|.[gG][iI][fF])$" "slash\gif." 
+"<[^>\s]*\bauthor\b[^>]*>" G '<0>' +"<[^>\s]*\bauthor\b[^>]*>" G "<0>" +# "<[^>\s]*\bauthor\b[^>]*>" G '<0>' #Debug should work +"<[^> ]*\bauthor\b[^>]*>" G "<0>" +"<[^> ]*\bauthor\b[^>]*>" G '<0>' +"<[^>\s]*\bauthor\b[^>]*>" "" +"<[^>\s]*\bauthor\b[^>]*>" "" +"<[^>\s]*\bauthor\b[^>]*>" "author" +"^(?:(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00)))(\/|-|\.)(?:0?2\1(?:29))$)|(?:(?:1[6-9]|[2-9]\d)?\d{2})(\/|-|\.)(?:(?:(?:0?[13578]|1[02])\2(?:31))|(?:(?:0?[1,3-9]|1[0-2])\2(29|30))|(?:(?:0?[1-9])|(?:1[0-2]))\2(?:0?[1-9]|1\d|2[0-8]))$" G "<0>04/2/29" +"^(?:(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00)))(\/|-|\.)(?:0?2\1(?:29))$)|(?:(?:1[6-9]|[2-9]\d)?\d{2})(\/|-|\.)(?:(?:(?:0?[13578]|1[02])\2(?:31))|(?:(?:0?[1,3-9]|1[0-2])\2(29|30))|(?:(?:0?[1-9])|(?:1[0-2]))\2(?:0?[1-9]|1\d|2[0-8]))$" G "<0>2002-4-30" +"^(?:(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00)))(\/|-|\.)(?:0?2\1(?:29))$)|(?:(?:1[6-9]|[2-9]\d)?\d{2})(\/|-|\.)(?:(?:(?:0?[13578]|1[02])\2(?:31))|(?:(?:0?[1,3-9]|1[0-2])\2(29|30))|(?:(?:0?[1-9])|(?:1[0-2]))\2(?:0?[1-9]|1\d|2[0-8]))$" G "<0>02.10.31" +"^(?:(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00)))(\/|-|\.)(?:0?2\1(?:29))$)|(?:(?:1[6-9]|[2-9]\d)?\d{2})(\/|-|\.)(?:(?:(?:0?[13578]|1[02])\2(?:31))|(?:(?:0?[1,3-9]|1[0-2])\2(29|30))|(?:(?:0?[1-9])|(?:1[0-2]))\2(?:0?[1-9]|1\d|2[0-8]))$" "2003/2/29" +"^(?:(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00)))(\/|-|\.)(?:0?2\1(?:29))$)|(?:(?:1[6-9]|[2-9]\d)?\d{2})(\/|-|\.)(?:(?:(?:0?[13578]|1[02])\2(?:31))|(?:(?:0?[1,3-9]|1[0-2])\2(29|30))|(?:(?:0?[1-9])|(?:1[0-2]))\2(?:0?[1-9]|1\d|2[0-8]))$" "02.4.31" +"^(?:(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00)))(\/|-|\.)(?:0?2\1(?:29))$)|(?:(?:1[6-9]|[2-9]\d)?\d{2})(\/|-|\.)(?:(?:(?:0?[13578]|1[02])\2(?:31))|(?:(?:0?[1,3-9]|1[0-2])\2(29|30))|(?:(?:0?[1-9])|(?:1[0-2]))\2(?:0?[1-9]|1\d|2[0-8]))$" "00/00/00" +'(\d*)\u0027*-*(\d*)/*(\d*)"' G '<0>5\u0027-3/16"' +'(\d*)\u0027*-*(\d*)/*(\d*)"' G '<0>1\u0027-2"' +'(\d*)\u0027*-*(\d*)/*(\d*)"' G '<0>5/16"' +'(\d*)\u0027*-*(\d*)/*(\d*)"' '1 3/16' +"^[1-9]{1}$|^[1-4]{1}[0-9]{1}$|^50$" G "<0>1" +"^[1-9]{1}$|^[1-4]{1}[0-9]{1}$|^50$" G "<0>23" +"^[1-9]{1}$|^[1-4]{1}[0-9]{1}$|^50$" G "<0>50" +"^[1-9]{1}$|^[1-4]{1}[0-9]{1}$|^50$" "0" +"^[1-9]{1}$|^[1-4]{1}[0-9]{1}$|^50$" "111" +"^[1-9]{1}$|^[1-4]{1}[0-9]{1}$|^50$" "xyz" +"^([ \u00c0-\u01ffa-zA-Z'])+$" G "<0>Jon Doe" +"^([ \u00c0-\u01ffa-zA-Z'])+$" G "<0>J\u00f8rn" +"^([ \u00c0-\u01ffa-zA-Z'])+$" G "<0>Mc'Neelan" +"^([ \u00c0-\u01ffa-zA-Z'])+$" "Henry); hacking attempt" +"^((([0]?[1-9]|1[0-2])(:|\.)(00|15|30|45)?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)(00|15|30|45)?))$" G "<0>1:00 PM" +"^((([0]?[1-9]|1[0-2])(:|\.)(00|15|30|45)?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)(00|15|30|45)?))$" G "<0>6:45 am" +"^((([0]?[1-9]|1[0-2])(:|\.)(00|15|30|45)?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)(00|15|30|45)?))$" G "<0>17:30" +"^((([0]?[1-9]|1[0-2])(:|\.)(00|15|30|45)?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)(00|15|30|45)?))$" "4:32 am" +"^((([0]?[1-9]|1[0-2])(:|\.)(00|15|30|45)?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)(00|15|30|45)?))$" "5:30:00 am" +"^((([0]?[1-9]|1[0-2])(:|\.)(00|15|30|45)?( 
)?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)(00|15|30|45)?))$" "17:01" +"(^\d*\.?\d*[1-9]+\d*$)|(^[1-9]+\d*\.\d*$)" G "<0>0.050" +"(^\d*\.?\d*[1-9]+\d*$)|(^[1-9]+\d*\.\d*$)" G "<0>5.0000" +"(^\d*\.?\d*[1-9]+\d*$)|(^[1-9]+\d*\.\d*$)" G "<0>5000" +"(^\d*\.?\d*[1-9]+\d*$)|(^[1-9]+\d*\.\d*$)" "0" +"(^\d*\.?\d*[1-9]+\d*$)|(^[1-9]+\d*\.\d*$)" "0.0" +"(^\d*\.?\d*[1-9]+\d*$)|(^[1-9]+\d*\.\d*$)" ".0" +"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" G "<0>Sacramento" +"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" "<0><2>San Francisco" +"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" "<0><3>San Luis Obispo" +"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" "SanFrancisco" +"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" "SanLuisObispo" +"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" "San francisco" +"^\{?[a-fA-F\d]{8}-([a-fA-F\d]{4}-){3}[a-fA-F\d]{12}\}?$" G "<0>{e02ff0e4-00ad-090A-c030-0d00a0008ba0}" +"^\{?[a-fA-F\d]{8}-([a-fA-F\d]{4}-){3}[a-fA-F\d]{12}\}?$" G "<0>e02ff0e4-00ad-090A-c030-0d00a0008ba0" +"^\{?[a-fA-F\d]{8}-([a-fA-F\d]{4}-){3}[a-fA-F\d]{12}\}?$" "0xe02ff0e400ad090Ac0300d00a0008ba0" +"^\{?[a-fA-F0-9]{8}-([a-fA-F0-9]{4}-){3}[a-fA-F0-9]{12}\}?$" G "<0>{e02ff0e4-00ad-090A-c030-0d00a0008ba0}" +"^\{?[a-fA-F0-9]{8}-([a-fA-F0-9]{4}-){3}[a-fA-F0-9]{12}\}?$" G "<0>e02ff0e4-00ad-090A-c030-0d00a0008ba0" +"^\{?[a-fA-F0-9]{8}-([a-fA-F0-9]{4}-){3}[a-fA-F0-9]{12}\}?$" "0xe02ff0e400ad090Ac0300d00a0008ba0" +"^([a-zA-Z0-9@*#]{8,15})$" G "<0>@12X*567" +"^([a-zA-Z0-9@*#]{8,15})$" G "<0>1#Zv96g@*Yfasd4" +"^([a-zA-Z0-9@*#]{8,15})$" G "<0>#67jhgt@erd" +"^([a-zA-Z0-9@*#]{8,15})$" "$12X*567" +"^([a-zA-Z0-9@*#]{8,15})$" "1#Zv_96" +"^([a-zA-Z0-9@*#]{8,15})$" "+678jhgt@erd" +'(("|\u0027)[a-z0-9\/\.\?\=\&]*(\.htm|\.asp|\.php|\.jsp)[a-z0-9\/\.\?\=\&]*("|\u0027))|(href=*?[a-z0-9\/\.\?\=\&"\u0027]*)' G '<0>href="produktsida.asp?kategori2=218"' +'(("|\u0027)[a-z0-9\/\.\?\=\&]*(\.htm|\.asp|\.php|\.jsp)[a-z0-9\/\.\?\=\&]*("|\u0027))|(href=*?[a-z0-9\/\.\?\=\&"\u0027]*)' G '<0>href="NuclearTesting.htm"' +'(("|\u0027)[a-z0-9\/\.\?\=\&]*(\.htm|\.asp|\.php|\.jsp)[a-z0-9\/\.\?\=\&]*("|\u0027))|(href=*?[a-z0-9\/\.\?\=\&"\u0027]*)' 'U Suck' +"^(((((0[1-9])|(1\d)|(2[0-8]))-((0[1-9])|(1[0-2])))|((31-((0[13578])|(1[02])))|((29|30)-((0[1,3-9])|(1[0-2])))))-((20[0-9][0-9]))|(29-02-20(([02468][048])|([13579][26]))))$" G "<0>05-01-2002" +"^(((((0[1-9])|(1\d)|(2[0-8]))-((0[1-9])|(1[0-2])))|((31-((0[13578])|(1[02])))|((29|30)-((0[1,3-9])|(1[0-2])))))-((20[0-9][0-9]))|(29-02-20(([02468][048])|([13579][26]))))$" G "<0>29-02-2004" +"^(((((0[1-9])|(1\d)|(2[0-8]))-((0[1-9])|(1[0-2])))|((31-((0[13578])|(1[02])))|((29|30)-((0[1,3-9])|(1[0-2])))))-((20[0-9][0-9]))|(29-02-20(([02468][048])|([13579][26]))))$" G "<0>31-12-2002" +"^(((((0[1-9])|(1\d)|(2[0-8]))-((0[1-9])|(1[0-2])))|((31-((0[13578])|(1[02])))|((29|30)-((0[1,3-9])|(1[0-2])))))-((20[0-9][0-9]))|(29-02-20(([02468][048])|([13579][26]))))$" "1-1-02" 
+"^(((((0[1-9])|(1\d)|(2[0-8]))-((0[1-9])|(1[0-2])))|((31-((0[13578])|(1[02])))|((29|30)-((0[1,3-9])|(1[0-2])))))-((20[0-9][0-9]))|(29-02-20(([02468][048])|([13579][26]))))$" "29-02-2002" +"^(((((0[1-9])|(1\d)|(2[0-8]))-((0[1-9])|(1[0-2])))|((31-((0[13578])|(1[02])))|((29|30)-((0[1,3-9])|(1[0-2])))))-((20[0-9][0-9]))|(29-02-20(([02468][048])|([13579][26]))))$" "31-11-2002" +"^\d*[0-9](|.\d*[0-9]|,\d*[0-9])?$" G "<0>123456.123456" +"^\d*[0-9](|.\d*[0-9]|,\d*[0-9])?$" G "<0>123456,123456" +"^\d*[0-9](|.\d*[0-9]|,\d*[0-9])?$" G "<0>123456" +"^\d*[0-9](|.\d*[0-9]|,\d*[0-9])?$" "123a.123" +"^\d*[0-9](|.\d*[0-9]|,\d*[0-9])?$" "123a,123" +"^\d*[0-9](|.\d*[0-9]|,\d*[0-9])?$" "a" +"^(ac|AC|al|AL|am|AM|ap|AP|ba|BA|ce|CE|df|DF|es|ES|go|GO|ma|MA|mg|MG|ms|MS|mt|MT|pa|PA|pb|PB|pe|PE|pi|PI|pr|PR|rj|RJ|rn|RN|ro|RO|rr|RR|rs|RS|sc|SC|se|SE|sp|SP|to|TO)$" G "<0>AC" +"^(ac|AC|al|AL|am|AM|ap|AP|ba|BA|ce|CE|df|DF|es|ES|go|GO|ma|MA|mg|MG|ms|MS|mt|MT|pa|PA|pb|PB|pe|PE|pi|PI|pr|PR|rj|RJ|rn|RN|ro|RO|rr|RR|rs|RS|sc|SC|se|SE|sp|SP|to|TO)$" G "<0>RJ" +"^(ac|AC|al|AL|am|AM|ap|AP|ba|BA|ce|CE|df|DF|es|ES|go|GO|ma|MA|mg|MG|ms|MS|mt|MT|pa|PA|pb|PB|pe|PE|pi|PI|pr|PR|rj|RJ|rn|RN|ro|RO|rr|RR|rs|RS|sc|SC|se|SE|sp|SP|to|TO)$" G "<0>SP" +"^(ac|AC|al|AL|am|AM|ap|AP|ba|BA|ce|CE|df|DF|es|ES|go|GO|ma|MA|mg|MG|ms|MS|mt|MT|pa|PA|pb|PB|pe|PE|pi|PI|pr|PR|rj|RJ|rn|RN|ro|RO|rr|RR|rs|RS|sc|SC|se|SE|sp|SP|to|TO)$" "XX" +"^(ac|AC|al|AL|am|AM|ap|AP|ba|BA|ce|CE|df|DF|es|ES|go|GO|ma|MA|mg|MG|ms|MS|mt|MT|pa|PA|pb|PB|pe|PE|pi|PI|pr|PR|rj|RJ|rn|RN|ro|RO|rr|RR|rs|RS|sc|SC|se|SE|sp|SP|to|TO)$" "AB" +"^(ac|AC|al|AL|am|AM|ap|AP|ba|BA|ce|CE|df|DF|es|ES|go|GO|ma|MA|mg|MG|ms|MS|mt|MT|pa|PA|pb|PB|pe|PE|pi|PI|pr|PR|rj|RJ|rn|RN|ro|RO|rr|RR|rs|RS|sc|SC|se|SE|sp|SP|to|TO)$" "HJ" +"^[+]?\d*$" G "<0>0123456789" +"^[+]?\d*$" G "<0>1234" +"^[+]?\d*$" G "<0>1" +"^[+]?\d*$" "1.0?&" +"^[+]?\d*$" "a1" +"^[+]?\d*$" "2a-" +#/<[aA][ ]{0,}([a-zA-Z0-9"'_,.:;!?@$\&()%=\u002f ]|[\-]|[ \f]){0,}>((<(([a-zA-Z0-9"'_,.:;!?@$\&()%=\u002f ]|[\-]|[ \f]){0,})>([a-zA-Z0-9"'_,.:;!?@$\&()%=\u002f ]|[\-]|[ \f]){0,})|(([a-zA-Z0-9"'_,.:;!?@$\&()%=\u002f ]|[\-]|[ \f]){0,})){1,}/ G "<0>this text is italicized" #TODO: Need infinite loop breaking +#/<[aA][ ]{0,}([a-zA-Z0-9"'_,.:;!?@$\&()%=\u002f ]|[\-]|[ \f]){0,}>((<(([a-zA-Z0-9"'_,.:;!?@$\&()%=\u002f ]|[\-]|[ \f]){0,})>([a-zA-Z0-9"'_,.:;!?@$\&()%=\u002f ]|[\-]|[ \f]){0,})|(([a-zA-Z0-9"'_,.:;!?@$\&()%=\u002f ]|[\-]|[ \f]){0,})){1,}/ "

" #TODO: need infinite loop breaking. +"^([0-1]?[0-9]|[2][0-3]):([0-5][0-9])$" G "<0>0:00" +"^([0-1]?[0-9]|[2][0-3]):([0-5][0-9])$" G "<0>23:00" +"^([0-1]?[0-9]|[2][0-3]):([0-5][0-9])$" G "<0>00:59" +"^([0-1]?[0-9]|[2][0-3]):([0-5][0-9])$" "0:0" +"^([0-1]?[0-9]|[2][0-3]):([0-5][0-9])$" "24:00" +"^([0-1]?[0-9]|[2][0-3]):([0-5][0-9])$" "00:60" +"^((0[1-9])|(1[0-2]))\/(\d{2})$" G "<0>11/03" +"^((0[1-9])|(1[0-2]))\/(\d{2})$" G "<0>01/04" +"^((0[1-9])|(1[0-2]))\/(\d{2})$" "13/03" +"^((0[1-9])|(1[0-2]))\/(\d{2})$" "10/2003" +"]*>[\w|\t|\r|\W]*" G '<0>' +"]*>[\w|\t|\r|\W]*" "--" +"]*>[\w|\t|\r|\W]*" "A-Z][a-z]+" +#"]*>[\w|\t|\r|\W]*" G "<0>strFirstName" # Test Case damaged? +#"]*>[\w|\t|\r|\W]*" G "<0>intAgeInYears" # Test Case damaged? +#"]*>[\w|\t|\r|\W]*" G "<0>Where the Wild Things Are" # Test Case damaged? +"]*>[\w|\t|\r|\W]*" "123" +"]*>[\w|\t|\r|\W]*" "abc" +"]*>[\w|\t|\r|\W]*" "this has no caps in it" +"(^-\d*\.?\d*[1-9]+\d*$)|(^-[1-9]+\d*\.\d*$)" G "<0>-0.050" +"(^-\d*\.?\d*[1-9]+\d*$)|(^-[1-9]+\d*\.\d*$)" G "<0>-5.000" +"(^-\d*\.?\d*[1-9]+\d*$)|(^-[1-9]+\d*\.\d*$)" G "<0>-5" +"(^-\d*\.?\d*[1-9]+\d*$)|(^-[1-9]+\d*\.\d*$)" "0" +"(^-\d*\.?\d*[1-9]+\d*$)|(^-[1-9]+\d*\.\d*$)" "0.0" +"(^-\d*\.?\d*[1-9]+\d*$)|(^-[1-9]+\d*\.\d*$)" ".0" +"^([2][0]\d{2}\/([0]\d|[1][0-2])\/([0-2]\d|[3][0-1]))$|^([2][0]\d{2}\/([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$" G "<0>2002/02/03" +"^([2][0]\d{2}\/([0]\d|[1][0-2])\/([0-2]\d|[3][0-1]))$|^([2][0]\d{2}\/([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$" G "<0>2002/02/03 12:12:18" +"^([2][0]\d{2}\/([0]\d|[1][0-2])\/([0-2]\d|[3][0-1]))$|^([2][0]\d{2}\/([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$" "2002/02/36" +"^([2][0]\d{2}\/([0]\d|[1][0-2])\/([0-2]\d|[3][0-1]))$|^([2][0]\d{2}\/([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$" "02/03/2002" +"^(\d|,)*\.?\d*$" G "<0>1,000" +"^(\d|,)*\.?\d*$" G "<0>3,000.05" +"^(\d|,)*\.?\d*$" G "<0>5,000,000" +"^(\d|,)*\.?\d*$" "abc" +"^(\d|,)*\.?\d*$" "$100,000" +"^(\d|,)*\.?\d*$" "Forty" +"^\d$" G "<0>1" +"^\d$" G "<0>2" +"^\d$" G "<0>3" +"^\d$" "a" +"^\d$" "324" +"^\d$" "num" +"^[0-9]+$" G "<0>1234567890" +"^[0-9]+$" G "<0>1234567890" +"^[0-9]+$" G "<0>1234567890" +"^[0-9]+$" "http://none" +"^[0-9]+$" "http://none" +"^[0-9]+$" "http://none" +"^.{4,8}$" G "<0>asdf" +"^.{4,8}$" G "<0>1234" +"^.{4,8}$" G "<0>asdf1234" +"^.{4,8}$" "asd" +"^.{4,8}$" "123" +"^.{4,8}$" "asdfe12345" +"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" G "<0>a@a.com" +"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" G "<0>a@a.com.au" +"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" G "<0>a@a.au" +"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" "word" +"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" "word@" +"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" "@word" +"^\d{5}-\d{4}$" G "<0>22222-3333" +"^\d{5}-\d{4}$" G "<0>34545-2367" +"^\d{5}-\d{4}$" G "<0>56334-2343" +"^\d{5}-\d{4}$" "123456789" +"^\d{5}-\d{4}$" "A3B 4C5" +"^\d{5}-\d{4}$" "55335" +"(a|b|c).(a.b)*.b+.c" G "<0>autbfc" +"(a|b|c).(a.b)*.b+.c" "attc" +'"((\\")|[^"(\\")])+"' G '<0>"test"' +'"((\\")|[^"(\\")])+"' G '<0>"escape\"quote"' +'"((\\")|[^"(\\")])+"' G '<0>"\\""' +'"((\\")|[^"(\\")])+"' "test" +'"((\\")|[^"(\\")])+"' '"test' +'"((\\")|[^"(\\")])+"' '""test\\"' +"((0[1-9])|(1[02]))/\d{2}" G "<0>01/00" +"((0[1-9])|(1[02]))/\d{2}" G "<0>12/99" +"((0[1-9])|(1[02]))/\d{2}" "13/00" +"((0[1-9])|(1[02]))/\d{2}" "12/AS" +"^[a-zA-Z]$" G "<0>a" +"^[a-zA-Z]$" G "<0>B" +"^[a-zA-Z]$" G "<0>c" +"^[a-zA-Z]$" "0" +"^[a-zA-Z]$" "&" +"^[a-zA-Z]$" "AbC" +"^[a-zA-Z]+$" G 
"<0>abc" +"^[a-zA-Z]+$" G "<0>ABC" +"^[a-zA-Z]+$" G "<0>aBcDeF" +"^[a-zA-Z]+$" "abc123" +"^[a-zA-Z]+$" "mr." +"^[a-zA-Z]+$" "a word" +"^\s*[a-zA-Z,\p{Zs}]+\s*$" G "<0>Smith, Ed" +"^\s*[a-zA-Z,\p{Zs}]+\s*$" G "<0>Ed Smith" +"^\s*[a-zA-Z,\p{Zs}]+\s*$" G "<0>aBcDeFgH" +"^\s*[a-zA-Z,\p{Zs}]+\s*$" "a123" +"^\s*[a-zA-Z,\p{Zs}]+\s*$" "AB5" +"^\s*[a-zA-Z,\p{Zs}]+\s*$" "Mr. Ed" +"(\w+?@\w+?\u002E.+)" G "<0>bob@vsnl.com" +"(\w+?@\w+?\u002E.+)" "[AABB]" +"^\d+$" G "<0>123" +"^\d+$" G "<0>10" +"^\d+$" G "<0>54" +"^\d+$" "-54" +"^\d+$" "54.234" +"^\d+$" "abc" +"^(\+|-)?\d+$" G "<0>-34" +"^(\+|-)?\d+$" G "<0>34" +"^(\+|-)?\d+$" G "<0>+5" +"^(\+|-)?\d+$" "abc" +"^(\+|-)?\d+$" "3.1415" +"^(\+|-)?\d+$" "-5.3" +"foo" G "<0>foo" +"foo" "bar" +"^[1-5]$" G "<0>1" +"^[1-5]$" G "<0>3" +"^[1-5]$" G "<0>4" +"^[1-5]$" "6" +"^[1-5]$" "23" +"^[1-5]$" "a" +"^[12345]$" G "<0>1" +"^[12345]$" G "<0>2" +"^[12345]$" G "<0>4" +"^[12345]$" "6" +"^[12345]$" "-1" +"^[12345]$" "abc" +"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" G "<0>joe@aol.com" +"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" G "<0>joe@wrox.co.uk" +"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" G "<0>joe@domain.info" +"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" "a@b" +"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" "notanemail" +"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" "joe@@." +"^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$" G "<0>joe@aol.com" +"^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$" G "<0>ssmith@aspalliance.com" +"^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$" G "<0>a@b.cc" +"^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$" "joe@123aspx.com" +"^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$" "joe@web.info" +"^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$" "joe@company.co.uk" +"[\w-]+@([\w-]+\.)+[\w-]+" G "<0>joe@aol.com" +"[\w-]+@([\w-]+\.)+[\w-]+" G "<0>a@b.c" +"[\w-]+@([\w-]+\.)+[\w-]+" "asdf" +"[\w-]+@([\w-]+\.)+[\w-]+" "1234" +"\d{4}-?\d{4}-?\d{4}-?\d{4}" G "<0>1234-1234-1234-1234" +"\d{4}-?\d{4}-?\d{4}-?\d{4}" G "<0>1234123412341234" +"\d{4}-?\d{4}-?\d{4}-?\d{4}" "1234123412345" +"^\d{5}$" G "<0>33333" +"^\d{5}$" G "<0>55555" +"^\d{5}$" G "<0>23445" +"^\d{5}$" "abcd" +"^\d{5}$" "1324" +"^\d{5}$" "as;lkjdf" +"(\w+)\s+\1" G "<0>hubba hubba" +"(\w+)\s+\1" G "<0>mandate dated" +"(\w+)\s+\1" G "<0>an annual" +"(\w+)\s+\1" "may day" +"(\w+)\s+\1" "gogo" +"(\w+)\s+\1" "1212" +"^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>3SquareBand.com" +"^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>asp.net" +"^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>army.mil" +"^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "$SquareBand.com" +"^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "asp/dot.net" +"^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "army.military" + diff --git a/go/mysql/icuregex/testdata/regextst_extended.txt b/go/mysql/icuregex/testdata/regextst_extended.txt new file mode 100644 index 00000000000..841e5e46092 --- /dev/null +++ b/go/mysql/icuregex/testdata/regextst_extended.txt @@ -0,0 +1,126 @@ +# Copyright (C) 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html +# Copyright (c) 2001-2015 International Business Machines +# Corporation and others. All Rights Reserved. +# +# file: +# +# ICU regular expression test cases. +# +# format: one test case per line, +# = [# comment] +# = "" +# = "" +# the quotes on the pattern and match string can be " or ' or / +# = text, with the start and end of each +# capture group tagged with .... The overall match, +# if any, is group 0, as in <0>matched text +# A region can be specified with ... 
tags. +# Standard ICU unescape will be applied, allowing \u, \U, etc. to appear. +# +# = any combination of +# i case insensitive match +# x free spacing and comments +# s dot-matches-all mode +# m multi-line mode. +# ($ and ^ match at embedded new-lines) +# D Unix Lines mode (only recognize 0x0a as new-line) +# Q UREGEX_LITERAL flag. Entire pattern is literal string. +# v If icu configured without break iteration, this +# regex test pattern should not compile. +# e set the UREGEX_ERROR_ON_UNKNOWN_ESCAPES flag +# d dump the compiled pattern +# t trace operation of match engine. +# 2-9 a digit between 2 and 9, specifies the number of +# times to execute find(). The expected results are +# for the last find() in the sequence. +# G Only check match / no match. Do not check capture groups. +# E Pattern compilation error expected +# L Use LookingAt() rather than find() +# M Use matches() rather than find(). +# +# a Use non-Anchoring Bounds. +# b Use Transparent Bounds. +# The a and b options only make a difference if +# a region has been specified in the string. +# z|Z hitEnd was expected(z) or not expected (Z). +# With neither, hitEnd is not checked. +# y|Y Require End expected(y) or not expected (Y). +# +# White space must be present between the flags and the match string. +# + +"[:xdigit:]" " <0>4f" +"\P{XDIGIT}+" "4f<0> " + +"[:blank:]" "<0> 4f" +"\P{BLANK}+" "<0>4f " + +"[:print:]" "<0> 4f\x07" +"\P{PRINT}+" " 4f<0>\x07" + +"\p{Age=1.1}" "<0>4f🥱" +"\p{Age=11}" "4f🥱" +"\p{Age=12}" "4f<0>🥱" + +"\p{Name=LATIN SMALL LETTER B}" "Good<0>bye" + +"\p{Numeric_Value=3}" "Good<0>3ye" +"\p{Numeric_Value=14}" "Good<0>⑭ye" + +"\p{Script_Extensions=Greek}" "Good<0>βye" + +"\p{Bidi_Control}" "Good<0>\u200Eye" +"\p{Bidi_Class=LeftToRight}" "<0>Goodbye" +"\p{Bidi_Class=RightToLeft}" "Goodbye" +"\p{Bidi_Class=LeftToRight}" "؈" +"\p{Bidi_Paired_Bracket_Type=Open}" "Good<0>(ye" + +"\p{Soft_Dotted}" "Good<0>iye" + +"\p{Changes_When_Lowercased}" "<0>Goodbye" +"\p{Changes_When_Titlecased}" "<0>goodbye" +"\p{Changes_When_Uppercased}" "G<0>oodbye" +"\p{Changes_When_CaseMapped}" " <0>Goodbye3" +"\p{Cased}" " <0>Goodbye3" +"\p{CaseIgnorable}" "foo<0>.bar" + +"\p{Indic_Syllabic_Category=Avagraha}" "foo<0>\u09BDbar" +"\p{IndicPositionalCategory=Top_And_Left_And_Right}" "foo<0>\u0B4Cbar" +"\p{VerticalOrientation=U}" "foo<0>\uA015bar" + +"\p{Canonical_Combining_Class=Nukta}" "foo<0>\u093Cbar" +"\p{Lead_Canonical_Combining_Class=Above}" "foo<0>\u0300bar" +"\p{Trail_Canonical_Combining_Class=Above}" "foo<0>\u0300bar" + +"\p{Changes_When_Casefolded}" "<0>\uFB03Goodbye" +"\p{Changes_When_Casefolded}" 2 "\uFB03<0>Goodbye" + +"\p{NFC_Inert}" "foo<0>\uFB03bar" +"\p{NFKC_Inert}" "foo<0>\uFB03bar" +"\P{NFD_Inert}" "foo<0>Àbar" +"\P{NFKD_Inert}" "foo<0>Àbar" + +"\p{NFC_Quick_Check=No}" "foo<0>\u0340bar" +"\p{NFKC_Quick_Check=No}" "foo<0>\u0340bar" +"\p{NFD_Quick_Check=No}" "foo<0>\u00C0bar" +"\p{NFKD_Quick_Check=No}" "foo<0>\u00C0bar" + +"\p{Full_Composition_Exclusion}" "foo<0>\u0374bar" + +"\p{Numeric_Type=Decimal}" "foo<0>3bar" +"\p{Joining_Type=Dual_Joining}" "foo<0>\u0626bar" +"\p{Joining_Group=African_Feh}" "foo<0>\u08BBbar" +"\p{General_Category=Close_Punctuation}" "foo[bar" +"\p{General_Category=Close_Punctuation}" "foo<0>]]bar" +"\p{General_Category=Close_Punctuation}" 2 "foo]<0>]bar" + +"\p{Hangul_Syllable_Type=Not_Applicable}" "<0>f" +"\p{Hangul_Syllable_Type=Leading_Jamo}" "foo<0>\u1100bar" + +"\p{Regional_Indicator=Yes}" "foo<0>\U0001F1E6bar" + +# Currently unsupported property classes below. 
They require +# significant additional code to support. +"\p{Changes_When_NFKC_Casefolded}" E "foo<0>\uFB03bar" +"\p{Segment_Starter}" E "<0>\uFB03Goodbye" \ No newline at end of file diff --git a/go/mysql/sql_error.go b/go/mysql/sql_error.go index c400de4ef9a..ac988033e3d 100644 --- a/go/mysql/sql_error.go +++ b/go/mysql/sql_error.go @@ -218,6 +218,28 @@ var stateToMysqlCode = map[vterrors.State]mysqlCode{ vterrors.WrongArguments: {num: ERWrongArguments, state: SSUnknownSQLState}, vterrors.UnknownStmtHandler: {num: ERUnknownStmtHandler, state: SSUnknownSQLState}, vterrors.UnknownTimeZone: {num: ERUnknownTimeZone, state: SSUnknownSQLState}, + vterrors.RegexpStringNotTerminated: {num: ERRegexpStringNotTerminated, state: SSUnknownSQLState}, + vterrors.RegexpBufferOverflow: {num: ERRegexpBufferOverflow, state: SSUnknownSQLState}, + vterrors.RegexpIllegalArgument: {num: ERRegexpIllegalArgument, state: SSUnknownSQLState}, + vterrors.RegexpIndexOutOfBounds: {num: ERRegexpIndexOutOfBounds, state: SSUnknownSQLState}, + vterrors.RegexpInternal: {num: ERRegexpInternal, state: SSUnknownSQLState}, + vterrors.RegexpRuleSyntax: {num: ERRegexpRuleSyntax, state: SSUnknownSQLState}, + vterrors.RegexpBadEscapeSequence: {num: ERRegexpBadEscapeSequence, state: SSUnknownSQLState}, + vterrors.RegexpUnimplemented: {num: ERRegexpUnimplemented, state: SSUnknownSQLState}, + vterrors.RegexpMismatchParen: {num: ERRegexpMismatchParen, state: SSUnknownSQLState}, + vterrors.RegexpBadInterval: {num: ERRegexpBadInterval, state: SSUnknownSQLState}, + vterrors.RegexpMaxLtMin: {num: ERRRegexpMaxLtMin, state: SSUnknownSQLState}, + vterrors.RegexpInvalidBackRef: {num: ERRegexpInvalidBackRef, state: SSUnknownSQLState}, + vterrors.RegexpLookBehindLimit: {num: ERRegexpLookBehindLimit, state: SSUnknownSQLState}, + vterrors.RegexpMissingCloseBracket: {num: ERRegexpMissingCloseBracket, state: SSUnknownSQLState}, + vterrors.RegexpInvalidRange: {num: ERRegexpInvalidRange, state: SSUnknownSQLState}, + vterrors.RegexpStackOverflow: {num: ERRegexpStackOverflow, state: SSUnknownSQLState}, + vterrors.RegexpTimeOut: {num: ERRegexpTimeOut, state: SSUnknownSQLState}, + vterrors.RegexpPatternTooBig: {num: ERRegexpPatternTooBig, state: SSUnknownSQLState}, + vterrors.RegexpInvalidFlag: {num: ERRegexpInvalidFlag, state: SSUnknownSQLState}, + vterrors.RegexpInvalidCaptureGroup: {num: ERRegexpInvalidCaptureGroup, state: SSUnknownSQLState}, + vterrors.CharacterSetMismatch: {num: ERCharacterSetMismatch, state: SSUnknownSQLState}, + vterrors.WrongParametersToNativeFct: {num: ERWrongParametersToNativeFct, state: SSUnknownSQLState}, } func getStateToMySQLState(state vterrors.State) mysqlCode { diff --git a/go/vt/vterrors/state.go b/go/vt/vterrors/state.go index d7ed04e1c7b..609ab6fbd1b 100644 --- a/go/vt/vterrors/state.go +++ b/go/vt/vterrors/state.go @@ -88,6 +88,31 @@ const ( // unknown timezone UnknownTimeZone + // regexp errors + RegexpStringNotTerminated + RegexpBufferOverflow + RegexpIllegalArgument + RegexpIndexOutOfBounds + RegexpInternal + RegexpRuleSyntax + RegexpBadEscapeSequence + RegexpUnimplemented + RegexpMismatchParen + RegexpBadInterval + RegexpMaxLtMin + RegexpInvalidBackRef + RegexpLookBehindLimit + RegexpMissingCloseBracket + RegexpInvalidRange + RegexpStackOverflow + RegexpTimeOut + RegexpPatternTooBig + RegexpInvalidCaptureGroup + RegexpInvalidFlag + + CharacterSetMismatch + WrongParametersToNativeFct + // No state should be added below NumOfStates NumOfStates ) diff --git a/go/vt/vtgate/evalengine/cached_size.go 
b/go/vt/vtgate/evalengine/cached_size.go index c249bf3e86c..ea525e46a25 100644 --- a/go/vt/vtgate/evalengine/cached_size.go +++ b/go/vt/vtgate/evalengine/cached_size.go @@ -1257,6 +1257,54 @@ func (cached *builtinRandomBytes) CachedSize(alloc bool) int64 { size += cached.CallExpr.CachedSize(false) return size } +func (cached *builtinRegexpInstr) CachedSize(alloc bool) int64 { + if cached == nil { + return int64(0) + } + size := int64(0) + if alloc { + size += int64(48) + } + // field CallExpr vitess.io/vitess/go/vt/vtgate/evalengine.CallExpr + size += cached.CallExpr.CachedSize(false) + return size +} +func (cached *builtinRegexpLike) CachedSize(alloc bool) int64 { + if cached == nil { + return int64(0) + } + size := int64(0) + if alloc { + size += int64(48) + } + // field CallExpr vitess.io/vitess/go/vt/vtgate/evalengine.CallExpr + size += cached.CallExpr.CachedSize(false) + return size +} +func (cached *builtinRegexpReplace) CachedSize(alloc bool) int64 { + if cached == nil { + return int64(0) + } + size := int64(0) + if alloc { + size += int64(48) + } + // field CallExpr vitess.io/vitess/go/vt/vtgate/evalengine.CallExpr + size += cached.CallExpr.CachedSize(false) + return size +} +func (cached *builtinRegexpSubstr) CachedSize(alloc bool) int64 { + if cached == nil { + return int64(0) + } + size := int64(0) + if alloc { + size += int64(48) + } + // field CallExpr vitess.io/vitess/go/vt/vtgate/evalengine.CallExpr + size += cached.CallExpr.CachedSize(false) + return size +} func (cached *builtinRepeat) CachedSize(alloc bool) int64 { if cached == nil { return int64(0) diff --git a/go/vt/vtgate/evalengine/compare.go b/go/vt/vtgate/evalengine/compare.go index f2262cf8730..deee5fdb520 100644 --- a/go/vt/vtgate/evalengine/compare.go +++ b/go/vt/vtgate/evalengine/compare.go @@ -137,7 +137,7 @@ func compareStrings(l, r eval) (int, error) { if err != nil { return 0, err } - collation := col.Get() + collation := col.Collation.Get() if collation == nil { panic("unknown collation after coercion") } diff --git a/go/vt/vtgate/evalengine/compiler_asm.go b/go/vt/vtgate/evalengine/compiler_asm.go index 870c32fd767..1267eaf1d1d 100644 --- a/go/vt/vtgate/evalengine/compiler_asm.go +++ b/go/vt/vtgate/evalengine/compiler_asm.go @@ -35,6 +35,8 @@ import ( "github.com/google/uuid" + "vitess.io/vitess/go/mysql/icuregex" + "vitess.io/vitess/go/hack" "vitess.io/vitess/go/mysql/collations" "vitess.io/vitess/go/mysql/collations/charset" @@ -3942,10 +3944,6 @@ func (asm *assembler) Fn_YEARWEEK() { }, "FN YEARWEEK DATE(SP-1)") } -func intervalStackOffset(l, i int) int { - return l - i + 1 -} - func (asm *assembler) Interval_i(l int) { asm.adjustStack(-l) asm.emit(func(env *ExpressionEnv) int { @@ -4285,3 +4283,442 @@ func (asm *assembler) Fn_DATEADD_s(unit datetime.IntervalType, sub bool, col col }, "FN DATEADD TEMPORAL(SP-2), INTERVAL(SP-1)") } + +func (asm *assembler) Fn_REGEXP_LIKE(m *icuregex.Matcher, negate bool, c charset.Charset, offset int) { + asm.adjustStack(-offset) + asm.emit(func(env *ExpressionEnv) int { + input := env.vm.stack[env.vm.sp-offset-1].(*evalBytes) + m.Reset(charset.Expand(nil, input.bytes, c)) + + ok, err := m.Find() + if err != nil { + env.vm.err = err + env.vm.sp -= offset + return 1 + } + if negate { + ok = !ok + } + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalBool(ok) + env.vm.sp -= offset + return 1 + }, "FN REGEXP_LIKE VARCHAR(SP-2), VARCHAR(SP-1)") +} + +func (asm *assembler) Fn_REGEXP_LIKE_slow(negate bool, c collations.Charset, flags icuregex.RegexpFlag, offset int) { + 
asm.adjustStack(-offset) + asm.emit(func(env *ExpressionEnv) int { + var err error + input := env.vm.stack[env.vm.sp-offset-1].(*evalBytes) + pattern := env.vm.stack[env.vm.sp-offset].(*evalBytes) + + if offset > 1 { + fe := env.vm.stack[env.vm.sp-offset+1] + flags, err = regexpFlags(fe, flags, "regexp_like") + if err != nil { + env.vm.err = err + env.vm.sp -= offset + return 1 + } + } + + p, err := compileRegex(pattern, c, flags) + if err != nil { + env.vm.err = err + env.vm.sp -= offset + return 1 + } + + m := icuregex.NewMatcher(p) + m.Reset(charset.Expand(nil, input.bytes, c)) + + ok, err := m.Find() + if err != nil { + env.vm.err = err + env.vm.sp-- + return 1 + } + if negate { + ok = !ok + } + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalBool(ok) + env.vm.sp -= offset + return 1 + }, "FN REGEXP_LIKE_SLOW VARCHAR(SP-2), VARCHAR(SP-1)") +} + +func (asm *assembler) Fn_REGEXP_INSTR(m *icuregex.Matcher, c charset.Charset, offset int) { + asm.adjustStack(-offset) + asm.emit(func(env *ExpressionEnv) int { + input := env.vm.stack[env.vm.sp-offset-1].(*evalBytes) + runes := charset.Expand(nil, input.bytes, c) + + if len(runes) == 0 { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalInt64(0) + env.vm.sp -= offset + return 1 + } + + pos := int64(1) + if offset > 1 { + pos, env.vm.err = positionInstr(env.vm.stack[env.vm.sp-offset+1].(*evalInt64), int64(len(runes))) + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + } + + occ := int64(1) + if offset > 2 { + occ = occurrence(env.vm.stack[env.vm.sp-offset+2].(*evalInt64), occ) + } + + returnOpt := int64(0) + if offset > 3 { + returnOpt, env.vm.err = returnOption(env.vm.stack[env.vm.sp-offset+3].(*evalInt64), "regexp_instr") + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + } + + m.Reset(runes[pos-1:]) + + found := false + for i := int64(0); i < occ; i++ { + found, env.vm.err = m.Find() + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + if !found { + break + } + } + if !found { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalInt64(0) + } else if returnOpt == 0 { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalInt64(int64(m.Start()) + pos) + } else { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalInt64(int64(m.End()) + pos) + } + env.vm.sp -= offset + return 1 + }, "FN REGEXP_INSTR VARCHAR(SP-2), VARCHAR(SP-1)") +} + +func (asm *assembler) Fn_REGEXP_INSTR_slow(c collations.Charset, flags icuregex.RegexpFlag, offset int) { + asm.adjustStack(-offset) + asm.emit(func(env *ExpressionEnv) int { + input := env.vm.stack[env.vm.sp-offset-1].(*evalBytes) + pattern := env.vm.stack[env.vm.sp-offset].(*evalBytes) + + if offset > 4 { + fe := env.vm.stack[env.vm.sp-offset+4] + flags, env.vm.err = regexpFlags(fe, flags, "regexp_instr") + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + } + + p, err := compileRegex(pattern, c, flags) + if err != nil { + env.vm.err = err + env.vm.sp -= offset + return 1 + } + + runes := charset.Expand(nil, input.bytes, c) + if len(runes) == 0 { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalInt64(0) + env.vm.sp -= offset + return 1 + } + + pos := int64(1) + if offset > 1 { + pos, env.vm.err = positionInstr(env.vm.stack[env.vm.sp-offset+1].(*evalInt64), int64(len(runes))) + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + } + + occ := int64(1) + if offset > 2 { + occ = occurrence(env.vm.stack[env.vm.sp-offset+2].(*evalInt64), occ) + } + + returnOpt := int64(0) + if offset > 3 { + returnOpt, env.vm.err = 
returnOption(env.vm.stack[env.vm.sp-offset+3].(*evalInt64), "regexp_instr") + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + } + + m := icuregex.NewMatcher(p) + m.Reset(runes[pos-1:]) + + found := false + for i := int64(0); i < occ; i++ { + found, env.vm.err = m.Find() + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + if !found { + break + } + } + if !found { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalInt64(0) + } else if returnOpt == 0 { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalInt64(int64(m.Start()) + pos) + } else { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalInt64(int64(m.End()) + pos) + } + env.vm.sp -= offset + return 1 + }, "FN REGEXP_INSTR_SLOW VARCHAR(SP-2), VARCHAR(SP-1)") +} + +func (asm *assembler) Fn_REGEXP_SUBSTR(m *icuregex.Matcher, merged collations.TypedCollation, offset int) { + asm.adjustStack(-offset) + asm.emit(func(env *ExpressionEnv) int { + input := env.vm.stack[env.vm.sp-offset-1].(*evalBytes) + c := merged.Collation.Get().Charset() + runes := charset.Expand(nil, input.bytes, c) + + pos := int64(1) + if offset > 1 { + limit := int64(len(runes)) + pos, env.vm.err = position(env.vm.stack[env.vm.sp-offset+1].(*evalInt64), limit, "regexp_substr") + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + if pos-1 == limit { + env.vm.stack[env.vm.sp-offset-1] = nil + env.vm.sp -= offset + return 1 + } + } + + occ := int64(1) + if offset > 2 { + occ = occurrence(env.vm.stack[env.vm.sp-offset+2].(*evalInt64), occ) + } + + m.Reset(runes[pos-1:]) + + found := false + for i := int64(0); i < occ; i++ { + found, env.vm.err = m.Find() + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + if !found { + break + } + } + + if !found { + env.vm.stack[env.vm.sp-offset-1] = nil + } else { + out := runes[int64(m.Start())+pos-1 : int64(m.End())+pos-1] + b := charset.Collapse(nil, out, c) + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalText(b, resultCollation(merged)) + } + env.vm.sp -= offset + return 1 + }, "FN REGEXP_SUBSTR VARCHAR(SP-2), VARCHAR(SP-1)") +} + +func (asm *assembler) Fn_REGEXP_SUBSTR_slow(merged collations.TypedCollation, flags icuregex.RegexpFlag, offset int) { + asm.adjustStack(-offset) + asm.emit(func(env *ExpressionEnv) int { + input := env.vm.stack[env.vm.sp-offset-1].(*evalBytes) + pattern := env.vm.stack[env.vm.sp-offset].(*evalBytes) + c := merged.Collation.Get().Charset() + runes := charset.Expand(nil, input.bytes, c) + + pos := int64(1) + if offset > 1 { + limit := int64(len(runes)) + pos, env.vm.err = position(env.vm.stack[env.vm.sp-offset+1].(*evalInt64), limit, "regexp_substr") + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + if pos-1 == limit { + env.vm.stack[env.vm.sp-offset-1] = nil + env.vm.sp -= offset + return 1 + } + } + + occ := int64(1) + if offset > 2 { + occ = occurrence(env.vm.stack[env.vm.sp-offset+2].(*evalInt64), occ) + } + + if offset > 3 { + fe := env.vm.stack[env.vm.sp-offset+3] + flags, env.vm.err = regexpFlags(fe, flags, "regexp_substr") + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + } + + p, err := compileRegex(pattern, c, flags) + if err != nil { + env.vm.err = err + env.vm.sp -= offset + return 1 + } + + m := icuregex.NewMatcher(p) + m.Reset(runes[pos-1:]) + + found := false + for i := int64(0); i < occ; i++ { + found, env.vm.err = m.Find() + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + if !found { + break + } + } + + if !found { + env.vm.stack[env.vm.sp-offset-1] = nil + } else { + out := 
runes[int64(m.Start())+pos-1 : int64(m.End())+pos-1] + b := charset.Collapse(nil, out, c) + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalText(b, resultCollation(merged)) + } + env.vm.sp -= offset + return 1 + }, "FN REGEXP_SUBSTR_SLOW VARCHAR(SP-2), VARCHAR(SP-1)") +} + +func (asm *assembler) Fn_REGEXP_REPLACE(m *icuregex.Matcher, merged collations.TypedCollation, offset int) { + asm.adjustStack(-offset) + asm.emit(func(env *ExpressionEnv) int { + input := env.vm.stack[env.vm.sp-offset-1].(*evalBytes) + repl := env.vm.stack[env.vm.sp-offset+1].(*evalBytes) + + c := merged.Collation.Get().Charset() + inputRunes := charset.Expand(nil, input.bytes, c) + replRunes := charset.Expand(nil, repl.bytes, c) + + pos := int64(1) + if offset > 2 { + limit := int64(len(inputRunes)) + pos, env.vm.err = position(env.vm.stack[env.vm.sp-offset+2].(*evalInt64), limit, "regexp_replace") + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + if pos-1 == limit { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalRaw(input.bytes, sqltypes.Text, resultCollation(merged)) + env.vm.sp -= offset + return 1 + } + } + + occ := int64(0) + if offset > 3 { + occ = occurrence(env.vm.stack[env.vm.sp-offset+3].(*evalInt64), occ) + } + + m.Reset(inputRunes[pos-1:]) + + b, replaced, err := regexpReplace(m, inputRunes, replRunes, pos, occ, merged.Collation.Get().Charset()) + if err != nil { + env.vm.err = err + env.vm.sp -= offset + return 1 + } + if !replaced { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalRaw(input.bytes, sqltypes.Text, resultCollation(merged)) + } else { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalRaw(b, sqltypes.Text, resultCollation(merged)) + } + env.vm.sp -= offset + return 1 + }, "FN REGEXP_REPLACE VARCHAR(SP-2), VARCHAR(SP-1)") +} + +func (asm *assembler) Fn_REGEXP_REPLACE_slow(merged collations.TypedCollation, flags icuregex.RegexpFlag, offset int) { + asm.adjustStack(-offset) + asm.emit(func(env *ExpressionEnv) int { + input := env.vm.stack[env.vm.sp-offset-1].(*evalBytes) + pattern := env.vm.stack[env.vm.sp-offset].(*evalBytes) + repl := env.vm.stack[env.vm.sp-offset+1].(*evalBytes) + + c := merged.Collation.Get().Charset() + inputRunes := charset.Expand(nil, input.bytes, c) + replRunes := charset.Expand(nil, repl.bytes, c) + + pos := int64(1) + if offset > 2 { + limit := int64(len(inputRunes)) + pos, env.vm.err = position(env.vm.stack[env.vm.sp-offset+2].(*evalInt64), limit, "regexp_replace") + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + if pos-1 == limit { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalRaw(input.bytes, sqltypes.Text, resultCollation(merged)) + env.vm.sp -= offset + return 1 + } + } + + occ := int64(0) + if offset > 3 { + occ = occurrence(env.vm.stack[env.vm.sp-offset+3].(*evalInt64), 0) + } + + if offset > 4 { + fe := env.vm.stack[env.vm.sp-offset+4] + flags, env.vm.err = regexpFlags(fe, flags, "regexp_replace") + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + } + + p, err := compileRegex(pattern, c, flags) + if err != nil { + env.vm.err = err + env.vm.sp -= offset + return 1 + } + + m := icuregex.NewMatcher(p) + m.Reset(inputRunes[pos-1:]) + + b, replaced, err := regexpReplace(m, inputRunes, replRunes, pos, occ, merged.Collation.Get().Charset()) + if err != nil { + env.vm.err = err + env.vm.sp -= offset + return 1 + } + if !replaced { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalRaw(input.bytes, sqltypes.Text, resultCollation(merged)) + } else { + 
env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalRaw(b, sqltypes.Text, resultCollation(merged)) + } + env.vm.sp -= offset + return 1 + }, "FN REGEXP_REPLACE_SLOW VARCHAR(SP-2), VARCHAR(SP-1)") +} diff --git a/go/vt/vtgate/evalengine/compiler_test.go b/go/vt/vtgate/evalengine/compiler_test.go index 92ef9d3d465..1b5ace371c9 100644 --- a/go/vt/vtgate/evalengine/compiler_test.go +++ b/go/vt/vtgate/evalengine/compiler_test.go @@ -444,6 +444,10 @@ func TestCompilerSingle(t *testing.T) { expression: `INTERVAL(0, 0, 0, -1, NULL, NULL, 1)`, result: `INT64(5)`, }, + { + expression: `REGEXP_REPLACE(1234, 12, 6, 1)`, + result: `TEXT("634")`, + }, } for _, tc := range testCases { diff --git a/go/vt/vtgate/evalengine/expr_collate.go b/go/vt/vtgate/evalengine/expr_collate.go index 16fe8351880..2ba2e3dba61 100644 --- a/go/vt/vtgate/evalengine/expr_collate.go +++ b/go/vt/vtgate/evalengine/expr_collate.go @@ -54,6 +54,12 @@ var collationUtf8mb3 = collations.TypedCollation{ Repertoire: collations.RepertoireUnicode, } +var collationRegexpFallback = collations.TypedCollation{ + Collation: collations.CollationLatin1Swedish, + Coercibility: collations.CoerceCoercible, + Repertoire: collations.RepertoireASCII, +} + type ( CollateExpr struct { UnaryExpr @@ -152,16 +158,16 @@ func mergeCollations(c1, c2 collations.TypedCollation, t1, t2 sqltypes.Type) (co }) } -func mergeAndCoerceCollations(left, right eval) (eval, eval, collations.ID, error) { +func mergeAndCoerceCollations(left, right eval) (eval, eval, collations.TypedCollation, error) { lt := left.SQLType() rt := right.SQLType() mc, coerceLeft, coerceRight, err := mergeCollations(evalCollation(left), evalCollation(right), lt, rt) if err != nil { - return nil, nil, 0, err + return nil, nil, collations.TypedCollation{}, err } if coerceLeft == nil && coerceRight == nil { - return left, right, mc.Collation, nil + return left, right, mc, nil } left1 := newEvalRaw(lt, left.(*evalBytes).bytes, mc) @@ -170,16 +176,16 @@ func mergeAndCoerceCollations(left, right eval) (eval, eval, collations.ID, erro if coerceLeft != nil { left1.bytes, err = coerceLeft(nil, left1.bytes) if err != nil { - return nil, nil, 0, err + return nil, nil, collations.TypedCollation{}, err } } if coerceRight != nil { right1.bytes, err = coerceRight(nil, right1.bytes) if err != nil { - return nil, nil, 0, err + return nil, nil, collations.TypedCollation{}, err } } - return left1, right1, mc.Collation, nil + return left1, right1, mc, nil } type collationAggregation struct { diff --git a/go/vt/vtgate/evalengine/expr_compare.go b/go/vt/vtgate/evalengine/expr_compare.go index cef7493e026..3aca0cc1151 100644 --- a/go/vt/vtgate/evalengine/expr_compare.go +++ b/go/vt/vtgate/evalengine/expr_compare.go @@ -558,7 +558,7 @@ func (l *LikeExpr) eval(env *ExpressionEnv) (eval, error) { return nil, err } - var col collations.ID + var col collations.TypedCollation left, right, col, err = mergeAndCoerceCollations(left, right) if err != nil { return nil, err @@ -567,11 +567,11 @@ func (l *LikeExpr) eval(env *ExpressionEnv) (eval, error) { var matched bool switch { case typeIsTextual(left.SQLType()) && typeIsTextual(right.SQLType()): - matched = l.matchWildcard(left.(*evalBytes).bytes, right.(*evalBytes).bytes, col) + matched = l.matchWildcard(left.(*evalBytes).bytes, right.(*evalBytes).bytes, col.Collation) case typeIsTextual(right.SQLType()): - matched = l.matchWildcard(left.ToRawBytes(), right.(*evalBytes).bytes, col) + matched = l.matchWildcard(left.ToRawBytes(), right.(*evalBytes).bytes, col.Collation) 
case typeIsTextual(left.SQLType()): - matched = l.matchWildcard(left.(*evalBytes).bytes, right.ToRawBytes(), col) + matched = l.matchWildcard(left.(*evalBytes).bytes, right.ToRawBytes(), col.Collation) default: matched = l.matchWildcard(left.ToRawBytes(), right.ToRawBytes(), collations.CollationBinaryID) } diff --git a/go/vt/vtgate/evalengine/fn_regexp.go b/go/vt/vtgate/evalengine/fn_regexp.go new file mode 100644 index 00000000000..5886a5c3765 --- /dev/null +++ b/go/vt/vtgate/evalengine/fn_regexp.go @@ -0,0 +1,1062 @@ +/* +Copyright 2023 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package evalengine + +import ( + "errors" + "strings" + + "vitess.io/vitess/go/mysql/collations" + "vitess.io/vitess/go/mysql/collations/charset" + "vitess.io/vitess/go/mysql/icuregex" + icuerrors "vitess.io/vitess/go/mysql/icuregex/errors" + "vitess.io/vitess/go/sqltypes" + querypb "vitess.io/vitess/go/vt/proto/query" + vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" + "vitess.io/vitess/go/vt/vterrors" +) + +func regexpFlags(m eval, flags icuregex.RegexpFlag, f string) (icuregex.RegexpFlag, error) { + switch m := m.(type) { + case *evalBytes: + for _, b := range m.bytes { + switch b { + case 'c': + flags &= ^icuregex.CaseInsensitive + case 'i': + flags |= icuregex.CaseInsensitive + case 'm': + flags |= icuregex.Multiline + case 'n': + flags |= icuregex.DotAll + case 'u': + flags |= icuregex.UnixLines + default: + return flags, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.WrongArguments, "Incorrect arguments to %s.", f) + } + } + default: + return flags, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.WrongArguments, "Incorrect arguments to %s.", f) + } + + return flags, nil +} + +func occurrence(e *evalInt64, min int64) int64 { + if e.i < min { + return min + } + return e.i +} + +func returnOption(val *evalInt64, f string) (int64, error) { + switch val.i { + case 0, 1: + // Valid return options. 
+ return val.i, nil + } + return 0, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.WrongArguments, "Incorrect arguments to %s: return_option must be 1 or 0.", f) +} + +func positionInstr(val *evalInt64, limit int64) (int64, error) { + pos := val.i + if pos < 1 || pos > limit { + return 0, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpIndexOutOfBounds, "Index out of bounds in regular expression search.") + } + return pos, nil +} + +func position(val *evalInt64, limit int64, f string) (int64, error) { + pos := val.i + if pos < 1 { + return 0, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.WrongParametersToNativeFct, "Incorrect parameters in the call to native function '%s'", f) + } + if pos-1 > limit { + return 0, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpIndexOutOfBounds, "Index out of bounds in regular expression search.") + } + return pos, nil +} + +func evalRegexpCollation(input, pat eval, f string) (eval, eval, collations.TypedCollation, icuregex.RegexpFlag, error) { + var typedCol collations.TypedCollation + var err error + + if inputBytes, ok := input.(*evalBytes); ok { + if patBytes, ok := pat.(*evalBytes); ok { + inputCol := inputBytes.col.Collation + patCol := patBytes.col.Collation + if (inputCol == collations.CollationBinaryID && patCol != collations.CollationBinaryID) || + (inputCol != collations.CollationBinaryID && patCol == collations.CollationBinaryID) { + inputColName := inputCol.Get().Name() + patColName := patCol.Get().Name() + return nil, nil, typedCol, 0, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.CharacterSetMismatch, "Character set '%s' cannot be used in conjunction with '%s' in call to %s.", inputColName, patColName, f) + } + } + } + + input, pat, typedCol, err = mergeAndCoerceCollations(input, pat) + if err != nil { + return nil, nil, collations.TypedCollation{}, 0, err + } + + var flags icuregex.RegexpFlag + var collation = typedCol.Collation.Get() + if strings.Contains(collation.Name(), "_ci") { + flags |= icuregex.CaseInsensitive + } + + return input, pat, typedCol, flags, nil +} + +func compileRegexpCollation(input, pat ctype, f string) (collations.TypedCollation, icuregex.RegexpFlag, error) { + var merged collations.TypedCollation + var err error + + if input.isTextual() && pat.isTextual() { + inputCol := input.Col.Collation + patCol := pat.Col.Collation + if (inputCol == collations.CollationBinaryID && patCol != collations.CollationBinaryID) || + (inputCol != collations.CollationBinaryID && patCol == collations.CollationBinaryID) { + inputColName := inputCol.Get().Name() + patColName := patCol.Get().Name() + return input.Col, 0, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.CharacterSetMismatch, "Character set '%s' cannot be used in conjunction with '%s' in call to %s.", inputColName, patColName, f) + } + } + + if input.Col.Collation != pat.Col.Collation { + merged, _, _, err = mergeCollations(input.Col, pat.Col, input.Type, pat.Type) + } else { + merged = input.Col + } + if err != nil { + return input.Col, 0, err + } + + var flags icuregex.RegexpFlag + var collation = merged.Collation.Get() + if strings.Contains(collation.Name(), "_ci") { + flags |= icuregex.CaseInsensitive + } + return merged, flags, nil +} + +func compileRegex(pat eval, c collations.Charset, flags icuregex.RegexpFlag) (*icuregex.Pattern, error) { + patRunes := charset.Expand(nil, pat.ToRawBytes(), c) + + if len(patRunes) == 0 { + return nil, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, 
vterrors.RegexpIllegalArgument, "Illegal argument to a regular expression.") + } + + regexp, err := icuregex.Compile(patRunes, flags) + if err == nil { + return regexp, nil + } + + var compileErr *icuregex.CompileError + if errors.Is(err, icuerrors.ErrUnsupported) { + err = vterrors.NewErrorf(vtrpcpb.Code_UNIMPLEMENTED, vterrors.RegexpUnimplemented, err.Error()) + } else if errors.Is(err, icuerrors.ErrIllegalArgument) { + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpIllegalArgument, err.Error()) + } else if errors.As(err, &compileErr) { + switch compileErr.Code { + case icuregex.InternalError: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpInternal, compileErr.Error()) + case icuregex.RuleSyntax: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpRuleSyntax, compileErr.Error()) + case icuregex.BadEscapeSequence: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpBadEscapeSequence, compileErr.Error()) + case icuregex.PropertySyntax: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpRuleSyntax, compileErr.Error()) + case icuregex.Unimplemented: + err = vterrors.NewErrorf(vtrpcpb.Code_UNIMPLEMENTED, vterrors.RegexpUnimplemented, compileErr.Error()) + case icuregex.MismatchedParen: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpMismatchParen, compileErr.Error()) + case icuregex.BadInterval: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpBadInterval, compileErr.Error()) + case icuregex.MaxLtMin: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpMaxLtMin, compileErr.Error()) + case icuregex.InvalidBackRef: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpInvalidBackRef, compileErr.Error()) + case icuregex.InvalidFlag: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpInvalidFlag, compileErr.Error()) + case icuregex.LookBehindLimit: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpLookBehindLimit, compileErr.Error()) + case icuregex.MissingCloseBracket: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpMissingCloseBracket, compileErr.Error()) + case icuregex.InvalidRange: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpInvalidRange, compileErr.Error()) + case icuregex.PatternTooBig: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpPatternTooBig, compileErr.Error()) + case icuregex.InvalidCaptureGroupName: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpInvalidCaptureGroup, compileErr.Error()) + default: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpInternal, compileErr.Error()) + } + } + + return nil, err +} + +func compileConstantRegex(c *compiler, args TupleExpr, pat, mt int, cs collations.TypedCollation, flags icuregex.RegexpFlag, f string) (*icuregex.Pattern, error) { + pattern := args[pat] + if !pattern.constant() { + return nil, c.unsupported(pattern) + } + var err error + staticEnv := EmptyExpressionEnv() + pattern, err = simplifyExpr(staticEnv, pattern) + if err != nil { + return nil, err + } + + if len(args) > mt { + fl := args[mt] + if !fl.constant() { + return nil, c.unsupported(fl) + } + fl, err = simplifyExpr(staticEnv, fl) + if err != nil { + return nil, err + } + flags, err = regexpFlags(fl.(*Literal).inner, flags, f) + if err != nil { + return nil, err + } + } + + if pattern.(*Literal).inner == nil 
{ + return nil, c.unsupported(pattern) + } + + innerPat, err := evalToVarchar(pattern.(*Literal).inner, cs.Collation, true) + if err != nil { + return nil, err + } + + return compileRegex(innerPat, cs.Collation.Get().Charset(), flags) +} + +// resultCollation returns the collation to use for the result of a regexp. +// This falls back to latin1_swedish if the input collation is binary. This +// seems to be a side effect of how MySQL also works. Probably due to how it +// is using ICU and converting there. +func resultCollation(in collations.TypedCollation) collations.TypedCollation { + if in.Collation == collationBinary.Collation { + return collationRegexpFallback + } + return in +} + +type builtinRegexpLike struct { + CallExpr + Negate bool +} + +func (r *builtinRegexpLike) eval(env *ExpressionEnv) (eval, error) { + input, err := r.Arguments[0].eval(env) + if err != nil || input == nil { + return nil, err + } + + pat, err := r.Arguments[1].eval(env) + if err != nil || pat == nil { + return nil, err + } + + input, pat, typedCol, flags, err := evalRegexpCollation(input, pat, "regexp_like") + if err != nil { + return nil, err + } + collation := typedCol.Collation.Get() + + if len(r.Arguments) > 2 { + m, err := r.Arguments[2].eval(env) + if err != nil || m == nil { + return nil, err + } + flags, err = regexpFlags(m, flags, "regexp_like") + if err != nil { + return nil, err + } + } + + regexp, err := compileRegex(pat, collation.Charset(), flags) + if err != nil { + return nil, err + } + + inputRunes := charset.Expand(nil, input.ToRawBytes(), collation.Charset()) + m := icuregex.NewMatcher(regexp) + m.Reset(inputRunes) + + ok, err := m.Find() + if err != nil { + return nil, err + } + if r.Negate { + ok = !ok + } + return newEvalBool(ok), nil +} + +func (r *builtinRegexpLike) typeof(env *ExpressionEnv, fields []*querypb.Field) (sqltypes.Type, typeFlag) { + _, f1 := r.Arguments[0].typeof(env, fields) + _, f2 := r.Arguments[1].typeof(env, fields) + var f3 typeFlag + if len(r.Arguments) > 2 { + _, f3 = r.Arguments[2].typeof(env, fields) + } + return sqltypes.Int64, f1 | f2 | f3 | flagIsBoolean +} + +func (r *builtinRegexpLike) compileSlow(c *compiler, input, pat, fl ctype, merged collations.TypedCollation, flags icuregex.RegexpFlag, skips ...*jump) (ctype, error) { + if !pat.isTextual() || pat.Col.Collation != merged.Collation { + c.asm.Convert_xce(len(r.Arguments)-1, sqltypes.VarChar, merged.Collation) + } + + c.asm.Fn_REGEXP_LIKE_slow(r.Negate, merged.Collation.Get().Charset(), flags, len(r.Arguments)-1) + c.asm.jumpDestination(skips...) 
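The match_type handling in regexpFlags earlier in this file is shared by all four REGEXP_* functions: the flag characters are folded in left to right, so a later 'c' cancels an earlier 'i' and vice versa. A minimal stand-alone sketch of that folding, restricted to 'c'/'i' and using a hypothetical bitmask type in place of the icuregex flag constants:

package main

import "fmt"

// flag is a hypothetical stand-in for icuregex.RegexpFlag.
type flag uint32

const caseInsensitive flag = 1 << 0

// applyMatchType mirrors the left-to-right folding done by regexpFlags,
// restricted here to the 'c' and 'i' characters.
func applyMatchType(f flag, matchType string) (flag, error) {
	for _, ch := range matchType {
		switch ch {
		case 'c':
			f &^= caseInsensitive
		case 'i':
			f |= caseInsensitive
		default:
			return f, fmt.Errorf("incorrect arguments: unknown match_type %q", ch)
		}
	}
	return f, nil
}

func main() {
	f, _ := applyMatchType(0, "ic")
	fmt.Println(f&caseInsensitive != 0) // false: the later 'c' wins over 'i'
}

The real regexpFlags additionally accepts 'm', 'n' and 'u' and reports MySQL's "Incorrect arguments" error for anything else.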
+ return ctype{Type: sqltypes.Int64, Col: collationNumeric, Flag: input.Flag | pat.Flag | fl.Flag | flagIsBoolean}, nil +} + +func (r *builtinRegexpLike) compile(c *compiler) (ctype, error) { + input, err := r.Arguments[0].compile(c) + if err != nil { + return ctype{}, err + } + var skips []*jump + skips = append(skips, c.compileNullCheckArg(input, 0)) + + pat, err := r.Arguments[1].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(pat, 1)) + + var f ctype + + if len(r.Arguments) > 2 { + f, err = r.Arguments[2].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(f, 2)) + } + + merged, flags, err := compileRegexpCollation(input, pat, "regexp_like") + if err != nil { + return ctype{}, err + } + + if !input.isTextual() || input.Col.Collation != merged.Collation { + c.asm.Convert_xce(len(r.Arguments), sqltypes.VarChar, merged.Collation) + } + + // We optimize for the case where the pattern is a constant. If not, + // we fall back to the slow path. + p, err := compileConstantRegex(c, r.Arguments, 1, 2, merged, flags, "regexp_like") + if err != nil { + return r.compileSlow(c, input, pat, f, merged, flags, skips...) + } + + c.asm.Fn_REGEXP_LIKE(icuregex.NewMatcher(p), r.Negate, merged.Collation.Get().Charset(), len(r.Arguments)-1) + c.asm.jumpDestination(skips...) + + return ctype{Type: sqltypes.Int64, Col: collationNumeric, Flag: input.Flag | pat.Flag | f.Flag | flagIsBoolean}, nil +} + +var _ Expr = (*builtinRegexpLike)(nil) + +type builtinRegexpInstr struct { + CallExpr +} + +func (r *builtinRegexpInstr) eval(env *ExpressionEnv) (eval, error) { + input, err := r.Arguments[0].eval(env) + if err != nil || input == nil { + return nil, err + } + + pat, err := r.Arguments[1].eval(env) + if err != nil || pat == nil { + return nil, err + } + + input, pat, typedCol, flags, err := evalRegexpCollation(input, pat, "regexp_instr") + if err != nil { + return nil, err + } + + var posExpr eval + if len(r.Arguments) > 2 { + posExpr, err = r.Arguments[2].eval(env) + if err != nil || posExpr == nil { + return nil, err + } + } + + var occExpr eval + if len(r.Arguments) > 3 { + occExpr, err = r.Arguments[3].eval(env) + if err != nil || occExpr == nil { + return nil, err + } + } + + var retExpr eval + if len(r.Arguments) > 4 { + retExpr, err = r.Arguments[4].eval(env) + if err != nil || retExpr == nil { + return nil, err + } + } + + var mtExpr eval + if len(r.Arguments) > 5 { + mtExpr, err = r.Arguments[5].eval(env) + if err != nil || mtExpr == nil { + return nil, err + } + } + + collation := typedCol.Collation.Get() + + pos := int64(1) + occ := int64(1) + returnOpt := int64(0) + + if mtExpr != nil { + flags, err = regexpFlags(mtExpr, flags, "regexp_instr") + if err != nil { + return nil, err + } + } + + regexp, err := compileRegex(pat, collation.Charset(), flags) + if err != nil { + return nil, err + } + + inputRunes := charset.Expand(nil, input.ToRawBytes(), collation.Charset()) + if len(inputRunes) == 0 { + return newEvalInt64(0), nil + } + + if posExpr != nil { + pos, err = positionInstr(evalToInt64(posExpr), int64(len(inputRunes))) + if err != nil { + return nil, err + } + } + + if occExpr != nil { + occ = occurrence(evalToInt64(occExpr), occ) + } + + if retExpr != nil { + returnOpt, err = returnOption(evalToInt64(retExpr), "regexp_instr") + if err != nil { + return nil, err + } + } + + m := icuregex.NewMatcher(regexp) + m.Reset(inputRunes[pos-1:]) + + found := false + for i := int64(0); i < occ; i++ { + found, 
err = m.Find() + if err != nil { + return nil, err + } + if !found { + break + } + } + if !found { + return newEvalInt64(0), nil + } + if returnOpt == 0 { + return newEvalInt64(int64(m.Start()) + pos), nil + } + return newEvalInt64(int64(m.End()) + pos), nil +} + +func (r *builtinRegexpInstr) typeof(env *ExpressionEnv, fields []*querypb.Field) (sqltypes.Type, typeFlag) { + _, f1 := r.Arguments[0].typeof(env, fields) + _, f2 := r.Arguments[1].typeof(env, fields) + var f3, f4, f5, f6 typeFlag + if len(r.Arguments) > 2 { + _, f3 = r.Arguments[2].typeof(env, fields) + } + if len(r.Arguments) > 3 { + _, f4 = r.Arguments[3].typeof(env, fields) + } + if len(r.Arguments) > 4 { + _, f5 = r.Arguments[4].typeof(env, fields) + } + if len(r.Arguments) > 5 { + _, f6 = r.Arguments[5].typeof(env, fields) + } + return sqltypes.Int64, f1 | f2 | f3 | f4 | f5 | f6 +} + +func (r *builtinRegexpInstr) compileSlow(c *compiler, input, pat, pos, occ, returnOption, matchType ctype, merged collations.TypedCollation, flags icuregex.RegexpFlag, skips ...*jump) (ctype, error) { + if !pat.isTextual() || pat.Col.Collation != merged.Collation { + c.asm.Convert_xce(len(r.Arguments)-1, sqltypes.VarChar, merged.Collation) + } + + c.asm.Fn_REGEXP_INSTR_slow(merged.Collation.Get().Charset(), flags, len(r.Arguments)-1) + c.asm.jumpDestination(skips...) + return ctype{Type: sqltypes.Int64, Col: collationNumeric, Flag: input.Flag | pat.Flag | pos.Flag | occ.Flag | returnOption.Flag | matchType.Flag}, nil +} + +func (r *builtinRegexpInstr) compile(c *compiler) (ctype, error) { + input, err := r.Arguments[0].compile(c) + if err != nil { + return ctype{}, err + } + var skips []*jump + skips = append(skips, c.compileNullCheckArg(input, 0)) + + pat, err := r.Arguments[1].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(pat, 1)) + + var pos ctype + if len(r.Arguments) > 2 { + pos, err = r.Arguments[2].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(pos, 2)) + _ = c.compileToInt64(pos, 1) + } + + var occ ctype + if len(r.Arguments) > 3 { + occ, err = r.Arguments[3].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(occ, 3)) + _ = c.compileToInt64(occ, 1) + } + + var returnOpt ctype + if len(r.Arguments) > 4 { + returnOpt, err = r.Arguments[4].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(returnOpt, 4)) + _ = c.compileToInt64(returnOpt, 1) + } + + var matchType ctype + if len(r.Arguments) > 5 { + matchType, err = r.Arguments[5].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(matchType, 5)) + switch { + case matchType.isTextual(): + default: + c.asm.Convert_xb(1, sqltypes.VarBinary, 0, false) + } + } + + merged, flags, err := compileRegexpCollation(input, pat, "regexp_instr") + if err != nil { + return ctype{}, err + } + + if !input.isTextual() || input.Col.Collation != merged.Collation { + c.asm.Convert_xce(len(r.Arguments), sqltypes.VarChar, merged.Collation) + } + + // We optimize for the case where the pattern is a constant. If not, + // we fall back to the slow path. + p, err := compileConstantRegex(c, r.Arguments, 1, 5, merged, flags, "regexp_instr") + if err != nil { + return r.compileSlow(c, input, pat, pos, occ, returnOpt, matchType, merged, flags, skips...) 
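The "we optimize for the case where the pattern is a constant" comment above marks the central performance decision in these compile methods: when the pattern (and match_type) are constant expressions, compileConstantRegex compiles them once and the emitted instruction carries a ready icuregex.Matcher, while non-constant patterns fall back to the *_slow variants that recompile the pattern for every evaluated row. A rough stand-alone sketch of why that matters, using the standard library's regexp package purely as a stand-in for icuregex:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	rows := []string{"Michael!", "abc def ghi", "dog cat dog"}

	// Fast path: a constant pattern is compiled once, outside the per-row loop.
	re := regexp.MustCompile(`^[a-z ]+$`)
	for _, row := range rows {
		fmt.Println(row, re.MatchString(row))
	}

	// Slow path: the pattern is only known per row, so it has to be compiled
	// (and error-checked) inside the loop, once for every evaluation.
	patterns := []string{`\w+!`, `def`, `cat|dog`}
	for i, row := range rows {
		re, err := regexp.Compile(patterns[i])
		if err != nil {
			fmt.Println("bad pattern:", err)
			continue
		}
		fmt.Println(row, re.MatchString(row))
	}
}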
+ } + + c.asm.Fn_REGEXP_INSTR(icuregex.NewMatcher(p), merged.Collation.Get().Charset(), len(r.Arguments)-1) + c.asm.jumpDestination(skips...) + + return ctype{Type: sqltypes.Int64, Col: collationNumeric, Flag: input.Flag | pat.Flag | flagIsBoolean}, nil +} + +var _ Expr = (*builtinRegexpInstr)(nil) + +type builtinRegexpSubstr struct { + CallExpr +} + +func (r *builtinRegexpSubstr) eval(env *ExpressionEnv) (eval, error) { + input, err := r.Arguments[0].eval(env) + if err != nil || input == nil { + return nil, err + } + + pat, err := r.Arguments[1].eval(env) + if err != nil || pat == nil { + return nil, err + } + + input, pat, typedCol, flags, err := evalRegexpCollation(input, pat, "regexp_substr") + if err != nil { + return nil, err + } + + var posExpr eval + // For some reason this gets checked before NULL checks of the other values + if len(r.Arguments) > 2 { + posExpr, err = r.Arguments[2].eval(env) + if err != nil || posExpr == nil { + return nil, err + } + } + + var occExpr eval + if len(r.Arguments) > 3 { + occExpr, err = r.Arguments[3].eval(env) + if err != nil || occExpr == nil { + return nil, err + } + } + + var mtExpr eval + if len(r.Arguments) > 4 { + mtExpr, err = r.Arguments[4].eval(env) + if err != nil || mtExpr == nil { + return nil, err + } + } + + collation := typedCol.Collation.Get() + pos := int64(1) + occ := int64(1) + inputRunes := charset.Expand(nil, input.ToRawBytes(), collation.Charset()) + + if posExpr != nil { + pos, err = position(evalToInt64(posExpr), int64(len(inputRunes)), "regexp_substr") + if err != nil { + return nil, err + } + + } + + if occExpr != nil { + occ = occurrence(evalToInt64(occExpr), occ) + } + + if mtExpr != nil { + flags, err = regexpFlags(mtExpr, flags, "regexp_substr") + if err != nil { + return nil, err + } + } + + regexp, err := compileRegex(pat, collation.Charset(), flags) + if err != nil { + return nil, err + } + + m := icuregex.NewMatcher(regexp) + m.Reset(inputRunes[pos-1:]) + + found := false + for i := int64(0); i < occ; i++ { + found, err = m.Find() + if err != nil { + return nil, err + } + if !found { + break + } + } + if !found { + return nil, nil + } + out := inputRunes[int64(m.Start())+pos-1 : int64(m.End())+pos-1] + b := charset.Collapse(nil, out, collation.Charset()) + return newEvalText(b, resultCollation(typedCol)), nil +} + +func (r *builtinRegexpSubstr) typeof(env *ExpressionEnv, fields []*querypb.Field) (sqltypes.Type, typeFlag) { + _, f1 := r.Arguments[0].typeof(env, fields) + _, f2 := r.Arguments[1].typeof(env, fields) + var f3, f4, f5 typeFlag + if len(r.Arguments) > 2 { + _, f3 = r.Arguments[2].typeof(env, fields) + } + if len(r.Arguments) > 3 { + _, f4 = r.Arguments[3].typeof(env, fields) + } + if len(r.Arguments) > 4 { + _, f5 = r.Arguments[4].typeof(env, fields) + } + return sqltypes.VarChar, f1 | f2 | f3 | f4 | f5 +} + +func (r *builtinRegexpSubstr) compileSlow(c *compiler, input, pat, pos, occ, matchType ctype, merged collations.TypedCollation, flags icuregex.RegexpFlag, skips ...*jump) (ctype, error) { + if !pat.isTextual() || pat.Col.Collation != merged.Collation { + c.asm.Convert_xce(len(r.Arguments)-1, sqltypes.VarChar, merged.Collation) + } + + c.asm.Fn_REGEXP_SUBSTR_slow(merged, flags, len(r.Arguments)-1) + c.asm.jumpDestination(skips...) 
+ return ctype{Type: sqltypes.Int64, Col: collationNumeric, Flag: input.Flag | pat.Flag | pos.Flag | occ.Flag | matchType.Flag}, nil +} + +func (r *builtinRegexpSubstr) compile(c *compiler) (ctype, error) { + input, err := r.Arguments[0].compile(c) + if err != nil { + return ctype{}, err + } + var skips []*jump + skips = append(skips, c.compileNullCheckArg(input, 0)) + + pat, err := r.Arguments[1].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(pat, 1)) + + var pos ctype + if len(r.Arguments) > 2 { + pos, err = r.Arguments[2].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(pos, 2)) + _ = c.compileToInt64(pos, 1) + } + + var occ ctype + if len(r.Arguments) > 3 { + occ, err = r.Arguments[3].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(occ, 3)) + _ = c.compileToInt64(occ, 1) + } + + var matchType ctype + if len(r.Arguments) > 4 { + matchType, err = r.Arguments[4].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(matchType, 4)) + switch { + case matchType.isTextual(): + default: + c.asm.Convert_xb(1, sqltypes.VarBinary, 0, false) + } + } + + merged, flags, err := compileRegexpCollation(input, pat, "regexp_substr") + if err != nil { + return ctype{}, err + } + + if !input.isTextual() || input.Col.Collation != merged.Collation { + c.asm.Convert_xce(len(r.Arguments), sqltypes.VarChar, merged.Collation) + } + + // We optimize for the case where the pattern is a constant. If not, + // we fall back to the slow path. + p, err := compileConstantRegex(c, r.Arguments, 1, 4, merged, flags, "regexp_substr") + if err != nil { + return r.compileSlow(c, input, pat, pos, occ, matchType, merged, flags, skips...) + } + + c.asm.Fn_REGEXP_SUBSTR(icuregex.NewMatcher(p), merged, len(r.Arguments)-1) + c.asm.jumpDestination(skips...) + + return ctype{Type: sqltypes.Int64, Col: collationNumeric, Flag: input.Flag | pat.Flag | pos.Flag | occ.Flag | matchType.Flag}, nil +} + +var _ Expr = (*builtinRegexpSubstr)(nil) + +type builtinRegexpReplace struct { + CallExpr +} + +func regexpReplace(m *icuregex.Matcher, inputRunes, replRunes []rune, pos, occ int64, c collations.Charset) ([]byte, bool, error) { + var err error + found := false + if occ > 0 { + for i := int64(0); i < occ; i++ { + found, err = m.Find() + if err != nil { + return nil, false, err + } + if !found { + break + } + } + if !found { + return nil, false, nil + } + + out := append(inputRunes[:int64(m.Start())+pos-1], replRunes...) + out = append(out, inputRunes[int64(m.End())+pos-1:]...) + return charset.Collapse(nil, out, c), true, nil + } + + found, err = m.Find() + if err != nil { + return nil, false, err + } + + if !found { + return nil, false, nil + } + + start := int64(m.Start()) + pos - 1 + out := append(inputRunes[:start], replRunes...) + end := int64(m.End()) + pos - 1 + for { + found, err = m.Find() + if err != nil { + return nil, false, err + } + if !found { + break + } + nextStart := int64(m.Start()) + pos - 1 + out = append(out, inputRunes[end:nextStart]...) + out = append(out, replRunes...) + end = int64(m.End()) + pos - 1 + } + + out = append(out, inputRunes[end:]...) 
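Throughout these evaluators the matcher is Reset() on runes[pos-1:], so every Start()/End() offset it reports is relative to that window: the code shifts them back by pos-1 to slice the original runes (REGEXP_SUBSTR, REGEXP_REPLACE) or by pos to produce the 1-based index REGEXP_INSTR returns. A small sketch of that arithmetic on plain rune slices, with the match offsets simply assumed rather than produced by a matcher:

package main

import "fmt"

func main() {
	input := []rune("dog cat dog")
	pos := 5 // 1-based start position, as in REGEXP_INSTR(str, pat, pos)

	// The matcher only sees input[pos-1:] ("cat dog"); assume it reports a
	// match at rune offsets [4, 7) within that window (the second "dog").
	start, end := 4, 7

	// Shift back by pos-1 to slice the original runes again.
	fmt.Println(string(input[start+pos-1 : end+pos-1])) // "dog"

	// REGEXP_INSTR-style answer: 1-based, so Start() + pos.
	fmt.Println(start + pos) // 9
}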
+ return charset.Collapse(nil, out, c), true, nil +} + +func (r *builtinRegexpReplace) eval(env *ExpressionEnv) (eval, error) { + input, err := r.Arguments[0].eval(env) + if err != nil || input == nil { + return nil, err + } + + pat, err := r.Arguments[1].eval(env) + if err != nil || pat == nil { + return nil, err + } + + replArg, err := r.Arguments[2].eval(env) + if err != nil || replArg == nil { + return nil, err + } + + input, pat, typedCol, flags, err := evalRegexpCollation(input, pat, "regexp_replace") + if err != nil { + return nil, err + } + + var posExpr eval + // For some reason this gets checked before NULL checks of the other values + if len(r.Arguments) > 3 { + posExpr, err = r.Arguments[3].eval(env) + if err != nil || posExpr == nil { + return nil, err + } + } + + var occExpr eval + if len(r.Arguments) > 4 { + occExpr, err = r.Arguments[4].eval(env) + if err != nil || occExpr == nil { + return nil, err + } + } + + var mtExpr eval + if len(r.Arguments) > 5 { + mtExpr, err = r.Arguments[5].eval(env) + if err != nil || mtExpr == nil { + return nil, err + } + } + + collation := typedCol.Collation.Get() + + repl, ok := replArg.(*evalBytes) + if !ok { + repl, err = evalToVarchar(replArg, typedCol.Collation, true) + if err != nil { + return nil, err + } + } + pos := int64(1) + occ := int64(0) + inputRunes := charset.Expand(nil, input.ToRawBytes(), collation.Charset()) + replRunes := charset.Expand(nil, repl.ToRawBytes(), repl.col.Collation.Get().Charset()) + + if posExpr != nil { + pos, err = position(evalToInt64(posExpr), int64(len(inputRunes)), "regexp_replace") + if err != nil { + return nil, err + } + } + + if occExpr != nil { + occ = occurrence(evalToInt64(occExpr), occ) + } + + if mtExpr != nil { + flags, err = regexpFlags(mtExpr, flags, "regexp_replace") + if err != nil { + return nil, err + } + } + + regexp, err := compileRegex(pat, collation.Charset(), flags) + if err != nil { + return nil, err + } + + m := icuregex.NewMatcher(regexp) + m.Reset(inputRunes[pos-1:]) + + bytes, replaced, err := regexpReplace(m, inputRunes, replRunes, pos, occ, collation.Charset()) + if err != nil { + return nil, err + } + if !replaced { + return newEvalRaw(sqltypes.Text, input.ToRawBytes(), resultCollation(typedCol)), nil + } + return newEvalRaw(sqltypes.Text, bytes, resultCollation(typedCol)), nil +} + +func (r *builtinRegexpReplace) typeof(env *ExpressionEnv, fields []*querypb.Field) (sqltypes.Type, typeFlag) { + _, f1 := r.Arguments[0].typeof(env, fields) + _, f2 := r.Arguments[1].typeof(env, fields) + _, f3 := r.Arguments[2].typeof(env, fields) + var f4, f5, f6 typeFlag + if len(r.Arguments) > 3 { + _, f4 = r.Arguments[3].typeof(env, fields) + } + if len(r.Arguments) > 4 { + _, f5 = r.Arguments[4].typeof(env, fields) + } + if len(r.Arguments) > 5 { + _, f6 = r.Arguments[5].typeof(env, fields) + } + return sqltypes.Text, f1 | f2 | f3 | f4 | f5 | f6 +} + +func (r *builtinRegexpReplace) compileSlow(c *compiler, input, pat, repl, pos, occ, matchType ctype, merged collations.TypedCollation, flags icuregex.RegexpFlag, skips ...*jump) (ctype, error) { + if !pat.isTextual() || pat.Col.Collation != merged.Collation { + c.asm.Convert_xce(len(r.Arguments)-1, sqltypes.VarChar, merged.Collation) + } + + c.asm.Fn_REGEXP_REPLACE_slow(merged, flags, len(r.Arguments)-1) + c.asm.jumpDestination(skips...) 
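regexpReplace above implements MySQL's two occurrence modes: occ == 0 (the default for REGEXP_REPLACE) replaces every match, while occ > 0 replaces only that single occurrence and leaves the input untouched when there are fewer matches than requested. A stand-alone sketch of those semantics, with the standard library's regexp as a stand-in for icuregex and a hypothetical replaceOccurrence helper:

package main

import (
	"fmt"
	"regexp"
)

// replaceOccurrence mirrors the occ handling in regexpReplace:
// occ == 0 replaces every match, occ == n only the n-th.
func replaceOccurrence(re *regexp.Regexp, s, repl string, occ int) string {
	if occ == 0 {
		return re.ReplaceAllString(s, repl)
	}
	locs := re.FindAllStringIndex(s, -1)
	if occ > len(locs) {
		return s // fewer matches than requested: input stays untouched
	}
	m := locs[occ-1]
	return s[:m[0]] + repl + s[m[1]:]
}

func main() {
	re := regexp.MustCompile(`[a-z]+`)
	fmt.Println(replaceOccurrence(re, "abc def ghi", "X", 0)) // "X X X"
	fmt.Println(replaceOccurrence(re, "abc def ghi", "X", 3)) // "abc def X"
}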
+ return ctype{Type: sqltypes.Int64, Col: collationNumeric, Flag: input.Flag | pat.Flag | repl.Flag | pos.Flag | occ.Flag | matchType.Flag}, nil +} + +func (r *builtinRegexpReplace) compile(c *compiler) (ctype, error) { + input, err := r.Arguments[0].compile(c) + if err != nil { + return ctype{}, err + } + var skips []*jump + skips = append(skips, c.compileNullCheckArg(input, 0)) + + pat, err := r.Arguments[1].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(pat, 1)) + + repl, err := r.Arguments[2].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(repl, 2)) + + var pos ctype + if len(r.Arguments) > 3 { + pos, err = r.Arguments[3].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(pos, 3)) + _ = c.compileToInt64(pos, 1) + } + + var occ ctype + if len(r.Arguments) > 4 { + occ, err = r.Arguments[4].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(occ, 4)) + _ = c.compileToInt64(occ, 1) + } + + var matchType ctype + if len(r.Arguments) > 5 { + matchType, err = r.Arguments[5].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(matchType, 5)) + switch { + case matchType.isTextual(): + default: + c.asm.Convert_xb(1, sqltypes.VarBinary, 0, false) + } + } + + merged, flags, err := compileRegexpCollation(input, pat, "regexp_replace") + if err != nil { + return ctype{}, err + } + + if !input.isTextual() || input.Col.Collation != merged.Collation { + c.asm.Convert_xce(len(r.Arguments), sqltypes.VarChar, merged.Collation) + } + + if !repl.isTextual() || repl.Col.Collation != merged.Collation { + c.asm.Convert_xce(len(r.Arguments)-2, sqltypes.VarChar, merged.Collation) + } + + // We optimize for the case where the pattern is a constant. If not, + // we fall back to the slow path. + p, err := compileConstantRegex(c, r.Arguments, 1, 5, merged, flags, "regexp_replace") + if err != nil { + return r.compileSlow(c, input, pat, repl, pos, occ, matchType, merged, flags, skips...) + } + + c.asm.Fn_REGEXP_REPLACE(icuregex.NewMatcher(p), merged, len(r.Arguments)-1) + c.asm.jumpDestination(skips...) 
+ + return ctype{Type: sqltypes.Int64, Col: collationNumeric, Flag: input.Flag | pat.Flag | repl.Flag | pos.Flag | occ.Flag | matchType.Flag}, nil +} + +var _ Expr = (*builtinRegexpReplace)(nil) diff --git a/go/vt/vtgate/evalengine/integration/fuzz_test.go b/go/vt/vtgate/evalengine/integration/fuzz_test.go index 24cd2733fd4..563bb323244 100644 --- a/go/vt/vtgate/evalengine/integration/fuzz_test.go +++ b/go/vt/vtgate/evalengine/integration/fuzz_test.go @@ -98,6 +98,11 @@ var ( regexp.MustCompile(`Invalid JSON text in argument (\d+) to function (\w+): (.*?)`), regexp.MustCompile(`Illegal mix of collations`), regexp.MustCompile(`Incorrect (DATE|DATETIME) value`), + regexp.MustCompile(`Syntax error in regular expression`), + regexp.MustCompile(`The regular expression contains an unclosed bracket expression`), + regexp.MustCompile(`Illegal argument to a regular expression`), + regexp.MustCompile(`Incorrect arguments to regexp_substr`), + regexp.MustCompile(`Incorrect arguments to regexp_replace`), } ) diff --git a/go/vt/vtgate/evalengine/mysql_test.go b/go/vt/vtgate/evalengine/mysql_test.go index 18802cfb8dc..987ad906b88 100644 --- a/go/vt/vtgate/evalengine/mysql_test.go +++ b/go/vt/vtgate/evalengine/mysql_test.go @@ -147,6 +147,6 @@ func TestMySQLGolden(t *testing.T) { func TestDebug1(t *testing.T) { // Debug - eval, err := testSingle(t, `SELECT DATE_SUB(TIMESTAMP'2025-01-01 00:00:00', INTERVAL '1.999999' year_month)`) + eval, err := testSingle(t, `SELECT _latin1 0xFF regexp _latin1 '[[:lower:]]' COLLATE latin1_bin`) t.Logf("eval=%s err=%v coll=%s", eval.String(), err, eval.Collation().Get().Name()) } diff --git a/go/vt/vtgate/evalengine/testcases/cases.go b/go/vt/vtgate/evalengine/testcases/cases.go index b72c5dae816..d6e692b1a99 100644 --- a/go/vt/vtgate/evalengine/testcases/cases.go +++ b/go/vt/vtgate/evalengine/testcases/cases.go @@ -151,6 +151,10 @@ var Cases = []TestCase{ {Run: FnUUID}, {Run: FnUUIDToBin}, {Run: DateMath}, + {Run: RegexpLike}, + {Run: RegexpInstr}, + {Run: RegexpSubstr}, + {Run: RegexpReplace}, } func JSONPathOperations(yield Query) { @@ -1898,3 +1902,287 @@ func DateMath(yield Query) { } } } + +func RegexpLike(yield Query) { + mysqlDocSamples := []string{ + `'Michael!' REGEXP '.*'`, + `'Michael!' RLIKE '.*'`, + `'Michael!' NOT REGEXP '.*'`, + `'Michael!' 
NOT RLIKE '.*'`, + `'new*\n*line' REGEXP 'new\\*.\\*line'`, + `'a' REGEXP '^[a-d]'`, + `REGEXP_LIKE('CamelCase', 'CAMELCASE')`, + `REGEXP_LIKE('CamelCase', 'CAMELCASE' COLLATE utf8mb4_0900_as_cs)`, + `REGEXP_LIKE('abc', 'ABC'`, + `REGEXP_LIKE('abc', 'ABC', 'c')`, + `REGEXP_LIKE(1234, 12)`, + `REGEXP_LIKE(1234, 12, 'c')`, + `' ' REGEXP '[[:blank:]]'`, + `'\t' REGEXP '[[:blank:]]'`, + `' ' REGEXP '[[:space:]]'`, + `'\t' REGEXP '[[:space:]]'`, + `_latin1 0xFF regexp _latin1 '[[:lower:]]' COLLATE latin1_bin`, + `_koi8r 0xFF regexp _koi8r '[[:lower:]]' COLLATE koi8r_bin`, + `_latin1 0xFF regexp _latin1 '[[:upper:]]' COLLATE latin1_bin`, + `_koi8r 0xFF regexp _koi8r '[[:upper:]]' COLLATE koi8r_bin`, + `_latin1 0xF7 regexp _latin1 '[[:alpha:]]'`, + `_koi8r 0xF7 regexp _koi8r '[[:alpha:]]'`, + `_latin1'a' regexp _latin1'A' collate latin1_general_ci`, + `_latin1'a' regexp _latin1'A' collate latin1_bin`, + + `_latin1 'ÿ' regexp _utf8mb4 'ÿ'`, + `_utf8mb4 'ÿ' regexp _latin1 'ÿ'`, + `convert('ÿ' as char character set latin1) regexp _utf8mb4 'ÿ'`, + `_utf8mb4 'ÿ' regexp convert('ÿ' as char character set latin1)`, + + `'a' regexp '\\p{alphabetic}'`, + `'a' regexp '\\P{alphabetic}'`, + `'👌🏾regexp '\\p{Emoji}\\p{Emoji_modifier}'`, + `'a' regexp '\\p{Lowercase_letter}'`, + `'a' regexp '\\p{Uppercase_letter}'`, + `'A' regexp '\\p{Lowercase_letter}'`, + `'A' regexp '\\p{Uppercase_letter}'`, + `'a' collate utf8mb4_0900_as_cs regexp '\\p{Lowercase_letter}'`, + `'A' collate utf8mb4_0900_as_cs regexp '\\p{Lowercase_letter}'`, + `'a' collate utf8mb4_0900_as_cs regexp '\\p{Uppercase_letter}'`, + `'A' collate utf8mb4_0900_as_cs regexp '\\p{Uppercase_letter}'`, + `0xff REGEXP 0xff`, + `0xff REGEXP 0xfe`, + `cast(time '12:34:58' as json) REGEXP 0xff`, + } + + for _, q := range mysqlDocSamples { + yield(q, nil) + } + + for _, i := range regexInputs { + for _, p := range regexInputs { + yield(fmt.Sprintf("%s REGEXP %s", i, p), nil) + yield(fmt.Sprintf("%s NOT REGEXP %s", i, p), nil) + for _, m := range regexMatchStrings { + yield(fmt.Sprintf("REGEXP_LIKE(%s, %s, %s)", i, p, m), nil) + } + } + } +} + +func RegexpInstr(yield Query) { + mysqlDocSamples := []string{ + `REGEXP_INSTR('Michael!', '.*')`, + `REGEXP_INSTR('new*\n*line', 'new\\*.\\*line')`, + `REGEXP_INSTR('a', '^[a-d]')`, + `REGEXP_INSTR('CamelCase', 'CAMELCASE')`, + `REGEXP_INSTR('CamelCase', 'CAMELCASE' COLLATE utf8mb4_0900_as_cs)`, + `REGEXP_INSTR('abc', 'ABC'`, + `REGEXP_INSTR('abc', 'ABC', 'c')`, + `REGEXP_INSTR('0', '0', 1, 0)`, + `REGEXP_INSTR(' ', '[[:blank:]]')`, + `REGEXP_INSTR('\t', '[[:blank:]]')`, + `REGEXP_INSTR(' ', '[[:space:]]')`, + `REGEXP_INSTR('\t', '[[:space:]]')`, + `REGEXP_INSTR(_latin1 0xFF, _latin1 '[[:lower:]]' COLLATE latin1_bin)`, + `REGEXP_INSTR(_koi8r 0xFF, _koi8r '[[:lower:]]' COLLATE koi8r_bin)`, + `REGEXP_INSTR(_latin1 0xFF, _latin1 '[[:upper:]]' COLLATE latin1_bin)`, + `REGEXP_INSTR(_koi8r 0xFF, _koi8r '[[:upper:]]' COLLATE koi8r_bin)`, + `REGEXP_INSTR(_latin1 0xF7, _latin1 '[[:alpha:]]')`, + `REGEXP_INSTR(_koi8r 0xF7, _koi8r '[[:alpha:]]')`, + `REGEXP_INSTR(_latin1'a', _latin1'A' collate latin1_general_ci)`, + `REGEXP_INSTR(_latin1'a', _latin1'A' collate latin1_bin)`, + `REGEXP_INSTR('a', '\\p{alphabetic}')`, + `REGEXP_INSTR('a', '\\P{alphabetic}')`, + `REGEXP_INSTR('👌🏾, '\\p{Emoji}\\p{Emoji_modifier}')`, + `REGEXP_INSTR('a', '\\p{Lowercase_letter}')`, + `REGEXP_INSTR('a', '\\p{Uppercase_letter}')`, + `REGEXP_INSTR('A', '\\p{Lowercase_letter}')`, + `REGEXP_INSTR('A', '\\p{Uppercase_letter}')`, + `REGEXP_INSTR('a', collate 
utf8mb4_0900_as_cs, '\\p{Lowercase_letter}')`, + `REGEXP_INSTR('A' collate utf8mb4_0900_as_cs, '\\p{Lowercase_letter}')`, + `REGEXP_INSTR('a' collate utf8mb4_0900_as_cs, '\\p{Uppercase_letter}')`, + `REGEXP_INSTR('A' collate utf8mb4_0900_as_cs, '\\p{Uppercase_letter}')`, + `REGEXP_INSTR('dog cat dog', 'dog')`, + `REGEXP_INSTR('dog cat dog', 'dog', 2)`, + `REGEXP_INSTR('dog cat dog', 'dog', 1, 1)`, + `REGEXP_INSTR('dog cat dog', 'dog', 1, 1, 0)`, + `REGEXP_INSTR('dog cat dog', 'dog', 1, 1, 1)`, + `REGEXP_INSTR('dog cat dog', 'DOG', 1, 1, 1, 'i')`, + `REGEXP_INSTR('dog cat dog', 'DOG', 1, 1, 1, 'c')`, + `REGEXP_INSTR('dog cat dog', 'dog', 1, 2)`, + `REGEXP_INSTR('dog cat dog', 'dog', 1, 2, 0)`, + `REGEXP_INSTR('dog cat dog', 'dog', 1, 2, 1)`, + `REGEXP_INSTR('dog cat dog', 'DOG', 1, 2, 1, 'i')`, + `REGEXP_INSTR('dog cat dog', 'DOG', 1, 2, 1, 'c')`, + `REGEXP_INSTR('aa aaa aaaa', 'a{2}')`, + `REGEXP_INSTR('aa aaa aaaa', 'a{4}')`, + `REGEXP_INSTR(1234, 12)`, + `REGEXP_INSTR(1234, 12, 1)`, + `REGEXP_INSTR(1234, 12, 100)`, + `REGEXP_INSTR(1234, 12, 1, 1)`, + `REGEXP_INSTR(1234, 12, 1, 1, 1)`, + `REGEXP_INSTR(1234, 12, 1, 1, 1, 'c')`, + `REGEXP_INSTR('', ' ', 1000)`, + `REGEXP_INSTR(' ', ' ', 1000)`, + `REGEXP_INSTR(NULL, 'DOG', 1, 2, 1, 'c')`, + `REGEXP_INSTR('dog cat dog', NULL, 1, 2, 1, 'c')`, + `REGEXP_INSTR('dog cat dog', 'DOG', NULL, 2, 1, 'c')`, + `REGEXP_INSTR('dog cat dog', 'DOG', 1, NULL, 1, 'c')`, + `REGEXP_INSTR('dog cat dog', 'DOG', 1, 2, NULL, 'c')`, + `REGEXP_INSTR('dog cat dog', 'DOG', 1, 2, 1, NULL)`, + + `REGEXP_INSTR('dog cat dog', NULL, 1, 2, 1, 'c')`, + `REGEXP_INSTR('dog cat dog', _latin1 'DOG', NULL, 2, 1, 'c')`, + `REGEXP_INSTR('dog cat dog', _latin1 'DOG', 1, NULL, 1, 'c')`, + `REGEXP_INSTR('dog cat dog', _latin1 'DOG', 1, 2, NULL, 'c')`, + `REGEXP_INSTR('dog cat dog', _latin1 'DOG', 1, 2, 1, NULL)`, + } + + for _, q := range mysqlDocSamples { + yield(q, nil) + } +} + +func RegexpSubstr(yield Query) { + mysqlDocSamples := []string{ + `REGEXP_SUBSTR('Michael!', '.*')`, + `REGEXP_SUBSTR('new*\n*line', 'new\\*.\\*line')`, + `REGEXP_SUBSTR('a', '^[a-d]')`, + `REGEXP_SUBSTR('CamelCase', 'CAMELCASE')`, + `REGEXP_SUBSTR('CamelCase', 'CAMELCASE' COLLATE utf8mb4_0900_as_cs)`, + `REGEXP_SUBSTR('abc', 'ABC')`, + `REGEXP_SUBSTR(' ', '[[:blank:]]')`, + `REGEXP_SUBSTR('\t', '[[:blank:]]')`, + `REGEXP_SUBSTR(' ', '[[:space:]]')`, + `REGEXP_SUBSTR('\t', '[[:space:]]')`, + `REGEXP_SUBSTR(_latin1'a', _latin1'A' collate latin1_general_ci)`, + `REGEXP_SUBSTR(_latin1'a', _latin1'A' collate latin1_bin)`, + `REGEXP_SUBSTR('a', '\\p{alphabetic}')`, + `REGEXP_SUBSTR('a', '\\P{alphabetic}')`, + `REGEXP_SUBSTR('👌🏾', '\\p{Emoji}\\p{Emoji_modifier}')`, + `REGEXP_SUBSTR('a', '\\p{Lowercase_letter}')`, + `REGEXP_SUBSTR('a', '\\p{Uppercase_letter}')`, + `REGEXP_SUBSTR('A', '\\p{Lowercase_letter}')`, + `REGEXP_SUBSTR('A', '\\p{Uppercase_letter}')`, + `REGEXP_SUBSTR('a' collate utf8mb4_0900_as_cs, '\\p{Lowercase_letter}')`, + `REGEXP_SUBSTR('A' collate utf8mb4_0900_as_cs, '\\p{Lowercase_letter}')`, + `REGEXP_SUBSTR('a' collate utf8mb4_0900_as_cs, '\\p{Uppercase_letter}')`, + `REGEXP_SUBSTR('A' collate utf8mb4_0900_as_cs, '\\p{Uppercase_letter}')`, + `REGEXP_SUBSTR('dog cat dog', 'dog')`, + `REGEXP_SUBSTR('dog cat dog', 'dog', 2)`, + `REGEXP_SUBSTR('dog cat dog', 'dog', 1, 1)`, + `REGEXP_SUBSTR('dog cat dog', 'DOG', 1, 1, 'i')`, + `REGEXP_SUBSTR('dog cat dog', 'DOG', 1, 1, 'c')`, + `REGEXP_SUBSTR('dog cat dog', 'dog', 1, 2)`, + `REGEXP_SUBSTR('dog cat 
dog', 'DOG', 1, 2, 'i')`, + `REGEXP_SUBSTR('dog cat dog', 'DOG', 1, 2, 'c')`, + `REGEXP_SUBSTR('aa aaa aaaa', 'a{2}')`, + `REGEXP_SUBSTR('aa aaa aaaa', 'a{4}')`, + `REGEXP_SUBSTR(1234, 12)`, + `REGEXP_SUBSTR(1234, 12, 1)`, + `REGEXP_SUBSTR(1234, 12, 100)`, + `REGEXP_SUBSTR(1234, 12, 1, 1)`, + `REGEXP_SUBSTR(1234, 12, 1, 1, 'c')`, + + `REGEXP_SUBSTR(NULL, 'DOG', 1, 1, 'i')`, + `REGEXP_SUBSTR('dog cat dog', NULL, 1, 1, 'i')`, + `REGEXP_SUBSTR('dog cat dog', 'DOG', NULL, 1, 'i')`, + `REGEXP_SUBSTR('dog cat dog', 'DOG', 1, NULL, 'i')`, + `REGEXP_SUBSTR('dog cat dog', 'DOG', 1, 1, NULL)`, + + `REGEXP_SUBSTR(NULL, '[', 1, 1, 'i')`, + `REGEXP_SUBSTR('dog cat dog', '[', NULL, 1, 'i')`, + `REGEXP_SUBSTR('dog cat dog', '[', 1, NULL, 'i')`, + `REGEXP_SUBSTR('dog cat dog', '[', 1, 1, NULL)`, + + `REGEXP_SUBSTR('dog cat dog', 'DOG', 0, 1, 'i')`, + `REGEXP_SUBSTR('dog cat dog', 'DOG', -1, 1, 'i')`, + `REGEXP_SUBSTR('dog cat dog', 'DOG', 100, 1, 'i')`, + `REGEXP_SUBSTR('dog cat dog', 'DOG', 1, 1, 0)`, + + `REGEXP_SUBSTR(' ', ' ', 1)`, + `REGEXP_SUBSTR(' ', ' ', 2)`, + `REGEXP_SUBSTR(' ', ' ', 3)`, + } + + for _, q := range mysqlDocSamples { + yield(q, nil) + } +} + +func RegexpReplace(yield Query) { + mysqlDocSamples := []string{ + `REGEXP_REPLACE('a b c', 'b', 'X')`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 1, 0)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 1, 1)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 1, 2)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 1, 3)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 2, 0)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 2, 1)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 2, 2)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 2, 3)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 3, 0)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 3, 1)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 3, 2)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 3, 3)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 4, 0)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 4, 1)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 4, 2)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 4, 3)`, + `REGEXP_REPLACE('a', '\\p{Lowercase_letter}', 'X')`, + `REGEXP_REPLACE('a', '\\p{Uppercase_letter}', 'X')`, + `REGEXP_REPLACE('A', '\\p{Lowercase_letter}', 'X')`, + `REGEXP_REPLACE('A', '\\p{Uppercase_letter}', 'X')`, + `REGEXP_REPLACE(1234, 12, 6)`, + `REGEXP_REPLACE(1234, 12, 6, 1)`, + `REGEXP_REPLACE(1234, 12, 6, 100)`, + `REGEXP_REPLACE(1234, 12, 6, 1, 1)`, + `REGEXP_REPLACE(1234, 12, 6, 1, 1, 'c')`, + + `REGEXP_REPLACE(NULL, 'DOG', 'bar', 1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', NULL, 'bar', 1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', 'DOG', NULL, 1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', 'DOG', 'bar', 1, NULL, 'i')`, + `REGEXP_REPLACE('dog cat dog', 'DOG', 'bar', 1, 1, NULL)`, + `REGEXP_REPLACE('dog cat dog', 'DOG', 'bar', '1', '1', 0)`, + + `REGEXP_REPLACE(NULL, _latin1'DOG', 'bar', 1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', _latin1'DOG', NULL, 1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', _latin1'DOG', 'bar', 1, NULL, 'i')`, + `REGEXP_REPLACE('dog cat dog', _latin1'DOG', 'bar', 1, 1, NULL)`, + `REGEXP_REPLACE('dog cat dog', _latin1'DOG', 'bar', '1', '1', 0)`, + + `REGEXP_REPLACE(NULL, '[', 'bar', 1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', '[', NULL, 1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', '[', 'bar', 1, NULL, 'i')`, + `REGEXP_REPLACE('dog cat dog', '[', 'bar', 1, 1, NULL)`, + + `REGEXP_REPLACE(NULL, _latin1'[', 'bar', 1, 1, 
'i')`, + `REGEXP_REPLACE('dog cat dog', _latin1'[', NULL, 1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', _latin1'[', 'bar', 1, NULL, 'i')`, + `REGEXP_REPLACE('dog cat dog', _latin1'[', 'bar', 1, 1, NULL)`, + + `REGEXP_REPLACE('dog cat dog', 'DOG', 'bar', 0, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', 'DOG', 'bar', -1, 1, 'i')`, + `REGEXP_REPLACE('', 'DOG', 'bar', -1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', 'DOG', 'bar', 100, 1, 'i')`, + `REGEXP_REPLACE('', 'DOG', 'bar', 100, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', 'DOG', 'bar', 1, 1, 0)`, + + `REGEXP_REPLACE('dog cat dog', _latin1'DOG', 'bar', 0, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', _latin1'DOG', 'bar', -1, 1, 'i')`, + `REGEXP_REPLACE('', _latin1'DOG', 'bar', -1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', _latin1'DOG', 'bar', 100, 1, 'i')`, + `REGEXP_REPLACE('', _latin1'DOG', 'bar', 100, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', _latin1'DOG', 'bar', 1, 1, 0)`, + + `REGEXP_REPLACE(' ', ' ', 'x', 1)`, + `REGEXP_REPLACE(' ', ' ', 'x', 2)`, + `REGEXP_REPLACE(' ', ' ', 'x', 3)`, + + `REGEXP_REPLACE(' ', _latin1' ', 'x', 1)`, + `REGEXP_REPLACE(' ', _latin1' ', 'x', 2)`, + `REGEXP_REPLACE(' ', _latin1' ', 'x', 3)`, + } + + for _, q := range mysqlDocSamples { + yield(q, nil) + } +} diff --git a/go/vt/vtgate/evalengine/testcases/inputs.go b/go/vt/vtgate/evalengine/testcases/inputs.go index 47f50b677c5..5785375955f 100644 --- a/go/vt/vtgate/evalengine/testcases/inputs.go +++ b/go/vt/vtgate/evalengine/testcases/inputs.go @@ -133,6 +133,41 @@ var inputConversions = []string{ "cast(time '12:34:56' as json)", "cast(time '12:34:58' as json)", "cast(time '5 12:34:58' as json)", } +var regexInputs = []string{ + "0", "1", "' 0 '", `'\t1foo\t'`, + `'foobar'`, `_utf8 'foobar'`, `''`, `_binary 'foobar'`, + `0x0`, `0x1`, `0xff`, + "NULL", "true", "false", + "0xFF666F6F626172FF", + "time '10:04:58'", "date '2000-01-01'", + "timestamp '2000-01-01 10:34:58'", + "cast(0 as json)", "cast(1 as json)", + "cast(true as json)", "cast(false as json)", + // JSON numbers + "cast(2 as json)", "cast(1.1 as json)", "cast(-1.1 as json)", + // JSON strings + "cast('\"foo\"' as json)", + // JSON binary values + "cast(_binary' \"foo\"' as json)", + "cast(0xFF666F6F626172FF as json)", + "cast(0b01 as json)", + // JSON arrays + "cast('[\"a\"]' as json)", + // JSON objects + "cast('{\"a\": 1, \"b\": 2}' as json)", +} + +var regexMatchStrings = []string{ + "NULL", + "'c'", "'i'", "'m'", "'n'", "'u'", "'cimnu'", "'cimnuunmic'", +} + +var regexCounters = []string{ + "NULL", + "0", "1", "5", "100000", + "'2'", "0.4", "0.5", "0x1", +} + const inputPi = "314159265358979323846264338327950288419716939937510582097494459" var inputStrings = []string{ diff --git a/go/vt/vtgate/evalengine/translate.go b/go/vt/vtgate/evalengine/translate.go index 7690201f2a3..8cc6df7bd02 100644 --- a/go/vt/vtgate/evalengine/translate.go +++ b/go/vt/vtgate/evalengine/translate.go @@ -75,6 +75,14 @@ func (ast *astCompiler) translateComparisonExpr2(op sqlparser.ComparisonExprOper return &LikeExpr{BinaryExpr: binaryExpr}, nil case sqlparser.NotLikeOp: return &LikeExpr{BinaryExpr: binaryExpr, Negate: true}, nil + case sqlparser.RegexpOp, sqlparser.NotRegexpOp: + return &builtinRegexpLike{ + CallExpr: CallExpr{ + Arguments: []Expr{left, right}, + Method: "REGEXP_LIKE", + }, + Negate: op == sqlparser.NotRegexpOp, + }, nil default: return nil, vterrors.Errorf(vtrpcpb.Code_UNIMPLEMENTED, op.ToString()) } diff --git a/go/vt/vtgate/evalengine/translate_builtin.go b/go/vt/vtgate/evalengine/translate_builtin.go 
index fb6f988af7d..49784973180 100644 --- a/go/vt/vtgate/evalengine/translate_builtin.go +++ b/go/vt/vtgate/evalengine/translate_builtin.go @@ -765,6 +765,167 @@ func (ast *astCompiler) translateCallable(call sqlparser.Callable) (Expr, error) collate: ast.cfg.Collation, }, nil + case *sqlparser.RegexpLikeExpr: + input, err := ast.translateExpr(call.Expr) + if err != nil { + return nil, err + } + + pattern, err := ast.translateExpr(call.Pattern) + if err != nil { + return nil, err + } + + args := []Expr{input, pattern} + + if call.MatchType != nil { + matchType, err := ast.translateExpr(call.MatchType) + if err != nil { + return nil, err + } + args = append(args, matchType) + } + + return &builtinRegexpLike{ + CallExpr: CallExpr{Arguments: args, Method: "REGEXP_LIKE"}, + Negate: false, + }, nil + + case *sqlparser.RegexpInstrExpr: + input, err := ast.translateExpr(call.Expr) + if err != nil { + return nil, err + } + + pattern, err := ast.translateExpr(call.Pattern) + if err != nil { + return nil, err + } + + args := []Expr{input, pattern} + + if call.Position != nil { + position, err := ast.translateExpr(call.Position) + if err != nil { + return nil, err + } + args = append(args, position) + } + + if call.Occurrence != nil { + occurrence, err := ast.translateExpr(call.Occurrence) + if err != nil { + return nil, err + } + args = append(args, occurrence) + } + + if call.ReturnOption != nil { + returnOption, err := ast.translateExpr(call.ReturnOption) + if err != nil { + return nil, err + } + args = append(args, returnOption) + } + + if call.MatchType != nil { + matchType, err := ast.translateExpr(call.MatchType) + if err != nil { + return nil, err + } + args = append(args, matchType) + } + + return &builtinRegexpInstr{ + CallExpr: CallExpr{Arguments: args, Method: "REGEXP_INSTR"}, + }, nil + + case *sqlparser.RegexpSubstrExpr: + input, err := ast.translateExpr(call.Expr) + if err != nil { + return nil, err + } + + pattern, err := ast.translateExpr(call.Pattern) + if err != nil { + return nil, err + } + + args := []Expr{input, pattern} + + if call.Position != nil { + position, err := ast.translateExpr(call.Position) + if err != nil { + return nil, err + } + args = append(args, position) + } + + if call.Occurrence != nil { + occurrence, err := ast.translateExpr(call.Occurrence) + if err != nil { + return nil, err + } + args = append(args, occurrence) + } + + if call.MatchType != nil { + matchType, err := ast.translateExpr(call.MatchType) + if err != nil { + return nil, err + } + args = append(args, matchType) + } + + return &builtinRegexpSubstr{ + CallExpr: CallExpr{Arguments: args, Method: "REGEXP_SUBSTR"}, + }, nil + + case *sqlparser.RegexpReplaceExpr: + input, err := ast.translateExpr(call.Expr) + if err != nil { + return nil, err + } + + pattern, err := ast.translateExpr(call.Pattern) + if err != nil { + return nil, err + } + + repl, err := ast.translateExpr(call.Repl) + if err != nil { + return nil, err + } + + args := []Expr{input, pattern, repl} + + if call.Position != nil { + position, err := ast.translateExpr(call.Position) + if err != nil { + return nil, err + } + args = append(args, position) + } + + if call.Occurrence != nil { + occurrence, err := ast.translateExpr(call.Occurrence) + if err != nil { + return nil, err + } + args = append(args, occurrence) + } + + if call.MatchType != nil { + matchType, err := ast.translateExpr(call.MatchType) + if err != nil { + return nil, err + } + args = append(args, matchType) + } + + return &builtinRegexpReplace{ + CallExpr: CallExpr{Arguments: 
args, Method: "REGEXP_REPLACE"}, + }, nil default: return nil, translateExprNotSupported(call) } diff --git a/go/vt/vttablet/tabletmanager/vreplication/utils.go b/go/vt/vttablet/tabletmanager/vreplication/utils.go index 02bcbb235be..1e26687e147 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/utils.go +++ b/go/vt/vttablet/tabletmanager/vreplication/utils.go @@ -155,6 +155,26 @@ func isUnrecoverableError(err error) bool { mysql.ERInvalidJSONTextInParams, mysql.ERJSONDocumentTooDeep, mysql.ERJSONValueTooBig, + mysql.ERRegexpError, + mysql.ERRegexpStringNotTerminated, + mysql.ERRegexpIllegalArgument, + mysql.ERRegexpIndexOutOfBounds, + mysql.ERRegexpInternal, + mysql.ERRegexpRuleSyntax, + mysql.ERRegexpBadEscapeSequence, + mysql.ERRegexpUnimplemented, + mysql.ERRegexpMismatchParen, + mysql.ERRegexpBadInterval, + mysql.ERRRegexpMaxLtMin, + mysql.ERRegexpInvalidBackRef, + mysql.ERRegexpLookBehindLimit, + mysql.ERRegexpMissingCloseBracket, + mysql.ERRegexpInvalidRange, + mysql.ERRegexpStackOverflow, + mysql.ERRegexpTimeOut, + mysql.ERRegexpPatternTooBig, + mysql.ERRegexpInvalidCaptureGroup, + mysql.ERRegexpInvalidFlag, mysql.ERNoDefault, mysql.ERNoDefaultForField, mysql.ERNonUniq,