Skip to content

Commit

Permalink
cue: precompile regular expressions
Browse files Browse the repository at this point in the history
This can result in a pretty significant
performance gain.

Change-Id: Id77976eac1afb0e1ea461ae5c65a9a1785d346e3
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/2350
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
  • Loading branch information
mpvl committed Jun 26, 2019
1 parent fdd176c commit 6767a01
Show file tree
Hide file tree
Showing 8 changed files with 91 additions and 39 deletions.
9 changes: 5 additions & 4 deletions cue/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ func (v *astVisitor) walk(astNode ast.Node) (value value) {
list := &list{baseValue: newExpr(n), elem: s}
list.initLit()
if n.Ellipsis != token.NoPos || n.Type != nil {
list.len = newBound(list.baseValue, opGeq, intKind, list.len)
list.len = newBound(v.ctx(), list.baseValue, opGeq, intKind, list.len)
if n.Type != nil {
list.typ = v1.walk(n.Type)
}
Expand Down Expand Up @@ -313,7 +313,7 @@ func (v *astVisitor) walk(astNode ast.Node) (value value) {
// to rewrite it.

if name != "" {
yielder.key = &stringLit{newNode(x), name}
yielder.key = &stringLit{newNode(x), name, nil}
yielder.value = v.walk(field.Value)
}

Expand Down Expand Up @@ -553,6 +553,7 @@ func (v *astVisitor) walk(astNode ast.Node) (value value) {
case token.GEQ, token.GTR, token.LSS, token.LEQ,
token.NEQ, token.MAT, token.NMAT:
value = newBound(
v.ctx(),
newExpr(n),
tokenMap[n.Op],
topKind|nonGround,
Expand All @@ -574,12 +575,12 @@ func (v *astVisitor) walk(astNode ast.Node) (value value) {
value = d

default:
value = &binaryExpr{
value = updateBin(v.ctx(), &binaryExpr{
newExpr(n),
tokenMap[n.Op], // op
v.walk(n.X), // left
v.walk(n.Y), // right
}
})
}

case *ast.CommentGroup:
Expand Down
34 changes: 21 additions & 13 deletions cue/binop.go
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ func (x *bound) binOp(ctx *context, src source, op op, other evaluated) evaluate
if k == x.k {
return x
}
return newBound(newSrc.base(), x.op, k, xv)
return newBound(ctx, newSrc.base(), x.op, k, xv)

case *bound:
yv := y.value.(evaluated)
Expand Down Expand Up @@ -718,25 +718,33 @@ func (x *stringLit) binOp(ctx *context, src source, op op, other evaluated) eval
return cmpTonode(src, op, strings.Compare(x.str, str))
case opAdd:
src := binSrc(src.Pos(), op, x, other)
return &stringLit{src, x.str + str}
return &stringLit{src, x.str + str, nil}
case opMat:
b, err := regexp.MatchString(str, x.str)
if err != nil {
return ctx.mkErr(src, "error parsing regexp: %v", err)
if y.re == nil {
// This really should not happen, but leave in for safety.
b, err := regexp.MatchString(str, x.str)
if err != nil {
return ctx.mkErr(src, "error parsing regexp: %v", err)
}
return boolTonode(src, b)
}
return boolTonode(src, b)
return boolTonode(src, y.re.MatchString(x.str))
case opNMat:
b, err := regexp.MatchString(str, x.str)
if err != nil {
return ctx.mkErr(src, "error parsing regexp: %v", err)
if y.re == nil {
// This really should not happen, but leave in for safety.
b, err := regexp.MatchString(str, x.str)
if err != nil {
return ctx.mkErr(src, "error parsing regexp: %v", err)
}
return boolTonode(src, !b)
}
return boolTonode(src, !b)
return boolTonode(src, !y.re.MatchString(x.str))
}
case *numLit:
switch op {
case opMul:
src := binSrc(src.Pos(), op, x, other)
return &stringLit{src, strings.Repeat(x.str, y.intValue(ctx))}
return &stringLit{src, strings.Repeat(x.str, y.intValue(ctx)), nil}
}
}
return ctx.mkIncompatible(src, op, x, other)
Expand All @@ -762,14 +770,14 @@ func (x *bytesLit) binOp(ctx *context, src source, op op, other evaluated) evalu
case opAdd:
copy := append([]byte(nil), x.b...)
copy = append(copy, b...)
return &bytesLit{binSrc(src.Pos(), op, x, other), copy}
return &bytesLit{binSrc(src.Pos(), op, x, other), copy, nil}
}

case *numLit:
switch op {
case opMul:
src := binSrc(src.Pos(), op, x, other)
return &bytesLit{src, bytes.Repeat(x.b, y.intValue(ctx))}
return &bytesLit{src, bytes.Repeat(x.b, y.intValue(ctx)), nil}
}
}
return ctx.mkIncompatible(src, op, x, other)
Expand Down
4 changes: 2 additions & 2 deletions cue/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ func (x *bound) evalPartial(ctx *context) (result evaluated) {
if v == x.value {
return x
}
return newBound(x.baseValue, x.op, x.k, v)
return newBound(ctx, x.baseValue, x.op, x.k, v)
}

func (x *interpolation) evalPartial(ctx *context) (result evaluated) {
Expand All @@ -214,7 +214,7 @@ func (x *interpolation) evalPartial(ctx *context) (result evaluated) {
}
}
}
return &stringLit{x.baseValue, buf.String()}
return &stringLit{x.baseValue, buf.String(), nil}
}

func (x *list) evalPartial(ctx *context) (result evaluated) {
Expand Down
4 changes: 2 additions & 2 deletions cue/go.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,9 +254,9 @@ func convert(ctx *context, src source, x interface{}) evaluated {
case bool:
return &boolLit{src.base(), v}
case string:
return &stringLit{src.base(), v}
return &stringLit{src.base(), v, nil}
case []byte:
return &bytesLit{src.base(), v}
return &bytesLit{src.base(), v, nil}
case int:
return toInt(ctx, src, int64(v))
case int8:
Expand Down
4 changes: 2 additions & 2 deletions cue/lit.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,9 @@ func parseString(ctx *context, node ast.Expr, q literal.QuoteInfo, s string) (n
return ctx.mkErr(src, err, "invalid string: %v", err)
}
if q.IsDouble() {
return &stringLit{src, str}
return &stringLit{src, str, nil}
}
return &bytesLit{src, []byte(str)}
return &bytesLit{src, []byte(str), nil}
}

func (p *litParser) digitVal(ch byte) (d int) {
Expand Down
4 changes: 2 additions & 2 deletions cue/rewrite.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ func (x *bound) rewrite(ctx *context, fn rewriteFunc) value {
if v == x.value {
return x
}
return newBound(x.baseValue, x.op, x.k, v)
return newBound(ctx, x.baseValue, x.op, x.k, v)
}

func (x *interpolation) rewrite(ctx *context, fn rewriteFunc) value {
Expand Down Expand Up @@ -182,7 +182,7 @@ func (x *binaryExpr) rewrite(ctx *context, fn rewriteFunc) value {
if left == x.left && right == x.right {
return x
}
return &binaryExpr{x.baseValue, x.op, left, right}
return updateBin(ctx, &binaryExpr{x.baseValue, x.op, left, right})
}

func (x *unification) rewrite(ctx *context, fn rewriteFunc) value {
Expand Down
3 changes: 2 additions & 1 deletion cue/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -1116,7 +1116,7 @@ func (v Value) Template() func(label string) Value {
return nil
}
return func(label string) Value {
arg := &stringLit{x.baseValue, label}
arg := &stringLit{x.baseValue, label, nil}
y := fn.call(ctx, x, arg)
return newValueRoot(ctx, y)
}
Expand Down Expand Up @@ -1578,6 +1578,7 @@ func (v Value) Expr() (Op, []Value) {
a = append(a, remakeValue(v, &stringLit{
x.baseValue,
v.ctx().labelStr(x.feature),
nil,
}))
op = SelectorOp
case *indexExpr:
Expand Down
68 changes: 55 additions & 13 deletions cue/value.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package cue

import (
"math/big"
"regexp"
"sort"
"strconv"
"time"
Expand Down Expand Up @@ -195,8 +196,8 @@ func boolTonode(src source, b bool) evaluated {

type bytesLit struct {
baseValue
b []byte
// TODO: maintain extended grapheme index cache.
b []byte
re *regexp.Regexp // only set if needed
}

func (x *bytesLit) kind() kind { return bytesKind }
Expand Down Expand Up @@ -243,12 +244,13 @@ func (x *bytesLit) slice(ctx *context, lo, hi *numLit) evaluated {
if len(x.b) < hix {
return ctx.mkErr(hi, "slice bounds out of range")
}
return &bytesLit{x.baseValue, x.b[lox:hix]}
return &bytesLit{x.baseValue, x.b[lox:hix], nil}
}

// stringLit is a string literal value. Values of this type are built with
// positional composite literals elsewhere in the package, so the field order
// (baseValue, str, re) must be kept stable.
type stringLit struct {
baseValue
// str is the text of the literal.
str string
// re is the compiled form of str. It is nil unless compileRegexp has been
// applied to this literal, which happens when the literal is the operand of
// a match (opMat / opNMat) bound or binary expression.
re *regexp.Regexp // only set if needed

// TODO: maintain extended grapheme index cache.
}
Expand All @@ -271,7 +273,7 @@ func (x *stringLit) at(ctx *context, i int) evaluated {
return ctx.mkErr(x, "index %d out of bounds", i)
}
// TODO: this is incorrect.
return &stringLit{x.baseValue, string(runes[i : i+1])}
return &stringLit{x.baseValue, string(runes[i : i+1]), nil}
}
func (x *stringLit) len() int { return len([]rune(x.str)) }

Expand All @@ -297,7 +299,7 @@ func (x *stringLit) slice(ctx *context, lo, hi *numLit) evaluated {
if len(runes) < hix {
return ctx.mkErr(hi, "slice bounds out of range")
}
return &stringLit{x.baseValue, string(runes[lox:hix])}
return &stringLit{x.baseValue, string(runes[lox:hix]), nil}
}

type numBase struct {
Expand Down Expand Up @@ -391,13 +393,19 @@ type bound struct {
value value
}

func newBound(base baseValue, op op, k kind, v value) *bound {
func newBound(ctx *context, base baseValue, op op, k kind, v value) evaluated {
kv := v.kind()
if kv.isAnyOf(numKind) {
kv |= numKind
} else if op == opNeq && kv&atomKind == nullKind {
kv = typeKinds &^ nullKind
}
if op == opMat || op == opNMat {
v = compileRegexp(ctx, v)
if isBottom(v) {
return v.(*bottom)
}
}
return &bound{base, op, unifyType(k&topKind, kv) | nonGround, v}
}

Expand All @@ -406,8 +414,8 @@ func (x *bound) kind() kind {
}

func mkIntRange(a, b string) evaluated {
from := newBound(baseValue{}, opGeq, intKind, parseInt(intKind, a))
to := newBound(baseValue{}, opLeq, intKind, parseInt(intKind, b))
from := newBound(nil, baseValue{}, opGeq, intKind, parseInt(intKind, a))
to := newBound(nil, baseValue{}, opLeq, intKind, parseInt(intKind, b))
e := &unification{
binSrc(token.NoPos, opUnify, from, to),
[]evaluated{from, to},
Expand All @@ -422,8 +430,8 @@ func mkIntRange(a, b string) evaluated {
}

func mkFloatRange(a, b string) evaluated {
from := newBound(baseValue{}, opGeq, numKind, parseFloat(a))
to := newBound(baseValue{}, opLeq, numKind, parseFloat(b))
from := newBound(nil, baseValue{}, opGeq, numKind, parseFloat(a))
to := newBound(nil, baseValue{}, opLeq, numKind, parseFloat(b))
e := &unification{
binSrc(token.NoPos, opUnify, from, to),
[]evaluated{from, to},
Expand All @@ -449,7 +457,7 @@ var predefinedRanges = map[string]evaluated{

// Do not include an alias for "byte", as it would be too easily confused
// with the builtin "bytes".
"uint": newBound(baseValue{}, opGeq, intKind, parseInt(intKind, "0")),
"uint": newBound(nil, baseValue{}, opGeq, intKind, parseInt(intKind, "0")),
"uint8": mkIntRange("0", "255"),
"uint16": mkIntRange("0", "65535"),
"uint32": mkIntRange("0", "4294967295"),
Expand Down Expand Up @@ -810,7 +818,7 @@ func (x *structLit) applyTemplate(ctx *context, i int, v evaluated) (evaluated,
return err, nil
}
name := ctx.labelStr(x.arcs[i].feature)
arg := &stringLit{x.baseValue, name}
arg := &stringLit{x.baseValue, name, nil}
w := fn.call(ctx, x, arg).evalPartial(ctx)
v = binOp(ctx, x, opUnify, v, w)

Expand Down Expand Up @@ -1086,6 +1094,27 @@ type unaryExpr struct {

func (x *unaryExpr) kind() kind { return x.x.kind() }

// compileRegexp ensures that a string or bytes literal carries a compiled
// regular expression in its re field.
//
// If v is a *stringLit or *bytesLit whose re field is still nil, the literal's
// contents are compiled with regexp.Compile and the result is cached on the
// literal itself (v is mutated in place). A literal that already has a
// compiled expression is left untouched. Values of any other type are returned
// unchanged.
//
// On a compilation failure the error value produced by ctx.mkErr is returned
// in place of v and the literal's re field stays nil.
func compileRegexp(ctx *context, v value) value {
	switch lit := v.(type) {
	case *stringLit:
		if lit.re != nil {
			// Already compiled; nothing to do.
			return v
		}
		re, err := regexp.Compile(lit.str)
		if err != nil {
			return ctx.mkErr(v, "could not compile regular expression %q: %v", lit.str, err)
		}
		lit.re = re

	case *bytesLit:
		if lit.re != nil {
			// Already compiled; nothing to do.
			return v
		}
		re, err := regexp.Compile(string(lit.b))
		if err != nil {
			return ctx.mkErr(v, "could not compile regular expression %q: %v", lit.b, err)
		}
		lit.re = re
	}
	return v
}

type binaryExpr struct {
baseValue
op op
Expand Down Expand Up @@ -1115,7 +1144,19 @@ func mkBin(ctx *context, pos token.Pos, op op, left, right value) value {
// return left
// }
}
return &binaryExpr{binSrc(pos, op, left, right), op, left, right}
bin := &binaryExpr{binSrc(pos, op, left, right), op, left, right}
return updateBin(ctx, bin)
}

// updateBin finalizes a freshly built binary expression.
//
// For the match operators (opMat and opNMat) the right operand is run through
// compileRegexp and the result is stored back into bin.right. If that result
// is a bottom value according to isBottom, it is returned instead of the
// expression. For every other operator bin is returned as is.
func updateBin(ctx *context, bin *binaryExpr) value {
	if bin.op != opMat && bin.op != opNMat {
		return bin
	}

	// The right operand is the pattern; precompile it once here.
	bin.right = compileRegexp(ctx, bin.right)
	if isBottom(bin.right) {
		return bin.right
	}
	return bin
}

func (x *binaryExpr) kind() kind {
Expand Down Expand Up @@ -1357,6 +1398,7 @@ func (x *feed) yield(ctx *context, yfn yieldFunc) (result evaluated) {
key := &stringLit{
x.baseValue,
ctx.labelStr(a.feature),
nil,
}
val := src.at(ctx, i)
v := fn.call(ctx, x, key, val)
Expand Down

0 comments on commit 6767a01

Please sign in to comment.