mongodb · blink1073 · Sep 12, 2023 · Jul 7, 2023 · Sep 5, 2023 · Sep 5, 2023
diff --git a/bson/bsoncodec/slice_codec.go b/bson/bsoncodec/slice_codec.go
@@ -62,7 +62,7 @@ func (sc SliceCodec) EncodeValue(ec EncodeContext, vw bsonrw.ValueWriter, val re
 	}
 
 	// If we have a []primitive.E we want to treat it as a document instead of as an array.
-	if val.Type().ConvertibleTo(tD) {
+	if val.Type() == tD || val.Type().ConvertibleTo(tD) {
 		d := val.Convert(tD).Interface().(primitive.D)
 
 		dw, err := vw.WriteDocument()

diff --git a/bson/bsoncodec/struct_codec.go b/bson/bsoncodec/struct_codec.go
@@ -190,15 +190,14 @@ func (sc *StructCodec) EncodeValue(ec EncodeContext, vw bsonrw.ValueWriter, val
 		encoder := desc.encoder
 
 		var zero bool
-		rvInterface := rv.Interface()
 		if cz, ok := encoder.(CodecZeroer); ok {
-			zero = cz.IsTypeZero(rvInterface)
+			zero = cz.IsTypeZero(rv.Interface())
 		} else if rv.Kind() == reflect.Interface {
 			// isZero will not treat an interface rv as an interface, so we need to check for the
 			// zero interface separately.
 			zero = rv.IsNil()
 		} else {
-			zero = isZero(rvInterface, sc.EncodeOmitDefaultStruct || ec.omitZeroStruct)
+			zero = isZero(rv, sc.EncodeOmitDefaultStruct || ec.omitZeroStruct)
 		}
 		if desc.omitEmpty && zero {
 			continue
@@ -392,56 +391,46 @@ func (sc *StructCodec) DecodeValue(dc DecodeContext, vr bsonrw.ValueReader, val
 	return nil
 }
 
-func isZero(i interface{}, omitZeroStruct bool) bool {
-	v := reflect.ValueOf(i)
-
-	// check the value validity
+// isZero reports whether v should be considered empty when deciding whether
+// to omit a struct field.
+//
+// An invalid Value is zero. A value whose type implements Zeroer (other than
+// a nil pointer) is asked via IsZero. A struct is never zero unless
+// omitZeroStruct is set, in which case time.Time uses its own IsZero and any
+// other struct is zero only if every exported or embedded field is zero.
+// Everything else falls back to reflect.Value.IsZero.
+func isZero(v reflect.Value, omitZeroStruct bool) bool {
+	// Check validity first: v.Type() below panics on an invalid Value (for
+	// example reflect.ValueOf(nil)).
 	if !v.IsValid() {
 		return true
 	}
 
-	if z, ok := v.Interface().(Zeroer); ok && (v.Kind() != reflect.Ptr || !v.IsNil()) {
-		return z.IsZero()
-	}
-
-	switch v.Kind() {
-	case reflect.Array, reflect.Map, reflect.Slice, reflect.String:
-		return v.Len() == 0
-	case reflect.Bool:
-		return !v.Bool()
-	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
-		return v.Int() == 0
-	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
-		return v.Uint() == 0
-	case reflect.Float32, reflect.Float64:
-		return v.Float() == 0
-	case reflect.Interface, reflect.Ptr:
-		return v.IsNil()
-	case reflect.Struct:
+	kind := v.Kind()
+	if (kind != reflect.Ptr || !v.IsNil()) && v.Type().Implements(tZeroer) {
+		return v.Interface().(Zeroer).IsZero()
+	}
+	if kind == reflect.Struct {
 		if !omitZeroStruct {
 			return false
 		}
-
-		// TODO(GODRIVER-2820): Update the logic to be able to handle private struct fields.
-		// TODO Use condition "reflect.Zero(v.Type()).Equal(v)" instead.
-
 		vt := v.Type()
 		if vt == tTime {
 			return v.Interface().(time.Time).IsZero()
 		}
-		for i := 0; i < v.NumField(); i++ {
-			if vt.Field(i).PkgPath != "" && !vt.Field(i).Anonymous {
+		numField := vt.NumField()
+		for i := 0; i < numField; i++ {
+			ff := vt.Field(i)
+			if ff.PkgPath != "" && !ff.Anonymous {
 				continue // Private field
 			}
-			fld := v.Field(i)
-			if !isZero(fld.Interface(), omitZeroStruct) {
+			if !isZero(v.Field(i), omitZeroStruct) {
 				return false
 			}
 		}
 		return true
 	}
-
-	return false
+	return v.IsZero()
 }
 
 type structDescription struct {
@@ -708,21 +683,26 @@ func getInlineField(val reflect.Value, index []int) (reflect.Value, error) {
 
-// DeepZero returns recursive zero object
+// deepZero returns a settable zero value of type st. When st is a struct,
+// each interfaceable field whose type is a pointer to a struct is set to
+// recursivePointerTo(deepZero(<pointee type>)) instead of being left nil.
 func deepZero(st reflect.Type) (result reflect.Value) {
 	result = reflect.Indirect(reflect.New(st))
 
-	if result.Kind() == reflect.Struct {
-		for i := 0; i < result.NumField(); i++ {
-			if f := result.Field(i); f.Kind() == reflect.Ptr {
-				if f.CanInterface() {
-					if ft := reflect.TypeOf(f.Interface()); ft.Elem().Kind() == reflect.Struct {
-						result.Field(i).Set(recursivePointerTo(deepZero(ft.Elem())))
-					}
-				}
+	if st.Kind() == reflect.Struct {
+		numField := st.NumField()
+		for i := 0; i < numField; i++ {
+			f := result.Field(i)
+			if !f.CanInterface() {
+				continue
+			}
+			// Type.Elem panics for struct kinds, so confirm the field is a
+			// pointer before inspecting what it points to.
+			if ft := f.Type(); ft.Kind() == reflect.Ptr && ft.Elem().Kind() == reflect.Struct {
+				f.Set(recursivePointerTo(deepZero(ft.Elem())))
 			}
 		}
 	}
 
-	return
+	return result
 }
 
 // recursivePointerTo calls reflect.New(v.Type) but recursively for its fields inside

diff --git a/bson/bsoncodec/struct_codec_test.go b/bson/bsoncodec/struct_codec_test.go
@@ -7,6 +7,7 @@
 package bsoncodec
 
 import (
+	"reflect"
 	"testing"
 	"time"
 
@@ -147,7 +148,7 @@ func TestIsZero(t *testing.T) {
 		t.Run(tc.description, func(t *testing.T) {
 			t.Parallel()
 
-			got := isZero(tc.value, tc.omitZeroStruct)
+			got := isZero(reflect.ValueOf(tc.value), tc.omitZeroStruct)
 			assert.Equal(t, tc.want, got, "expected and actual isZero return are different")
 		})
 	}

diff --git a/bson/bsoncodec/types.go b/bson/bsoncodec/types.go
@@ -34,6 +34,7 @@ var tValueUnmarshaler = reflect.TypeOf((*ValueUnmarshaler)(nil)).Elem()
 var tMarshaler = reflect.TypeOf((*Marshaler)(nil)).Elem()
 var tUnmarshaler = reflect.TypeOf((*Unmarshaler)(nil)).Elem()
 var tProxy = reflect.TypeOf((*Proxy)(nil)).Elem()
+var tZeroer = reflect.TypeOf((*Zeroer)(nil)).Elem()
 
 var tBinary = reflect.TypeOf(primitive.Binary{})
 var tUndefined = reflect.TypeOf(primitive.Undefined{})

diff --git a/bson/bsonrw/copier.go b/bson/bsonrw/copier.go
@@ -193,7 +193,7 @@ func (c Copier) AppendDocumentBytes(dst []byte, src ValueReader) ([]byte, error)
 	}
 
 	vw := vwPool.Get().(*valueWriter)
-	defer vwPool.Put(vw)
+	defer putValueWriter(vw)
 
 	vw.reset(dst)
 
@@ -213,7 +213,7 @@ func (c Copier) AppendArrayBytes(dst []byte, src ValueReader) ([]byte, error) {
 	}
 
 	vw := vwPool.Get().(*valueWriter)
-	defer vwPool.Put(vw)
+	defer putValueWriter(vw)
 
 	vw.reset(dst)
 
@@ -258,7 +258,7 @@ func (c Copier) AppendValueBytes(dst []byte, src ValueReader) (bsontype.Type, []
 	}
 
 	vw := vwPool.Get().(*valueWriter)
-	defer vwPool.Put(vw)
+	defer putValueWriter(vw)
 
 	start := len(dst)
 

diff --git a/bson/bsonrw/value_reader.go b/bson/bsonrw/value_reader.go
@@ -739,8 +739,7 @@ func (vr *valueReader) ReadValue() (ValueReader, error) {
 		return nil, ErrEOA
 	}
 
-	_, err = vr.readCString()
-	if err != nil {
+	if err := vr.skipCString(); err != nil {
 		return nil, err
 	}
 
@@ -794,6 +793,19 @@ func (vr *valueReader) readByte() (byte, error) {
 	return vr.d[vr.offset-1], nil
 }
 
+// skipCString advances the read offset past the next null-terminated string
+// without returning its contents. It returns io.EOF, leaving the offset
+// unchanged, if no 0x00 byte is found in the remaining data.
+func (vr *valueReader) skipCString() error {
+	idx := bytes.IndexByte(vr.d[vr.offset:], 0x00)
+	if idx < 0 {
+		return io.EOF
+	}
+	// Step over the string bytes and the terminator itself.
+	vr.offset += int64(idx) + 1
+	return nil
+}
+
 func (vr *valueReader) readCString() (string, error) {
 	idx := bytes.IndexByte(vr.d[vr.offset:], 0x00)
 	if idx < 0 {

diff --git a/bson/bsonrw/value_writer.go b/bson/bsonrw/value_writer.go
@@ -28,6 +28,16 @@ var vwPool = sync.Pool{
 	},
 }
 
+// putValueWriter returns vw to vwPool. It clears vw.w first so the pooled
+// valueWriter does not keep a reference to the writer it was last used with.
+// A nil vw is ignored.
+func putValueWriter(vw *valueWriter) {
+	if vw != nil {
+		vw.w = nil // don't leak the writer
+		vwPool.Put(vw)
+	}
+}
+
 // BSONValueWriterPool is a pool for BSON ValueWriters.
 //
 // Deprecated: BSONValueWriterPool will not be supported in Go Driver 2.0.
@@ -149,31 +156,24 @@ type valueWriter struct {
 }
 
+// advanceFrame moves to the next stack frame, appending a zero vwState when
+// the stack is not yet long enough to hold it.
 func (vw *valueWriter) advanceFrame() {
-	if vw.frame+1 >= int64(len(vw.stack)) { // We need to grow the stack
-		length := len(vw.stack)
-		if length+1 >= cap(vw.stack) {
-			// double it
-			buf := make([]vwState, 2*cap(vw.stack)+1)
-			copy(buf, vw.stack)
-			vw.stack = buf
-		}
-		vw.stack = vw.stack[:length+1]
-	}
 	vw.frame++
+	if vw.frame >= int64(len(vw.stack)) {
+		vw.stack = append(vw.stack, vwState{})
+	}
 }
 
+// push enters a new frame in mode m with every other frame field reset. For
+// mDocument, mArray, and mCodeWithScope it also calls reserveLength.
 func (vw *valueWriter) push(m mode) {
 	vw.advanceFrame()
 
 	// Clean the stack
-	vw.stack[vw.frame].mode = m
-	vw.stack[vw.frame].key = ""
-	vw.stack[vw.frame].arrkey = 0
-	vw.stack[vw.frame].start = 0
+	vw.stack[vw.frame] = vwState{mode: m}
 
-	vw.stack[vw.frame].mode = m
 	switch m {
 	case mDocument, mArray, mCodeWithScope:
-		vw.reserveLength()
+		vw.reserveLength() // WARN: this is not needed
 	}
 }
 
@@ -213,6 +209,7 @@ func newValueWriter(w io.Writer) *valueWriter {
 	return vw
 }
 
+// TODO: only used in tests
 func newValueWriterFromSlice(buf []byte) *valueWriter {
 	vw := new(valueWriter)
 	stack := make([]vwState, 1, 5)
@@ -249,17 +246,16 @@ func (vw *valueWriter) invalidTransitionError(destination mode, name string, mod
 }
 
 func (vw *valueWriter) writeElementHeader(t bsontype.Type, destination mode, callerName string, addmodes ...mode) error {
-	switch vw.stack[vw.frame].mode {
+	frame := &vw.stack[vw.frame]
+	switch frame.mode {
 	case mElement:
-		key := vw.stack[vw.frame].key
+		key := frame.key
 		if !isValidCString(key) {
 			return errors.New("BSON element key cannot contain null bytes")
 		}
-
-		vw.buf = bsoncore.AppendHeader(vw.buf, t, key)
+		vw.appendHeader(t, key)
 	case mValue:
-		// TODO: Do this with a cache of the first 1000 or so array keys.
-		vw.buf = bsoncore.AppendHeader(vw.buf, t, strconv.Itoa(vw.stack[vw.frame].arrkey))
+		vw.appendIntHeader(t, frame.arrkey)
 	default:
 		modes := []mode{mElement, mValue}
 		if addmodes != nil {
@@ -601,9 +597,11 @@ func (vw *valueWriter) writeLength() error {
 	if length > maxSize {
 		return errMaxDocumentSizeExceeded{size: int64(len(vw.buf))}
 	}
-	length = length - int(vw.stack[vw.frame].start)
-	start := vw.stack[vw.frame].start
+	frame := &vw.stack[vw.frame]
+	length = length - int(frame.start)
+	start := frame.start
 
+	_ = vw.buf[start+3] // BCE
 	vw.buf[start+0] = byte(length)
 	vw.buf[start+1] = byte(length >> 8)
 	vw.buf[start+2] = byte(length >> 16)
@@ -612,5 +610,34 @@ func (vw *valueWriter) writeLength() error {
 }
 
 func isValidCString(cs string) bool {
-	return !strings.ContainsRune(cs, '\x00')
+	// Disallow the zero byte in a cstring because the zero byte is used as the
+	// terminating character.
+	//
+	// It's safe to check bytes instead of runes because every byte of a
+	// multibyte UTF-8 sequence has the form (binary) 11xxxxxx or 10xxxxxx, so
+	// 00000000 (i.e. 0) will never be part of a multibyte UTF-8 code point.
+	// This logic is the same as the "r < utf8.RuneSelf" case in
+	// strings.IndexRune but can be inlined.
+	//
+	// https://cs.opensource.google/go/go/+/refs/tags/go1.21.1:src/strings/strings.go;l=127
+	return strings.IndexByte(cs, 0) == -1
+}
+
+// appendHeader is the same as bsoncore.AppendHeader but does not check if the
+// key is a valid C string since the caller has already checked for that.
+//
+// The caller of this function must check if key is a valid C string.
+func (vw *valueWriter) appendHeader(t bsontype.Type, key string) {
+	vw.buf = bsoncore.AppendType(vw.buf, t)
+	vw.buf = append(vw.buf, key...)
+	vw.buf = append(vw.buf, 0x00)
+}
+
+// appendIntHeader appends an element header of type t whose key is the
+// base-10 text of key followed by a null terminator. The digits are written
+// straight into vw.buf with strconv.AppendInt.
+func (vw *valueWriter) appendIntHeader(t bsontype.Type, key int) {
+	vw.buf = bsoncore.AppendType(vw.buf, t)
+	vw.buf = strconv.AppendInt(vw.buf, int64(key), 10)
+	vw.buf = append(vw.buf, 0x00)
 }
diff --git a/bson/marshal.go b/bson/marshal.go
@@ -9,6 +9,7 @@ package bson
 import (
 	"bytes"
 	"encoding/json"
+	"sync"
 
 	"go.mongodb.org/mongo-driver/bson/bsoncodec"
 	"go.mongodb.org/mongo-driver/bson/bsonrw"
@@ -141,6 +142,13 @@ func MarshalAppendWithRegistry(r *bsoncodec.Registry, dst []byte, val interface{
 	return MarshalAppendWithContext(bsoncodec.EncodeContext{Registry: r}, dst, val)
 }
 
+// bufPool recycles *bytes.Buffer scratch buffers used while marshalling BSON.
+var bufPool = sync.Pool{
+	New: func() interface{} {
+		return new(bytes.Buffer)
+	},
+}
+
 // MarshalAppendWithContext will encode val as a BSON document using Registry r and EncodeContext ec and append the
 // bytes to dst. If dst is not large enough to hold the bytes, it will be grown. If val is not a type that can be
 // transformed into a document, MarshalValueAppendWithContext should be used instead.
@@ -162,8 +170,26 @@ func MarshalAppendWithRegistry(r *bsoncodec.Registry, dst []byte, val interface{
 //
 // See [Encoder] for more examples.
 func MarshalAppendWithContext(ec bsoncodec.EncodeContext, dst []byte, val interface{}) ([]byte, error) {
-	sw := new(bsonrw.SliceWriter)
-	*sw = dst
+	sw := bufPool.Get().(*bytes.Buffer)
+	defer func() {
+		// Proper usage of a sync.Pool requires each entry to have approximately
+		// the same memory cost. To obtain this property when the stored type
+		// contains a variably-sized buffer, we add a hard limit on the maximum
+		// buffer to place back in the pool. We limit the size to 16MiB because
+		// that's the maximum wire message size supported by any current MongoDB
+		// server.
+		//
+		// Comment based on
+		// https://cs.opensource.google/go/go/+/refs/tags/go1.19:src/fmt/print.go;l=147
+		//
+		// Recycle byte slices that are smaller than 16MiB and at least half
+		// occupied.
+		if sw.Cap() < 16*1024*1024 && sw.Cap()/2 < sw.Len() {
+			bufPool.Put(sw)
+		}
+	}()
+
+	sw.Reset()
 	vw := bvwPool.Get(sw)
 	defer bvwPool.Put(vw)
 
@@ -184,7 +210,7 @@ func MarshalAppendWithContext(ec bsoncodec.EncodeContext, dst []byte, val interf
 		return nil, err
 	}
 
-	return *sw, nil
+	return append(dst, sw.Bytes()...), nil
 }
 
 // MarshalValue returns the BSON encoding of val.