mongodb · blink1073 · Sep 12, 2023 · Jul 7, 2023 · Sep 5, 2023 · Sep 5, 2023
diff --git a/bson/bsonrw/value_reader.go b/bson/bsonrw/value_reader.go
@@ -739,7 +739,7 @@ func (vr *valueReader) ReadValue() (ValueReader, error) {
 		return nil, ErrEOA
 	}
 
-	if err := vr.consumeCString(); err != nil {
+	if err := vr.skipCString(); err != nil {
 		return nil, err
 	}
 
@@ -793,7 +793,7 @@ func (vr *valueReader) readByte() (byte, error) {
 	return vr.d[vr.offset-1], nil
 }
 
-func (vr *valueReader) consumeCString() error {
+func (vr *valueReader) skipCString() error {
 	idx := bytes.IndexByte(vr.d[vr.offset:], 0x00)
 	if idx < 0 {
 		return io.EOF

diff --git a/bson/bsonrw/value_writer.go b/bson/bsonrw/value_writer.go
@@ -255,7 +255,6 @@ func (vw *valueWriter) writeElementHeader(t bsontype.Type, destination mode, cal
 		}
 		vw.appendHeader(t, key)
 	case mValue:
-		// TODO: Do this with a cache of the first 1000 or so array keys.
 		vw.appendIntHeader(t, frame.arrkey)
 	default:
 		modes := []mode{mElement, mValue}
@@ -611,6 +610,12 @@ func (vw *valueWriter) writeLength() error {
 }
 
 func isValidCString(cs string) bool {
+	// Disallow the zero byte in a cstring because the zero byte is used as the
+	// terminating character. It's safe to check bytes instead of runes because
+	// every byte of a multibyte UTF-8 code point is either "11xxxxxx" (lead) or
+	// "10xxxxxx" (continuation), so "00000000" is never part of one.
+	//
+	// See https://en.wikipedia.org/wiki/UTF-8#Encoding for details.
 	return strings.IndexByte(cs, 0) == -1
 }
 

diff --git a/bson/marshal.go b/bson/marshal.go
@@ -171,7 +171,24 @@ var bufPool = sync.Pool{
 // See [Encoder] for more examples.
 func MarshalAppendWithContext(ec bsoncodec.EncodeContext, dst []byte, val interface{}) ([]byte, error) {
 	sw := bufPool.Get().(*bytes.Buffer)
-	defer bufPool.Put(sw)
+	defer func() {
+		// Proper usage of a sync.Pool requires each entry to have approximately
+		// the same memory cost. To obtain this property when the stored type
+		// contains a variably-sized buffer, we add a hard limit on the maximum
+		// buffer to place back in the pool. We limit the size to 16MiB because
+		// that's the maximum wire message size supported by any current MongoDB
+		// server.
+		//
+		// Comment based on
+		// https://cs.opensource.google/go/go/+/refs/tags/go1.19:src/fmt/print.go;l=147
+		//
+		// Recycle byte slices that are smaller than 16MiB and more than half
+		// occupied.
+		if sw.Cap() < 16*1024*1024 && sw.Cap()/2 < sw.Len() {
+			bufPool.Put(sw)
+		}
+	}()
+
 	sw.Reset()
 	vw := bvwPool.Get(sw)
 	defer bvwPool.Put(vw)