From 4bcbc290c45a64ae40b762a9d8a4c98bc24c4d30 Mon Sep 17 00:00:00 2001 From: Bailey Pearson Date: Wed, 4 Sep 2024 09:07:54 -0600 Subject: [PATCH 1/6] fix(NODE-6355): respect utf8 validation options when iterating cursors (#4214) --- src/cmap/wire_protocol/on_demand/document.ts | 21 +- src/cmap/wire_protocol/responses.ts | 19 -- .../bson-options/utf8_validation.test.ts | 188 +++++++++++++++++- .../unit/cmap/wire_protocol/responses.test.ts | 85 +++++--- 4 files changed, 261 insertions(+), 52 deletions(-) diff --git a/src/cmap/wire_protocol/on_demand/document.ts b/src/cmap/wire_protocol/on_demand/document.ts index 944916f10b..67f5b3a091 100644 --- a/src/cmap/wire_protocol/on_demand/document.ts +++ b/src/cmap/wire_protocol/on_demand/document.ts @@ -1,15 +1,16 @@ import { Binary, - BSON, type BSONElement, BSONError, type BSONSerializeOptions, BSONType, + deserialize, getBigInt64LE, getFloat64LE, getInt32LE, ObjectId, parseToElementsToArray, + pluckBSONSerializeOptions, Timestamp, toUTF8 } from '../../../bson'; @@ -330,11 +331,23 @@ export class OnDemandDocument { * @param options - BSON deserialization options */ public toObject(options?: BSONSerializeOptions): Record { - return BSON.deserialize(this.bson, { - ...options, + const exactBSONOptions = { + ...pluckBSONSerializeOptions(options ?? {}), + validation: this.parseBsonSerializationOptions(options), index: this.offset, allowObjectSmallerThanBufferSize: true - }); + }; + return deserialize(this.bson, exactBSONOptions); + } + + private parseBsonSerializationOptions(options?: { enableUtf8Validation?: boolean }): { + utf8: { writeErrors: false } | false; + } { + const enableUtf8Validation = options?.enableUtf8Validation; + if (enableUtf8Validation === false) { + return { utf8: false }; + } + return { utf8: { writeErrors: false } }; } /** Returns this document's bytes only */ diff --git a/src/cmap/wire_protocol/responses.ts b/src/cmap/wire_protocol/responses.ts index 0ef048e8da..9837634bfb 100644 --- a/src/cmap/wire_protocol/responses.ts +++ b/src/cmap/wire_protocol/responses.ts @@ -5,7 +5,6 @@ import { type Document, Long, parseToElementsToArray, - pluckBSONSerializeOptions, type Timestamp } from '../../bson'; import { MongoUnexpectedServerResponseError } from '../../error'; @@ -166,24 +165,6 @@ export class MongoDBResponse extends OnDemandDocument { } return this.clusterTime ?? null; } - - public override toObject(options?: BSONSerializeOptions): Record { - const exactBSONOptions = { - ...pluckBSONSerializeOptions(options ?? {}), - validation: this.parseBsonSerializationOptions(options) - }; - return super.toObject(exactBSONOptions); - } - - private parseBsonSerializationOptions(options?: { enableUtf8Validation?: boolean }): { - utf8: { writeErrors: false } | false; - } { - const enableUtf8Validation = options?.enableUtf8Validation; - if (enableUtf8Validation === false) { - return { utf8: false }; - } - return { utf8: { writeErrors: false } }; - } } /** @internal */ diff --git a/test/integration/node-specific/bson-options/utf8_validation.test.ts b/test/integration/node-specific/bson-options/utf8_validation.test.ts index 5c3f94e7fb..d6345a884d 100644 --- a/test/integration/node-specific/bson-options/utf8_validation.test.ts +++ b/test/integration/node-specific/bson-options/utf8_validation.test.ts @@ -1,11 +1,16 @@ import { expect } from 'chai'; +import * as net from 'net'; import * as sinon from 'sinon'; +import { inspect } from 'util'; import { BSON, + BSONError, + type Collection, + deserialize, type MongoClient, - MongoDBResponse, MongoServerError, + OnDemandDocument, OpMsgResponse } from '../../../mongodb'; @@ -23,12 +28,12 @@ describe('class MongoDBResponse', () => { let bsonSpy: sinon.SinonSpy; beforeEach(() => { - bsonSpy = sinon.spy(MongoDBResponse.prototype, 'parseBsonSerializationOptions'); + // @ts-expect-error private function + bsonSpy = sinon.spy(OnDemandDocument.prototype, 'parseBsonSerializationOptions'); }); afterEach(() => { bsonSpy?.restore(); - // @ts-expect-error: Allow this to be garbage collected bsonSpy = null; }); @@ -153,3 +158,180 @@ describe('class MongoDBResponse', () => { } ); }); + +describe('utf8 validation with cursors', function () { + let client: MongoClient; + let collection: Collection; + + /** + * Inserts a document with malformed utf8 bytes. This method spies on socket.write, and then waits + * for an OP_MSG payload corresponding to `collection.insertOne({ field: 'é' })`, and then modifies the + * bytes of the character 'é', to produce invalid utf8. + */ + async function insertDocumentWithInvalidUTF8() { + const stub = sinon.stub(net.Socket.prototype, 'write').callsFake(function (...args) { + const providedBuffer = args[0].toString('hex'); + const targetBytes = Buffer.from(document.field, 'utf-8').toString('hex'); + + if (providedBuffer.includes(targetBytes)) { + if (providedBuffer.split(targetBytes).length !== 2) { + sinon.restore(); + const message = `too many target bytes sequences: received ${providedBuffer.split(targetBytes).length}\n. command: ${inspect(deserialize(args[0]), { depth: Infinity })}`; + throw new Error(message); + } + const buffer = Buffer.from(providedBuffer.replace(targetBytes, 'c301'.repeat(8)), 'hex'); + const result = stub.wrappedMethod.apply(this, [buffer]); + sinon.restore(); + return result; + } + const result = stub.wrappedMethod.apply(this, args); + return result; + }); + + const document = { + field: 'é'.repeat(8) + }; + + await collection.insertOne(document); + + sinon.restore(); + } + + beforeEach(async function () { + client = this.configuration.newClient(); + await client.connect(); + const db = client.db('test'); + collection = db.collection('invalidutf'); + + await collection.deleteMany({}); + await insertDocumentWithInvalidUTF8(); + }); + + afterEach(async function () { + sinon.restore(); + await client.close(); + }); + + context('when utf-8 validation is explicitly disabled', function () { + it('documents can be read using a for-await loop without errors', async function () { + for await (const _doc of collection.find({}, { enableUtf8Validation: false })); + }); + it('documents can be read using next() without errors', async function () { + const cursor = collection.find({}, { enableUtf8Validation: false }); + + while (await cursor.hasNext()) { + await cursor.next(); + } + }); + + it('documents can be read using toArray() without errors', async function () { + const cursor = collection.find({}, { enableUtf8Validation: false }); + await cursor.toArray(); + }); + + it('documents can be read using .stream() without errors', async function () { + const cursor = collection.find({}, { enableUtf8Validation: false }); + await cursor.stream().toArray(); + }); + + it('documents can be read with tryNext() without error', async function () { + const cursor = collection.find({}, { enableUtf8Validation: false }); + + while (await cursor.hasNext()) { + await cursor.tryNext(); + } + }); + }); + + async function expectReject(fn: () => Promise) { + try { + await fn(); + expect.fail('expected the provided callback function to reject, but it did not.'); + } catch (error) { + expect(error).to.match(/Invalid UTF-8 string in BSON document/); + expect(error).to.be.instanceOf(BSONError); + } + } + + context('when utf-8 validation is explicitly enabled', function () { + it('a for-await loop throw a BSON error', async function () { + await expectReject(async () => { + for await (const _doc of collection.find({}, { enableUtf8Validation: true })); + }); + }); + it('next() throws a BSON error', async function () { + await expectReject(async () => { + const cursor = collection.find({}, { enableUtf8Validation: true }); + + while (await cursor.hasNext()) { + await cursor.next(); + } + }); + }); + + it('toArray() throws a BSON error', async function () { + await expectReject(async () => { + const cursor = collection.find({}, { enableUtf8Validation: true }); + await cursor.toArray(); + }); + }); + + it('.stream() throws a BSONError', async function () { + await expectReject(async () => { + const cursor = collection.find({}, { enableUtf8Validation: true }); + await cursor.stream().toArray(); + }); + }); + + it('tryNext() throws a BSONError', async function () { + await expectReject(async () => { + const cursor = collection.find({}, { enableUtf8Validation: true }); + + while (await cursor.hasNext()) { + await cursor.tryNext(); + } + }); + }); + }); + + context('utf-8 validation defaults to enabled', function () { + it('a for-await loop throw a BSON error', async function () { + await expectReject(async () => { + for await (const _doc of collection.find({})); + }); + }); + it('next() throws a BSON error', async function () { + await expectReject(async () => { + const cursor = collection.find({}); + + while (await cursor.hasNext()) { + await cursor.next(); + } + }); + }); + + it('toArray() throws a BSON error', async function () { + await expectReject(async () => { + const cursor = collection.find({}); + await cursor.toArray(); + }); + }); + + it('.stream() throws a BSONError', async function () { + await expectReject(async () => { + const cursor = collection.find({}); + await cursor.stream().toArray(); + }); + }); + + it('tryNext() throws a BSONError', async function () { + await expectReject(async () => { + const cursor = collection.find({}, { enableUtf8Validation: true }); + + while (await cursor.hasNext()) { + await cursor.tryNext(); + } + }); + }); + }); +}); diff --git a/test/unit/cmap/wire_protocol/responses.test.ts b/test/unit/cmap/wire_protocol/responses.test.ts index 9498765cf4..7fccbfc7fc 100644 --- a/test/unit/cmap/wire_protocol/responses.test.ts +++ b/test/unit/cmap/wire_protocol/responses.test.ts @@ -1,54 +1,87 @@ import { expect } from 'chai'; import * as sinon from 'sinon'; +// to spy on the bson module, we must import it from the driver +// eslint-disable-next-line @typescript-eslint/no-restricted-imports +import * as mdb from '../../../../src/bson'; import { - BSON, CursorResponse, Int32, MongoDBResponse, MongoUnexpectedServerResponseError, - OnDemandDocument + OnDemandDocument, + serialize } from '../../../mongodb'; describe('class MongoDBResponse', () => { it('is a subclass of OnDemandDocument', () => { - expect(new MongoDBResponse(BSON.serialize({ ok: 1 }))).to.be.instanceOf(OnDemandDocument); + expect(new MongoDBResponse(serialize({ ok: 1 }))).to.be.instanceOf(OnDemandDocument); }); context('utf8 validation', () => { - afterEach(() => sinon.restore()); + let deseriailzeSpy: sinon.SinonStub>; + beforeEach(function () { + const deserialize = mdb.deserialize; + deseriailzeSpy = sinon.stub>().callsFake(deserialize); + sinon.stub(mdb, 'deserialize').get(() => { + return deseriailzeSpy; + }); + }); + afterEach(function () { + sinon.restore(); + }); context('when enableUtf8Validation is not specified', () => { const options = { enableUtf8Validation: undefined }; it('calls BSON deserialize with writeErrors validation turned off', () => { - const res = new MongoDBResponse(BSON.serialize({})); - const toObject = sinon.spy(Object.getPrototypeOf(Object.getPrototypeOf(res)), 'toObject'); + const res = new MongoDBResponse(serialize({})); res.toObject(options); - expect(toObject).to.have.been.calledWith( - sinon.match({ validation: { utf8: { writeErrors: false } } }) - ); + + expect(deseriailzeSpy).to.have.been.called; + + const [ + { + args: [_buffer, { validation }] + } + ] = deseriailzeSpy.getCalls(); + + expect(validation).to.deep.equal({ utf8: { writeErrors: false } }); }); }); context('when enableUtf8Validation is true', () => { const options = { enableUtf8Validation: true }; it('calls BSON deserialize with writeErrors validation turned off', () => { - const res = new MongoDBResponse(BSON.serialize({})); - const toObject = sinon.spy(Object.getPrototypeOf(Object.getPrototypeOf(res)), 'toObject'); + const res = new MongoDBResponse(serialize({})); res.toObject(options); - expect(toObject).to.have.been.calledWith( - sinon.match({ validation: { utf8: { writeErrors: false } } }) - ); + + expect(deseriailzeSpy).to.have.been.called; + + const [ + { + args: [_buffer, { validation }] + } + ] = deseriailzeSpy.getCalls(); + + expect(validation).to.deep.equal({ utf8: { writeErrors: false } }); }); }); context('when enableUtf8Validation is false', () => { const options = { enableUtf8Validation: false }; it('calls BSON deserialize with all validation disabled', () => { - const res = new MongoDBResponse(BSON.serialize({})); - const toObject = sinon.spy(Object.getPrototypeOf(Object.getPrototypeOf(res)), 'toObject'); + const res = new MongoDBResponse(serialize({})); res.toObject(options); - expect(toObject).to.have.been.calledWith(sinon.match({ validation: { utf8: false } })); + + expect(deseriailzeSpy).to.have.been.called; + + const [ + { + args: [_buffer, { validation }] + } + ] = deseriailzeSpy.getCalls(); + + expect(validation).to.deep.equal({ utf8: false }); }); }); }); @@ -57,7 +90,7 @@ describe('class MongoDBResponse', () => { describe('class CursorResponse', () => { describe('get cursor()', () => { it('throws if input does not contain cursor embedded document', () => { - expect(() => new CursorResponse(BSON.serialize({ ok: 1 })).cursor).to.throw( + expect(() => new CursorResponse(serialize({ ok: 1 })).cursor).to.throw( MongoUnexpectedServerResponseError, /"cursor" is missing/ ); @@ -66,7 +99,7 @@ describe('class CursorResponse', () => { describe('get id()', () => { it('throws if input does not contain cursor.id int64', () => { - expect(() => new CursorResponse(BSON.serialize({ ok: 1, cursor: {} })).id).to.throw( + expect(() => new CursorResponse(serialize({ ok: 1, cursor: {} })).id).to.throw( MongoUnexpectedServerResponseError, /"id" is missing/ ); @@ -77,22 +110,22 @@ describe('class CursorResponse', () => { it('throws if input does not contain firstBatch nor nextBatch', () => { expect( // @ts-expect-error: testing private getter - () => new CursorResponse(BSON.serialize({ ok: 1, cursor: { id: 0n, batch: [] } })).batch + () => new CursorResponse(serialize({ ok: 1, cursor: { id: 0n, batch: [] } })).batch ).to.throw(MongoUnexpectedServerResponseError, /did not contain a batch/); }); }); describe('get ns()', () => { it('sets namespace to null if input does not contain cursor.ns', () => { - expect(new CursorResponse(BSON.serialize({ ok: 1, cursor: { id: 0n, firstBatch: [] } })).ns) - .to.be.null; + expect(new CursorResponse(serialize({ ok: 1, cursor: { id: 0n, firstBatch: [] } })).ns).to.be + .null; }); }); describe('get batchSize()', () => { it('reports the returned batch size', () => { const response = new CursorResponse( - BSON.serialize({ ok: 1, cursor: { id: 0n, nextBatch: [{}, {}, {}] } }) + serialize({ ok: 1, cursor: { id: 0n, nextBatch: [{}, {}, {}] } }) ); expect(response.batchSize).to.equal(3); expect(response.shift()).to.deep.equal({}); @@ -103,7 +136,7 @@ describe('class CursorResponse', () => { describe('get length()', () => { it('reports number of documents remaining in the batch', () => { const response = new CursorResponse( - BSON.serialize({ ok: 1, cursor: { id: 0n, nextBatch: [{}, {}, {}] } }) + serialize({ ok: 1, cursor: { id: 0n, nextBatch: [{}, {}, {}] } }) ); expect(response).to.have.lengthOf(3); expect(response.shift()).to.deep.equal({}); @@ -116,7 +149,7 @@ describe('class CursorResponse', () => { beforeEach(async function () { response = new CursorResponse( - BSON.serialize({ + serialize({ ok: 1, cursor: { id: 0n, nextBatch: [{ _id: 1 }, { _id: 2 }, { _id: 3 }] } }) @@ -143,7 +176,7 @@ describe('class CursorResponse', () => { beforeEach(async function () { response = new CursorResponse( - BSON.serialize({ + serialize({ ok: 1, cursor: { id: 0n, nextBatch: [{ _id: 1 }, { _id: 2 }, { _id: 3 }] } }) From 1c0891a75c4b42ee1bcc6976f1a2cce216bd2895 Mon Sep 17 00:00:00 2001 From: Bailey Pearson Date: Fri, 6 Sep 2024 10:32:08 -0600 Subject: [PATCH 2/6] asdf --- src/bson.ts | 11 ++ src/cmap/connection.ts | 3 +- src/cmap/wire_protocol/on_demand/document.ts | 23 ++- src/cmap/wire_protocol/responses.ts | 28 +++- src/cursor/abstract_cursor.ts | 28 +++- src/cursor/aggregation_cursor.ts | 2 +- src/cursor/find_cursor.ts | 2 +- src/index.ts | 6 +- .../bson-options/utf8_validation.test.ts | 141 ++++-------------- .../unit/cmap/wire_protocol/responses.test.ts | 72 --------- 10 files changed, 106 insertions(+), 210 deletions(-) diff --git a/src/bson.ts b/src/bson.ts index ce5bd486b7..d749db2c4a 100644 --- a/src/bson.ts +++ b/src/bson.ts @@ -133,3 +133,14 @@ export function resolveBSONOptions( options?.enableUtf8Validation ?? parentOptions?.enableUtf8Validation ?? true }; } + +/** @internal */ +export function parseUtf8ValidationOption(options?: { enableUtf8Validation?: boolean }): { + utf8: { writeErrors: false } | false; +} { + const enableUtf8Validation = options?.enableUtf8Validation; + if (enableUtf8Validation === false) { + return { utf8: false }; + } + return { utf8: { writeErrors: false } }; +} diff --git a/src/cmap/connection.ts b/src/cmap/connection.ts index 1e4afc7f38..dd823ee07e 100644 --- a/src/cmap/connection.ts +++ b/src/cmap/connection.ts @@ -1,3 +1,4 @@ +import { type DeserializeOptions } from 'bson'; import { type Readable, Transform, type TransformCallback } from 'stream'; import { clearTimeout, setTimeout } from 'timers'; @@ -487,7 +488,7 @@ export class Connection extends TypedEventEmitter { // If `documentsReturnedIn` not set or raw is not enabled, use input bson options // Otherwise, support raw flag. Raw only works for cursors that hardcode firstBatch/nextBatch fields - const bsonOptions = + const bsonOptions: DeserializeOptions = options.documentsReturnedIn == null || !options.raw ? options : { diff --git a/src/cmap/wire_protocol/on_demand/document.ts b/src/cmap/wire_protocol/on_demand/document.ts index 67f5b3a091..fdb6f90e4d 100644 --- a/src/cmap/wire_protocol/on_demand/document.ts +++ b/src/cmap/wire_protocol/on_demand/document.ts @@ -1,8 +1,9 @@ +import { type DeserializeOptions } from 'bson'; + import { Binary, type BSONElement, BSONError, - type BSONSerializeOptions, BSONType, deserialize, getBigInt64LE, @@ -10,7 +11,6 @@ import { getInt32LE, ObjectId, parseToElementsToArray, - pluckBSONSerializeOptions, Timestamp, toUTF8 } from '../../../bson'; @@ -44,6 +44,15 @@ export type JSTypeOf = { /** @internal */ type CachedBSONElement = { element: BSONElement; value: any | undefined }; +/** + * @internal + * + * Options for `OnDemandDocument.toObject()`. Validation is required to ensure + * that callers provide utf8 validation options. */ +export type OnDemandDocumentDeserializeOptions = DeserializeOptions & { + validation: NonNullable; +}; + /** @internal */ export class OnDemandDocument { /** @@ -330,14 +339,12 @@ export class OnDemandDocument { * Deserialize this object, DOES NOT cache result so avoid multiple invocations * @param options - BSON deserialization options */ - public toObject(options?: BSONSerializeOptions): Record { - const exactBSONOptions = { - ...pluckBSONSerializeOptions(options ?? {}), - validation: this.parseBsonSerializationOptions(options), + public toObject(options?: OnDemandDocumentDeserializeOptions): Record { + return deserialize(this.bson, { + ...options, index: this.offset, allowObjectSmallerThanBufferSize: true - }; - return deserialize(this.bson, exactBSONOptions); + }); } private parseBsonSerializationOptions(options?: { enableUtf8Validation?: boolean }): { diff --git a/src/cmap/wire_protocol/responses.ts b/src/cmap/wire_protocol/responses.ts index 9837634bfb..e69cf84cfc 100644 --- a/src/cmap/wire_protocol/responses.ts +++ b/src/cmap/wire_protocol/responses.ts @@ -1,3 +1,5 @@ +import { type DeserializeOptions } from 'bson'; + import { type BSONElement, type BSONSerializeOptions, @@ -5,12 +7,18 @@ import { type Document, Long, parseToElementsToArray, + parseUtf8ValidationOption, + pluckBSONSerializeOptions, type Timestamp } from '../../bson'; import { MongoUnexpectedServerResponseError } from '../../error'; import { type ClusterTime } from '../../sdam/common'; import { decorateDecryptionResult, ns } from '../../utils'; -import { type JSTypeOf, OnDemandDocument } from './on_demand/document'; +import { + type JSTypeOf, + OnDemandDocument, + type OnDemandDocumentDeserializeOptions +} from './on_demand/document'; // eslint-disable-next-line no-restricted-syntax const enum BSONElementOffset { @@ -112,7 +120,8 @@ export class MongoDBResponse extends OnDemandDocument { this.get('recoveryToken', BSONType.object)?.toObject({ promoteValues: false, promoteLongs: false, - promoteBuffers: false + promoteBuffers: false, + validation: { utf8: true } }) ?? null ); } @@ -165,6 +174,14 @@ export class MongoDBResponse extends OnDemandDocument { } return this.clusterTime ?? null; } + + public override toObject(options?: BSONSerializeOptions): Record { + const exactBSONOptions = { + ...pluckBSONSerializeOptions(options ?? {}), + validation: parseUtf8ValidationOption(options) + }; + return super.toObject(exactBSONOptions); + } } /** @internal */ @@ -248,12 +265,13 @@ export class CursorResponse extends MongoDBResponse { this.cursor.get('postBatchResumeToken', BSONType.object)?.toObject({ promoteValues: false, promoteLongs: false, - promoteBuffers: false + promoteBuffers: false, + validation: { utf8: true } }) ?? null ); } - public shift(options?: BSONSerializeOptions): any { + public shift(options: OnDemandDocumentDeserializeOptions): any { if (this.iterated >= this.batchSize) { return null; } @@ -305,7 +323,7 @@ export class ExplainedCursorResponse extends CursorResponse { return this._length; } - override shift(options?: BSONSerializeOptions | undefined) { + override shift(options?: DeserializeOptions) { if (this._length === 0) return null; this._length -= 1; return this.toObject(options); diff --git a/src/cursor/abstract_cursor.ts b/src/cursor/abstract_cursor.ts index da08f1a1a6..08a8bf7b31 100644 --- a/src/cursor/abstract_cursor.ts +++ b/src/cursor/abstract_cursor.ts @@ -1,6 +1,14 @@ +import { type DeserializeOptions } from 'bson'; import { Readable, Transform } from 'stream'; -import { type BSONSerializeOptions, type Document, Long, pluckBSONSerializeOptions } from '../bson'; +import { + type BSONSerializeOptions, + type Document, + Long, + parseUtf8ValidationOption, + pluckBSONSerializeOptions +} from '../bson'; +import { type OnDemandDocumentDeserializeOptions } from '../cmap/wire_protocol/on_demand/document'; import { type CursorResponse } from '../cmap/wire_protocol/responses'; import { MongoAPIError, @@ -153,6 +161,9 @@ export abstract class AbstractCursor< /** @event */ static readonly CLOSE = 'close' as const; + /** @internal */ + protected deserializationOptions: OnDemandDocumentDeserializeOptions; + /** @internal */ protected constructor( client: MongoClient, @@ -207,6 +218,13 @@ export abstract class AbstractCursor< } else { this.cursorSession = this.cursorClient.startSession({ owner: this, explicit: false }); } + + this.deserializationOptions = { + ...this.cursorOptions, + validation: { + utf8: options?.enableUtf8Validation === false ? false : true + } + }; } /** @@ -289,7 +307,7 @@ export abstract class AbstractCursor< ); for (let count = 0; count < documentsToRead; count++) { - const document = this.documents?.shift(this.cursorOptions); + const document = this.documents?.shift(this.deserializationOptions); if (document != null) { bufferedDocs.push(document); } @@ -390,7 +408,7 @@ export abstract class AbstractCursor< } do { - const doc = this.documents?.shift(this.cursorOptions); + const doc = this.documents?.shift(this.deserializationOptions); if (doc != null) { if (this.transform != null) return await this.transformDocument(doc); return doc; @@ -409,7 +427,7 @@ export abstract class AbstractCursor< throw new MongoCursorExhaustedError(); } - let doc = this.documents?.shift(this.cursorOptions); + let doc = this.documents?.shift(this.deserializationOptions); if (doc != null) { if (this.transform != null) return await this.transformDocument(doc); return doc; @@ -417,7 +435,7 @@ export abstract class AbstractCursor< await this.fetchBatch(); - doc = this.documents?.shift(this.cursorOptions); + doc = this.documents?.shift(this.deserializationOptions); if (doc != null) { if (this.transform != null) return await this.transformDocument(doc); return doc; diff --git a/src/cursor/aggregation_cursor.ts b/src/cursor/aggregation_cursor.ts index 1b1ad663ba..3281e77707 100644 --- a/src/cursor/aggregation_cursor.ts +++ b/src/cursor/aggregation_cursor.ts @@ -74,7 +74,7 @@ export class AggregationCursor extends AbstractCursor { explain: verbosity ?? true }) ) - ).shift(this.aggregateOptions); + ).shift(this.deserializationOptions); } /** Add a stage to the aggregation pipeline diff --git a/src/cursor/find_cursor.ts b/src/cursor/find_cursor.ts index e4b3dbc03c..ef21cea290 100644 --- a/src/cursor/find_cursor.ts +++ b/src/cursor/find_cursor.ts @@ -143,7 +143,7 @@ export class FindCursor extends AbstractCursor { explain: verbosity ?? true }) ) - ).shift(this.findOptions); + ).shift(this.deserializationOptions); } /** Set the cursor query */ diff --git a/src/index.ts b/src/index.ts index 8bf6c68617..6d0908198f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -299,7 +299,11 @@ export type { ClientMetadata, ClientMetadataOptions } from './cmap/handshake/cli export type { ConnectionPoolMetrics } from './cmap/metrics'; export type { StreamDescription, StreamDescriptionOptions } from './cmap/stream_description'; export type { CompressorName } from './cmap/wire_protocol/compression'; -export type { JSTypeOf, OnDemandDocument } from './cmap/wire_protocol/on_demand/document'; +export type { + JSTypeOf, + OnDemandDocument, + OnDemandDocumentDeserializeOptions +} from './cmap/wire_protocol/on_demand/document'; export type { CursorResponse, MongoDBResponse, diff --git a/test/integration/node-specific/bson-options/utf8_validation.test.ts b/test/integration/node-specific/bson-options/utf8_validation.test.ts index d6345a884d..df367f233b 100644 --- a/test/integration/node-specific/bson-options/utf8_validation.test.ts +++ b/test/integration/node-specific/bson-options/utf8_validation.test.ts @@ -10,121 +10,43 @@ import { deserialize, type MongoClient, MongoServerError, - OnDemandDocument, OpMsgResponse } from '../../../mongodb'; -const EXPECTED_VALIDATION_DISABLED_ARGUMENT = { - utf8: false -}; - -const EXPECTED_VALIDATION_ENABLED_ARGUMENT = { - utf8: { - writeErrors: false - } -}; - describe('class MongoDBResponse', () => { - let bsonSpy: sinon.SinonSpy; - - beforeEach(() => { - // @ts-expect-error private function - bsonSpy = sinon.spy(OnDemandDocument.prototype, 'parseBsonSerializationOptions'); - }); - - afterEach(() => { - bsonSpy?.restore(); - bsonSpy = null; - }); - let client; afterEach(async () => { + sinon.restore(); if (client) await client.close(); }); - describe('enableUtf8Validation option set to false', () => { - const option = { enableUtf8Validation: false }; + context( + 'when the server is given a long multibyte utf sequence and there is a writeError that includes invalid utf8', + () => { + let client: MongoClient; + let error: MongoServerError; + beforeEach(async function () { + client = this.configuration.newClient(); - for (const passOptionTo of ['client', 'db', 'collection', 'operation']) { - it(`should disable validation with option passed to ${passOptionTo}`, async function () { - client = this.configuration.newClient(passOptionTo === 'client' ? option : undefined); + async function generateWriteErrorWithInvalidUtf8() { + // Insert a large string of multibyte UTF-8 characters + const _id = '\u{1F92A}'.repeat(100); - const db = client.db('bson_utf8Validation_db', passOptionTo === 'db' ? option : undefined); - const collection = db.collection( - 'bson_utf8Validation_coll', - passOptionTo === 'collection' ? option : undefined - ); + const test = client.db('parsing').collection<{ _id: string }>('parsing'); + await test.insertOne({ _id }); - await collection.insertOne( - { name: 'John Doe' }, - passOptionTo === 'operation' ? option : {} - ); + const spy = sinon.spy(OpMsgResponse.prototype, 'parse'); - expect(bsonSpy).to.have.been.called; - const result = bsonSpy.lastCall.returnValue; - expect(result).to.deep.equal(EXPECTED_VALIDATION_DISABLED_ARGUMENT); - }); - } - }); + error = await test.insertOne({ _id }).catch(error => error); - describe('enableUtf8Validation option set to true', () => { - // define client and option for tests to use - const option = { enableUtf8Validation: true }; - for (const passOptionTo of ['client', 'db', 'collection', 'operation']) { - it(`should enable validation with option passed to ${passOptionTo}`, async function () { - client = this.configuration.newClient(passOptionTo === 'client' ? option : undefined); - await client.connect(); - - const db = client.db('bson_utf8Validation_db', passOptionTo === 'db' ? option : undefined); - const collection = db.collection( - 'bson_utf8Validation_coll', - passOptionTo === 'collection' ? option : undefined - ); - - await collection.insertOne( - { name: 'John Doe' }, - passOptionTo === 'operation' ? option : {} - ); - - expect(bsonSpy).to.have.been.called; - const result = bsonSpy.lastCall.returnValue; - expect(result).to.deep.equal(EXPECTED_VALIDATION_ENABLED_ARGUMENT); - }); - } - }); + // Check that the server sent us broken BSON (bad UTF) + expect(() => { + BSON.deserialize(spy.returnValues[0], { validation: { utf8: true } }); + }).to.throw(BSON.BSONError, /Invalid UTF/i, 'did not generate error with invalid utf8'); + } - describe('enableUtf8Validation option not set', () => { - const option = {}; - for (const passOptionTo of ['client', 'db', 'collection', 'operation']) { - it(`should default to enabled with option passed to ${passOptionTo}`, async function () { - client = this.configuration.newClient(passOptionTo === 'client' ? option : undefined); - await client.connect(); - - const db = client.db('bson_utf8Validation_db', passOptionTo === 'db' ? option : undefined); - const collection = db.collection( - 'bson_utf8Validation_coll', - passOptionTo === 'collection' ? option : undefined - ); - - await collection.insertOne( - { name: 'John Doe' }, - passOptionTo === 'operation' ? option : {} - ); - - expect(bsonSpy).to.have.been.called; - const result = bsonSpy.lastCall.returnValue; - expect(result).to.deep.equal(EXPECTED_VALIDATION_ENABLED_ARGUMENT); - }); - } - }); - - context( - 'when the server is given a long multibyte utf sequence and there is a writeError', - () => { - let client: MongoClient; - beforeEach(async function () { - client = this.configuration.newClient(); + await generateWriteErrorWithInvalidUtf8(); }); afterEach(async function () { @@ -133,22 +55,7 @@ describe('class MongoDBResponse', () => { await client.close(); }); - it('does not throw a UTF-8 parsing error', async () => { - // Insert a large string of multibyte UTF-8 characters - const _id = '\u{1F92A}'.repeat(100); - - const test = client.db('parsing').collection<{ _id: string }>('parsing'); - await test.insertOne({ _id }); - - const spy = sinon.spy(OpMsgResponse.prototype, 'parse'); - - const error = await test.insertOne({ _id }).catch(error => error); - - // Check that the server sent us broken BSON (bad UTF) - expect(() => { - BSON.deserialize(spy.returnValues[0], { validation: { utf8: true } }); - }).to.throw(BSON.BSONError, /Invalid UTF/i); - + it('does not throw a UTF-8 parsing error', function () { // Assert the driver squashed it expect(error).to.be.instanceOf(MongoServerError); expect(error.message).to.match(/duplicate/i); @@ -176,7 +83,9 @@ describe('utf8 validation with cursors', function () { if (providedBuffer.includes(targetBytes)) { if (providedBuffer.split(targetBytes).length !== 2) { sinon.restore(); - const message = `too many target bytes sequences: received ${providedBuffer.split(targetBytes).length}\n. command: ${inspect(deserialize(args[0]), { depth: Infinity })}`; + const message = `too many target bytes sequences: received ${ + providedBuffer.split(targetBytes).length + }\n. command: ${inspect(deserialize(args[0]), { depth: Infinity })}`; throw new Error(message); } const buffer = Buffer.from(providedBuffer.replace(targetBytes, 'c301'.repeat(8)), 'hex'); diff --git a/test/unit/cmap/wire_protocol/responses.test.ts b/test/unit/cmap/wire_protocol/responses.test.ts index 7fccbfc7fc..9f2527d8a3 100644 --- a/test/unit/cmap/wire_protocol/responses.test.ts +++ b/test/unit/cmap/wire_protocol/responses.test.ts @@ -1,9 +1,5 @@ import { expect } from 'chai'; -import * as sinon from 'sinon'; -// to spy on the bson module, we must import it from the driver -// eslint-disable-next-line @typescript-eslint/no-restricted-imports -import * as mdb from '../../../../src/bson'; import { CursorResponse, Int32, @@ -17,74 +13,6 @@ describe('class MongoDBResponse', () => { it('is a subclass of OnDemandDocument', () => { expect(new MongoDBResponse(serialize({ ok: 1 }))).to.be.instanceOf(OnDemandDocument); }); - - context('utf8 validation', () => { - let deseriailzeSpy: sinon.SinonStub>; - beforeEach(function () { - const deserialize = mdb.deserialize; - deseriailzeSpy = sinon.stub>().callsFake(deserialize); - sinon.stub(mdb, 'deserialize').get(() => { - return deseriailzeSpy; - }); - }); - afterEach(function () { - sinon.restore(); - }); - - context('when enableUtf8Validation is not specified', () => { - const options = { enableUtf8Validation: undefined }; - it('calls BSON deserialize with writeErrors validation turned off', () => { - const res = new MongoDBResponse(serialize({})); - res.toObject(options); - - expect(deseriailzeSpy).to.have.been.called; - - const [ - { - args: [_buffer, { validation }] - } - ] = deseriailzeSpy.getCalls(); - - expect(validation).to.deep.equal({ utf8: { writeErrors: false } }); - }); - }); - - context('when enableUtf8Validation is true', () => { - const options = { enableUtf8Validation: true }; - it('calls BSON deserialize with writeErrors validation turned off', () => { - const res = new MongoDBResponse(serialize({})); - res.toObject(options); - - expect(deseriailzeSpy).to.have.been.called; - - const [ - { - args: [_buffer, { validation }] - } - ] = deseriailzeSpy.getCalls(); - - expect(validation).to.deep.equal({ utf8: { writeErrors: false } }); - }); - }); - - context('when enableUtf8Validation is false', () => { - const options = { enableUtf8Validation: false }; - it('calls BSON deserialize with all validation disabled', () => { - const res = new MongoDBResponse(serialize({})); - res.toObject(options); - - expect(deseriailzeSpy).to.have.been.called; - - const [ - { - args: [_buffer, { validation }] - } - ] = deseriailzeSpy.getCalls(); - - expect(validation).to.deep.equal({ utf8: false }); - }); - }); - }); }); describe('class CursorResponse', () => { From 68eb03c67e928e85887a38f2cbd245ee1ea65d3e Mon Sep 17 00:00:00 2001 From: Bailey Pearson Date: Fri, 6 Sep 2024 11:15:30 -0600 Subject: [PATCH 3/6] fix lint --- src/cursor/abstract_cursor.ts | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/cursor/abstract_cursor.ts b/src/cursor/abstract_cursor.ts index 08a8bf7b31..54447fe70e 100644 --- a/src/cursor/abstract_cursor.ts +++ b/src/cursor/abstract_cursor.ts @@ -1,13 +1,6 @@ -import { type DeserializeOptions } from 'bson'; import { Readable, Transform } from 'stream'; -import { - type BSONSerializeOptions, - type Document, - Long, - parseUtf8ValidationOption, - pluckBSONSerializeOptions -} from '../bson'; +import { type BSONSerializeOptions, type Document, Long, pluckBSONSerializeOptions } from '../bson'; import { type OnDemandDocumentDeserializeOptions } from '../cmap/wire_protocol/on_demand/document'; import { type CursorResponse } from '../cmap/wire_protocol/responses'; import { From 3903e4e9a68d07bf72875b3b300bd4a98fc1249a Mon Sep 17 00:00:00 2001 From: Bailey Pearson Date: Fri, 6 Sep 2024 11:42:05 -0600 Subject: [PATCH 4/6] fix lint for pre-dependency bump --- .eslintrc.json | 3 ++- test/integration/change-streams/change_stream.test.ts | 3 +-- .../server_selection.prose.operation_count.test.ts | 2 +- test/unit/mongo_logger.test.ts | 1 - 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.eslintrc.json b/.eslintrc.json index 8f19ffd909..81883d7d12 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -124,7 +124,8 @@ "@typescript-eslint/no-unused-vars": [ "error", { - "argsIgnorePattern": "^_" + "argsIgnorePattern": "^_", + "varsIgnorePattern": "^_" } ] }, diff --git a/test/integration/change-streams/change_stream.test.ts b/test/integration/change-streams/change_stream.test.ts index 3fa1e792ed..64297ad512 100644 --- a/test/integration/change-streams/change_stream.test.ts +++ b/test/integration/change-streams/change_stream.test.ts @@ -875,7 +875,7 @@ describe('Change Streams', function () { await lastWrite().catch(() => null); let counter = 0; - // eslint-disable-next-line @typescript-eslint/no-unused-vars + for await (const _ of changes) { counter += 1; if (counter === 2) { @@ -1027,7 +1027,6 @@ describe('Change Streams', function () { changeStream = collection.watch(); const loop = (async function () { - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _change of changeStream) { return 'loop entered'; // loop should never be entered } diff --git a/test/integration/server-selection/server_selection.prose.operation_count.test.ts b/test/integration/server-selection/server_selection.prose.operation_count.test.ts index 27636a6dfc..fec6d24e61 100644 --- a/test/integration/server-selection/server_selection.prose.operation_count.test.ts +++ b/test/integration/server-selection/server_selection.prose.operation_count.test.ts @@ -30,7 +30,7 @@ async function runTaskGroup(collection: Collection, count: 10 | 100 | 1000) { async function ensurePoolIsFull(client: MongoClient): Promise { let connectionCount = 0; - // eslint-disable-next-line @typescript-eslint/no-unused-vars + for await (const _event of on(client, 'connectionCreated')) { connectionCount++; if (connectionCount === POOL_SIZE * 2) { diff --git a/test/unit/mongo_logger.test.ts b/test/unit/mongo_logger.test.ts index 86e4242ffb..fff75bf9a2 100644 --- a/test/unit/mongo_logger.test.ts +++ b/test/unit/mongo_logger.test.ts @@ -1052,7 +1052,6 @@ describe('class MongoLogger', function () { context('when serviceId is not present', function () { beforeEach(function () { - // eslint-disable-next-line @typescript-eslint/no-unused-vars const { serviceId: _, ...connectionPoolClearedNoServiceId } = connectionPoolCleared; logger[severityLevel]('connection', connectionPoolClearedNoServiceId); From a7edde4a4a1672271cc63f2efdcd056513bcc57b Mon Sep 17 00:00:00 2001 From: Bailey Pearson Date: Fri, 6 Sep 2024 12:14:36 -0600 Subject: [PATCH 5/6] fix lint issues --- .eslintrc.json | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.eslintrc.json b/.eslintrc.json index 81883d7d12..25ccda830a 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -276,7 +276,15 @@ ], "parser": "@typescript-eslint/parser", "rules": { - "unused-imports/no-unused-imports": "error" + "unused-imports/no-unused-imports": "error", + "@typescript-eslint/consistent-type-imports": [ + "error", + { + "prefer": "type-imports", + "disallowTypeAnnotations": false, + "fixStyle": "separate-type-imports" + } + ] } } ] From 331072ca5c9b1c39eaa61540250664e14cb8e97e Mon Sep 17 00:00:00 2001 From: Bailey Pearson Date: Fri, 6 Sep 2024 13:55:12 -0600 Subject: [PATCH 6/6] comments --- src/cmap/wire_protocol/on_demand/document.ts | 15 +-- .../bson-options/utf8_validation.test.ts | 94 ++++++++++--------- 2 files changed, 54 insertions(+), 55 deletions(-) diff --git a/src/cmap/wire_protocol/on_demand/document.ts b/src/cmap/wire_protocol/on_demand/document.ts index fdb6f90e4d..efc7e32288 100644 --- a/src/cmap/wire_protocol/on_demand/document.ts +++ b/src/cmap/wire_protocol/on_demand/document.ts @@ -49,9 +49,8 @@ type CachedBSONElement = { element: BSONElement; value: any | undefined }; * * Options for `OnDemandDocument.toObject()`. Validation is required to ensure * that callers provide utf8 validation options. */ -export type OnDemandDocumentDeserializeOptions = DeserializeOptions & { - validation: NonNullable; -}; +export type OnDemandDocumentDeserializeOptions = Omit & + Required>; /** @internal */ export class OnDemandDocument { @@ -347,16 +346,6 @@ export class OnDemandDocument { }); } - private parseBsonSerializationOptions(options?: { enableUtf8Validation?: boolean }): { - utf8: { writeErrors: false } | false; - } { - const enableUtf8Validation = options?.enableUtf8Validation; - if (enableUtf8Validation === false) { - return { utf8: false }; - } - return { utf8: { writeErrors: false } }; - } - /** Returns this document's bytes only */ toBytes() { const size = getInt32LE(this.bson, this.offset); diff --git a/test/integration/node-specific/bson-options/utf8_validation.test.ts b/test/integration/node-specific/bson-options/utf8_validation.test.ts index df367f233b..a2ef1c6358 100644 --- a/test/integration/node-specific/bson-options/utf8_validation.test.ts +++ b/test/integration/node-specific/bson-options/utf8_validation.test.ts @@ -1,13 +1,11 @@ import { expect } from 'chai'; import * as net from 'net'; import * as sinon from 'sinon'; -import { inspect } from 'util'; import { BSON, BSONError, type Collection, - deserialize, type MongoClient, MongoServerError, OpMsgResponse @@ -26,47 +24,59 @@ describe('class MongoDBResponse', () => { () => { let client: MongoClient; let error: MongoServerError; - beforeEach(async function () { - client = this.configuration.newClient(); - - async function generateWriteErrorWithInvalidUtf8() { - // Insert a large string of multibyte UTF-8 characters - const _id = '\u{1F92A}'.repeat(100); - - const test = client.db('parsing').collection<{ _id: string }>('parsing'); - await test.insertOne({ _id }); - - const spy = sinon.spy(OpMsgResponse.prototype, 'parse'); - - error = await test.insertOne({ _id }).catch(error => error); - - // Check that the server sent us broken BSON (bad UTF) - expect(() => { - BSON.deserialize(spy.returnValues[0], { validation: { utf8: true } }); - }).to.throw(BSON.BSONError, /Invalid UTF/i, 'did not generate error with invalid utf8'); - } - - await generateWriteErrorWithInvalidUtf8(); - }); - - afterEach(async function () { - sinon.restore(); - await client.db('parsing').dropDatabase(); - await client.close(); - }); - - it('does not throw a UTF-8 parsing error', function () { - // Assert the driver squashed it - expect(error).to.be.instanceOf(MongoServerError); - expect(error.message).to.match(/duplicate/i); - expect(error.message).to.not.match(/utf/i); - expect(error.errmsg).to.include('\uFFFD'); - }); + for (const { optionDescription, options } of [ + { optionDescription: 'explicitly enabled', options: { enableUtf8Validation: true } }, + { optionDescription: 'explicitly disabled', options: { enableUtf8Validation: false } }, + { optionDescription: 'omitted', options: {} } + ]) { + context('when utf8 validation is ' + optionDescription, function () { + beforeEach(async function () { + client = this.configuration.newClient(); + + async function generateWriteErrorWithInvalidUtf8() { + // Insert a large string of multibyte UTF-8 characters + const _id = '\u{1F92A}'.repeat(100); + + const test = client.db('parsing').collection<{ _id: string }>('parsing'); + await test.insertOne({ _id }, options); + + const spy = sinon.spy(OpMsgResponse.prototype, 'parse'); + + error = await test.insertOne({ _id }).catch(error => error); + + // Check that the server sent us broken BSON (bad UTF) + expect(() => { + BSON.deserialize(spy.returnValues[0], { validation: { utf8: true } }); + }).to.throw( + BSON.BSONError, + /Invalid UTF/i, + 'did not generate error with invalid utf8' + ); + } + + await generateWriteErrorWithInvalidUtf8(); + }); + + afterEach(async function () { + sinon.restore(); + await client.db('parsing').dropDatabase(); + await client.close(); + }); + + it('does not throw a UTF-8 parsing error', function () { + // Assert the driver squashed it + expect(error).to.be.instanceOf(MongoServerError); + expect(error.message).to.match(/duplicate/i); + expect(error.message).to.not.match(/utf/i); + expect(error.errmsg).to.include('\uFFFD'); + }); + }); + } } ); }); -describe('utf8 validation with cursors', function () { +describe('parsing of utf8-invalid documents wish cursors', function () { let client: MongoClient; let collection: Collection; @@ -85,7 +95,7 @@ describe('utf8 validation with cursors', function () { sinon.restore(); const message = `too many target bytes sequences: received ${ providedBuffer.split(targetBytes).length - }\n. command: ${inspect(deserialize(args[0]), { depth: Infinity })}`; + }`; throw new Error(message); } const buffer = Buffer.from(providedBuffer.replace(targetBytes, 'c301'.repeat(8)), 'hex'); @@ -163,7 +173,7 @@ describe('utf8 validation with cursors', function () { } context('when utf-8 validation is explicitly enabled', function () { - it('a for-await loop throw a BSON error', async function () { + it('a for-await loop throws a BSON error', async function () { await expectReject(async () => { for await (const _doc of collection.find({}, { enableUtf8Validation: true })); }); @@ -204,7 +214,7 @@ describe('utf8 validation with cursors', function () { }); context('utf-8 validation defaults to enabled', function () { - it('a for-await loop throw a BSON error', async function () { + it('a for-await loop throws a BSON error', async function () { await expectReject(async () => { for await (const _doc of collection.find({})); });