Skip to content

Commit

Permalink
fix(NODE-6355): respect utf8 validation options when iterating cursors (
Browse files Browse the repository at this point in the history
  • Loading branch information
baileympearson committed Sep 6, 2024
1 parent 25c84a4 commit 4bcbc29
Show file tree
Hide file tree
Showing 4 changed files with 261 additions and 52 deletions.
21 changes: 17 additions & 4 deletions src/cmap/wire_protocol/on_demand/document.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import {
Binary,
BSON,
type BSONElement,
BSONError,
type BSONSerializeOptions,
BSONType,
deserialize,
getBigInt64LE,
getFloat64LE,
getInt32LE,
ObjectId,
parseToElementsToArray,
pluckBSONSerializeOptions,
Timestamp,
toUTF8
} from '../../../bson';
Expand Down Expand Up @@ -330,11 +331,23 @@ export class OnDemandDocument {
* @param options - BSON deserialization options
*/
public toObject(options?: BSONSerializeOptions): Record<string, any> {
// NOTE(review): this span reads like a diff rendered without its +/- markers. The
// `return BSON.deserialize(this.bson, {` / `...options,` / `});` lines appear to be the
// previous implementation, and the `exactBSONOptions` lines its replacement.
// TODO confirm against the commit before treating this as compilable code.
return BSON.deserialize(this.bson, {
...options,
// Presumably the replacement: the caller's options go through pluckBSONSerializeOptions
// instead of being spread directly, and a utf8 `validation` setting derived from
// `enableUtf8Validation` is added.
const exactBSONOptions = {
...pluckBSONSerializeOptions(options ?? {}),
validation: this.parseBsonSerializationOptions(options),
// The two options below are passed alongside `this.bson`; presumably they make
// deserialization start at this document's offset inside a larger buffer — verify
// against the BSON library's deserialize options.
index: this.offset,
allowObjectSmallerThanBufferSize: true
});
};
return deserialize(this.bson, exactBSONOptions);
}

private parseBsonSerializationOptions(options?: { enableUtf8Validation?: boolean }): {
  utf8: { writeErrors: false } | false;
} {
  // Builds the value passed as `validation` to deserialize.
  // Only an explicit `enableUtf8Validation: false` yields `{ utf8: false }`;
  // `true`, `undefined`, or no options at all yield `{ utf8: { writeErrors: false } }`.
  return options?.enableUtf8Validation === false
    ? { utf8: false }
    : { utf8: { writeErrors: false } };
}

/** Returns this document's bytes only */
Expand Down
19 changes: 0 additions & 19 deletions src/cmap/wire_protocol/responses.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import {
type Document,
Long,
parseToElementsToArray,
pluckBSONSerializeOptions,
type Timestamp
} from '../../bson';
import { MongoUnexpectedServerResponseError } from '../../error';
Expand Down Expand Up @@ -166,24 +165,6 @@ export class MongoDBResponse extends OnDemandDocument {
}
return this.clusterTime ?? null;
}

public override toObject(options?: BSONSerializeOptions): Record<string, any> {
  // Reduce the caller's options via pluckBSONSerializeOptions, then attach the utf8
  // validation setting derived from `enableUtf8Validation`.
  const pluckedOptions = pluckBSONSerializeOptions(options ?? {});
  const validation = this.parseBsonSerializationOptions(options);

  // Kept as a named variable rather than an inline literal so the extra `validation`
  // key is handed to the base implementation exactly as before.
  const optionsWithValidation = { ...pluckedOptions, validation };
  return super.toObject(optionsWithValidation);
}

private parseBsonSerializationOptions(options?: { enableUtf8Validation?: boolean }): {
  utf8: { writeErrors: false } | false;
} {
  // `false` is produced only when the caller explicitly passed
  // `enableUtf8Validation: false`; every other input gets `{ writeErrors: false }`.
  const validationDisabled = options?.enableUtf8Validation === false;
  const utf8: { writeErrors: false } | false = validationDisabled
    ? false
    : { writeErrors: false };
  return { utf8 };
}
}

/** @internal */
Expand Down
188 changes: 185 additions & 3 deletions test/integration/node-specific/bson-options/utf8_validation.test.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
import { expect } from 'chai';
import * as net from 'net';
import * as sinon from 'sinon';
import { inspect } from 'util';

import {
BSON,
BSONError,
type Collection,
deserialize,
type MongoClient,
MongoDBResponse,
MongoServerError,
OnDemandDocument,
OpMsgResponse
} from '../../../mongodb';

Expand All @@ -23,12 +28,12 @@ describe('class MongoDBResponse', () => {
let bsonSpy: sinon.SinonSpy;

beforeEach(() => {
// NOTE(review): the two assignments below look like the removed/added pair of a diff
// whose +/- markers were stripped (the spy target moves from MongoDBResponse to
// OnDemandDocument). TODO confirm; as written both assignments would run.
bsonSpy = sinon.spy(MongoDBResponse.prototype, 'parseBsonSerializationOptions');
// @ts-expect-error private function
bsonSpy = sinon.spy(OnDemandDocument.prototype, 'parseBsonSerializationOptions');
});

afterEach(() => {
// Undo the spy installed in beforeEach (if any) so it does not leak into later tests.
bsonSpy?.restore();
// NOTE(review): the next two lines may be diff residue (possibly removed by this
// commit) — verify before relying on them.
// @ts-expect-error: Allow this to be garbage collected
bsonSpy = null;
});

Expand Down Expand Up @@ -153,3 +158,180 @@ describe('class MongoDBResponse', () => {
}
);
});

describe('utf8 validation with cursors', function () {
let client: MongoClient;
let collection: Collection;

/**
 * Inserts a document whose string field contains malformed utf8 bytes.
 *
 * `net.Socket.prototype.write` is stubbed. Writes that do not contain the utf8 bytes of the
 * test string ('é' repeated 8 times) are forwarded untouched. The first write that does
 * contain them has that byte sequence replaced with `c3 01` repeated 8 times, to produce
 * invalid utf8, and the stub is then removed.
 *
 * Throws if the target byte sequence occurs more than once in a single write.
 */
async function insertDocumentWithInvalidUTF8() {
  const document = {
    field: 'é'.repeat(8)
  };

  const stub = sinon.stub(net.Socket.prototype, 'write').callsFake(function (...args) {
    const outgoingHex = args[0].toString('hex');
    const needleHex = Buffer.from(document.field, 'utf-8').toString('hex');

    // Writes that do not carry the test string pass straight through.
    if (!outgoingHex.includes(needleHex)) {
      return stub.wrappedMethod.apply(this, args);
    }

    // Exactly one occurrence splits the payload into two pieces; anything else is ambiguous.
    const pieceCount = outgoingHex.split(needleHex).length;
    if (pieceCount !== 2) {
      sinon.restore();
      throw new Error(
        `too many target bytes sequences: received ${pieceCount}\n. command: ${inspect(deserialize(args[0]), { depth: Infinity })}`
      );
    }

    const corrupted = Buffer.from(outgoingHex.replace(needleHex, 'c301'.repeat(8)), 'hex');
    const written = stub.wrappedMethod.apply(this, [corrupted]);
    sinon.restore();
    return written;
  });

  await collection.insertOne(document);

  sinon.restore();
}

// Every test starts with a connected client and a collection that holds only the
// document written by insertDocumentWithInvalidUTF8().
beforeEach(async function () {
  client = this.configuration.newClient();
  await client.connect();

  collection = client.db('test').collection('invalidutf');
  await collection.deleteMany({});

  await insertDocumentWithInvalidUTF8();
});

// Drop any sinon fakes still installed, then close the client.
afterEach(async function () {
  sinon.restore();
  await client.close();
});

context('when utf-8 validation is explicitly disabled', function () {
  // Builds a fresh cursor (and a fresh options object) per call; evaluated at test time,
  // after beforeEach has assigned `collection`.
  const findWithoutValidation = () => collection.find({}, { enableUtf8Validation: false });

  it('documents can be read using a for-await loop without errors', async function () {
    for await (const _doc of findWithoutValidation());
  });

  it('documents can be read using next() without errors', async function () {
    const cursor = findWithoutValidation();

    while (await cursor.hasNext()) {
      await cursor.next();
    }
  });

  it('documents can be read using toArray() without errors', async function () {
    await findWithoutValidation().toArray();
  });

  it('documents can be read using .stream() without errors', async function () {
    await findWithoutValidation().stream().toArray();
  });

  it('documents can be read with tryNext() without error', async function () {
    const cursor = findWithoutValidation();

    while (await cursor.hasNext()) {
      await cursor.tryNext();
    }
  });
});

/**
 * Asserts that `fn` rejects with a BSONError whose text matches
 * "Invalid UTF-8 string in BSON document".
 *
 * The "did not reject" check runs outside the try/catch, so its failure is reported
 * directly. Previously it was caught by this helper's own `catch` and resurfaced as a
 * regex mismatch.
 *
 * @param fn - async callback that is expected to reject
 */
async function expectReject(fn: () => Promise<void>) {
  let rejected = false;
  let rejection: unknown;

  try {
    await fn();
  } catch (error) {
    rejected = true;
    rejection = error;
  }

  if (!rejected) {
    expect.fail('expected the provided callback function to reject, but it did not.');
  }

  expect(rejection).to.match(/Invalid UTF-8 string in BSON document/);
  expect(rejection).to.be.instanceOf(BSONError);
}

context('when utf-8 validation is explicitly enabled', function () {
  // Builds a fresh cursor (and a fresh options object) per call; evaluated at test time,
  // after beforeEach has assigned `collection`.
  const findWithValidation = () => collection.find({}, { enableUtf8Validation: true });

  it('a for-await loop throw a BSON error', async function () {
    await expectReject(async () => {
      for await (const _doc of findWithValidation());
    });
  });

  it('next() throws a BSON error', async function () {
    await expectReject(async () => {
      const cursor = findWithValidation();

      while (await cursor.hasNext()) {
        await cursor.next();
      }
    });
  });

  it('toArray() throws a BSON error', async function () {
    await expectReject(async () => {
      await findWithValidation().toArray();
    });
  });

  it('.stream() throws a BSONError', async function () {
    await expectReject(async () => {
      await findWithValidation().stream().toArray();
    });
  });

  it('tryNext() throws a BSONError', async function () {
    await expectReject(async () => {
      const cursor = findWithValidation();

      while (await cursor.hasNext()) {
        await cursor.tryNext();
      }
    });
  });
});

// Every cursor in this context is created WITHOUT an `enableUtf8Validation` option, so the
// tests exercise the default behaviour rather than an explicit setting.
context('utf-8 validation defaults to enabled', function () {
  it('a for-await loop throw a BSON error', async function () {
    await expectReject(async () => {
      for await (const _doc of collection.find({}));
    });
  });

  it('next() throws a BSON error', async function () {
    await expectReject(async () => {
      const cursor = collection.find({});

      while (await cursor.hasNext()) {
        await cursor.next();
      }
    });
  });

  it('toArray() throws a BSON error', async function () {
    await expectReject(async () => {
      const cursor = collection.find({});
      await cursor.toArray();
    });
  });

  it('.stream() throws a BSONError', async function () {
    await expectReject(async () => {
      const cursor = collection.find({});
      await cursor.stream().toArray();
    });
  });

  it('tryNext() throws a BSONError', async function () {
    await expectReject(async () => {
      // No options here: this test previously passed `enableUtf8Validation: true`, which
      // duplicated the "explicitly enabled" case instead of checking the default.
      const cursor = collection.find({});

      while (await cursor.hasNext()) {
        await cursor.tryNext();
      }
    });
  });
});
});
Loading

0 comments on commit 4bcbc29

Please sign in to comment.