Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(NODE-6355): respect utf8 validation options when iterating cursors #4214

Merged
merged 10 commits into from
Sep 4, 2024
Merged
21 changes: 17 additions & 4 deletions src/cmap/wire_protocol/on_demand/document.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import {
Binary,
BSON,
type BSONElement,
BSONError,
type BSONSerializeOptions,
BSONType,
deserialize,
getBigInt64LE,
getFloat64LE,
getInt32LE,
ObjectId,
parseToElementsToArray,
pluckBSONSerializeOptions,
Timestamp,
toUTF8
} from '../../../bson';
Expand Down Expand Up @@ -330,11 +331,23 @@ export class OnDemandDocument {
* @param options - BSON deserialization options
*/
public toObject(options?: BSONSerializeOptions): Record<string, any> {
  // Build the exact option bag handed to BSON: only what pluckBSONSerializeOptions keeps from the
  // caller's options, plus the utf8 validation setting derived from `enableUtf8Validation`.
  const exactBSONOptions = {
    ...pluckBSONSerializeOptions(options ?? {}),
    validation: this.parseBsonSerializationOptions(options),
    // Parsing starts at this document's offset inside the shared buffer, which may extend past
    // the end of this document, hence allowObjectSmallerThanBufferSize.
    index: this.offset,
    allowObjectSmallerThanBufferSize: true
  };
  return deserialize(this.bson, exactBSONOptions);
}

/**
 * Translates the `enableUtf8Validation` flag into the value used for the BSON `validation` option.
 *
 * Only an explicit `false` yields `{ utf8: false }`; `true`, `undefined` and a missing options
 * object all yield `{ utf8: { writeErrors: false } }`.
 * NOTE(review): presumably `writeErrors: false` exempts the `writeErrors` key from validation;
 * confirm against the BSON deserialize options.
 *
 * @param options - object that may carry `enableUtf8Validation`
 * @returns the `validation` value to pass to BSON deserialization
 */
private parseBsonSerializationOptions(options?: { enableUtf8Validation?: boolean }): {
  utf8: { writeErrors: false } | false;
} {
  return options?.enableUtf8Validation === false
    ? { utf8: false }
    : { utf8: { writeErrors: false } };
}

/** Returns this document's bytes only */
Expand Down
19 changes: 0 additions & 19 deletions src/cmap/wire_protocol/responses.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import {
type Document,
Long,
parseToElementsToArray,
pluckBSONSerializeOptions,
type Timestamp
} from '../../bson';
import { MongoUnexpectedServerResponseError } from '../../error';
Expand Down Expand Up @@ -166,24 +165,6 @@ export class MongoDBResponse extends OnDemandDocument {
}
return this.clusterTime ?? null;
}

/**
 * Deserializes this response, forwarding to the parent `toObject` only the options kept by
 * `pluckBSONSerializeOptions` together with the derived `validation` setting.
 *
 * @param options - BSON deserialization options
 */
public override toObject(options?: BSONSerializeOptions): Record<string, any> {
  const pluckedOptions = pluckBSONSerializeOptions(options ?? {});
  const validation = this.parseBsonSerializationOptions(options);
  return super.toObject({ ...pluckedOptions, validation });
}

/**
 * Derives the BSON `validation` value from `enableUtf8Validation`.
 *
 * Returns `{ utf8: false }` only when the flag is explicitly `false`; every other input
 * (including no options at all) returns `{ utf8: { writeErrors: false } }`.
 *
 * @param options - object that may carry `enableUtf8Validation`
 */
private parseBsonSerializationOptions(options?: { enableUtf8Validation?: boolean }): {
  utf8: { writeErrors: false } | false;
} {
  const validationRequested = options?.enableUtf8Validation !== false;
  if (validationRequested) {
    return { utf8: { writeErrors: false } };
  }
  return { utf8: false };
}
}

/** @internal */
Expand Down
188 changes: 185 additions & 3 deletions test/integration/node-specific/bson-options/utf8_validation.test.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
import { expect } from 'chai';
import * as net from 'net';
import * as sinon from 'sinon';
import { inspect } from 'util';

import {
BSON,
BSONError,
type Collection,
deserialize,
type MongoClient,
MongoDBResponse,
MongoServerError,
OnDemandDocument,
OpMsgResponse
} from '../../../mongodb';

Expand All @@ -23,12 +28,12 @@ describe('class MongoDBResponse', () => {
let bsonSpy: sinon.SinonSpy;

beforeEach(() => {
bsonSpy = sinon.spy(MongoDBResponse.prototype, 'parseBsonSerializationOptions');
// @ts-expect-error private function
bsonSpy = sinon.spy(OnDemandDocument.prototype, 'parseBsonSerializationOptions');
});

afterEach(() => {
bsonSpy?.restore();
// @ts-expect-error: Allow this to be garbage collected
bsonSpy = null;
});

Expand Down Expand Up @@ -153,3 +158,180 @@ describe('class MongoDBResponse', () => {
}
);
});

describe('utf8 validation with cursors', function () {
let client: MongoClient;
let collection: Collection;

/**
 * Inserts a document whose string value ends up as malformed UTF-8 on the wire.
 *
 * `net.Socket.prototype.write` is stubbed. Every outgoing chunk is hex-encoded and searched for the
 * UTF-8 bytes of `document.field` ('é' repeated 8 times). The first chunk containing that sequence
 * exactly once is rewritten so those bytes become `c3 01` repeated 8 times, then passed to the real
 * `write`. `c3` opens a two-byte UTF-8 sequence and `01` is not a continuation byte, so the result
 * is invalid UTF-8 with the same byte length as the original ('é' encodes as `c3 a9`).
 *
 * NOTE(review): presumably the matching chunk is the OP_MSG produced by
 * `collection.insertOne(document)` below; confirm.
 *
 * @throws Error when the target byte sequence appears more than once in a single chunk
 */
async function insertDocumentWithInvalidUTF8() {
const stub = sinon.stub(net.Socket.prototype, 'write').callsFake(function (...args) {
// Hex form of the chunk being written and of the bytes to corrupt. `document` is declared after
// this stub, but it is only read when a write actually happens.
const providedBuffer = args[0].toString('hex');
const targetBytes = Buffer.from(document.field, 'utf-8').toString('hex');

if (providedBuffer.includes(targetBytes)) {
// split() yields exactly 2 pieces when the sequence occurs once; anything else is ambiguous.
if (providedBuffer.split(targetBytes).length !== 2) {
sinon.restore();
const message = `too many target bytes sequences: received ${providedBuffer.split(targetBytes).length}\n. command: ${inspect(deserialize(args[0]), { depth: Infinity })}`;
throw new Error(message);
}
// Swap in the invalid bytes and send only the modified buffer through the real write.
const buffer = Buffer.from(providedBuffer.replace(targetBytes, 'c301'.repeat(8)), 'hex');
const result = stub.wrappedMethod.apply(this, [buffer]);
// The payload has been corrupted once; remove the stub so later writes are untouched.
sinon.restore();
return result;
}
// Chunk does not contain the target bytes: pass it through unchanged.
const result = stub.wrappedMethod.apply(this, args);
return result;
});

const document = {
field: 'é'.repeat(8)
};

await collection.insertOne(document);

// Restore again in case no write matched and the stub is still installed.
sinon.restore();
}

// Connects a fresh client, empties test.invalidutf, and seeds it with the malformed document.
beforeEach(async function () {
  client = this.configuration.newClient();
  await client.connect();
  collection = client.db('test').collection('invalidutf');

  await collection.deleteMany({});
  await insertDocumentWithInvalidUTF8();
});

// Removes any sinon stub still installed and closes the client opened in beforeEach.
afterEach(async function () {
  sinon.restore();
  // `client` is unset if beforeEach failed before assigning it; do not mask that failure
  // with a TypeError here.
  await client?.close();
});

// With validation turned off, every way of draining the cursor must complete without throwing.
context('when utf-8 validation is explicitly disabled', function () {
  // Built lazily: `collection` is assigned in the suite's beforeEach hook, not at definition time.
  const openCursor = () => collection.find({}, { enableUtf8Validation: false });

  it('documents can be read using a for-await loop without errors', async function () {
    for await (const _doc of openCursor()) {
      // Nothing to check per document; finishing the iteration is the assertion.
    }
  });

  it('documents can be read using next() without errors', async function () {
    const cursor = openCursor();
    let hasMore = await cursor.hasNext();
    while (hasMore) {
      await cursor.next();
      hasMore = await cursor.hasNext();
    }
  });

  it('documents can be read using toArray() without errors', async function () {
    await openCursor().toArray();
  });

  it('documents can be read using .stream() without errors', async function () {
    const stream = openCursor().stream();
    await stream.toArray();
  });

  it('documents can be read with tryNext() without error', async function () {
    const cursor = openCursor();
    let hasMore = await cursor.hasNext();
    while (hasMore) {
      await cursor.tryNext();
      hasMore = await cursor.hasNext();
    }
  });
});

/**
 * Asserts that `fn` rejects with a BSONError reporting an invalid UTF-8 string.
 *
 * The "did not reject" failure is raised outside the try/catch so it is reported with its own
 * message rather than being caught and re-checked as if it were the rejection under test.
 *
 * @param fn - async callback expected to reject
 */
async function expectReject(fn: () => Promise<void>) {
  let rejected = false;
  let rejection: unknown;
  try {
    await fn();
  } catch (error) {
    rejected = true;
    rejection = error;
  }

  if (!rejected) {
    expect.fail('expected the provided callback function to reject, but it did not.');
  }
  expect(rejection).to.match(/Invalid UTF-8 string in BSON document/);
  expect(rejection).to.be.instanceOf(BSONError);
}

// With validation requested explicitly, every way of draining the cursor must reject.
context('when utf-8 validation is explicitly enabled', function () {
  // Built lazily: `collection` is assigned in the suite's beforeEach hook, not at definition time.
  const openCursor = () => collection.find({}, { enableUtf8Validation: true });

  it('a for-await loop throw a BSON error', async function () {
    await expectReject(async () => {
      for await (const _doc of openCursor()) {
        // Iteration itself is expected to throw.
      }
    });
  });

  it('next() throws a BSON error', async function () {
    await expectReject(async () => {
      const cursor = openCursor();
      let hasMore = await cursor.hasNext();
      while (hasMore) {
        await cursor.next();
        hasMore = await cursor.hasNext();
      }
    });
  });

  it('toArray() throws a BSON error', async function () {
    await expectReject(async () => {
      await openCursor().toArray();
    });
  });

  it('.stream() throws a BSONError', async function () {
    await expectReject(async () => {
      const stream = openCursor().stream();
      await stream.toArray();
    });
  });

  it('tryNext() throws a BSONError', async function () {
    await expectReject(async () => {
      const cursor = openCursor();
      let hasMore = await cursor.hasNext();
      while (hasMore) {
        await cursor.tryNext();
        hasMore = await cursor.hasNext();
      }
    });
  });
});

// No `enableUtf8Validation` option is passed anywhere in this context: each test checks that the
// default behaviour rejects when the malformed document is read.
context('utf-8 validation defaults to enabled', function () {
  it('a for-await loop throw a BSON error', async function () {
    await expectReject(async () => {
      for await (const _doc of collection.find({}));
    });
  });

  it('next() throws a BSON error', async function () {
    await expectReject(async () => {
      const cursor = collection.find({});

      while (await cursor.hasNext()) {
        await cursor.next();
      }
    });
  });

  it('toArray() throws a BSON error', async function () {
    await expectReject(async () => {
      const cursor = collection.find({});
      await cursor.toArray();
    });
  });

  it('.stream() throws a BSONError', async function () {
    await expectReject(async () => {
      const cursor = collection.find({});
      await cursor.stream().toArray();
    });
  });

  it('tryNext() throws a BSONError', async function () {
    await expectReject(async () => {
      // Deliberately no options: passing `enableUtf8Validation: true` here would test the
      // explicit setting instead of the default.
      const cursor = collection.find({});

      while (await cursor.hasNext()) {
        await cursor.tryNext();
      }
    });
  });
});
});
Loading