From 56a2d83a0cafbeb798b2bc1619da067d74e01aa0 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Fri, 31 May 2024 12:05:31 -0500 Subject: [PATCH] DRIVERS-2870 Fix content of retryable-writes-test readme (#1590) --- .github/workflows/lint.yml | 15 +- .pre-commit-config.yaml | 12 +- source/retryable-writes/tests/README.md | 241 +++++++++++++++++- .../server-monitoring.md | 2 +- source/transactions/transactions.md | 2 +- .../unified-test-format.md | 2 +- 6 files changed, 242 insertions(+), 32 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 2d3490a0da..02f52237e2 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -14,16 +14,7 @@ jobs: steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v4 - - # ref: https://github.com/pre-commit/action - - uses: pre-commit/action@v3.0.0 - - name: Help message if pre-commit fail - if: ${{ failure() }} + - name: "Run pre-commit" run: | - echo "You can install pre-commit hooks to automatically run formatting" - echo "on each commit with:" - echo " pre-commit install" - echo "or you can run by hand on staged files with" - echo " pre-commit run" - echo "or after-the-fact on already committed files with" - echo " pre-commit run --all-files --hook-stage manual" + pip install -U -q pre-commit + pre-commit run --all-files --hook-stage manual diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index db6ed1322d..5355d35978 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: check-case-conflict - id: check-executables-have-shebangs @@ -15,7 +15,7 @@ repos: # We use the Python version instead of the original version which seems to require Docker # https://github.com/koalaman/shellcheck-precommit - repo: https://github.com/shellcheck-py/shellcheck-py - rev: v0.9.0.6 + rev: v0.10.0.1 hooks: - id: shellcheck name: shellcheck @@ -43,7 +43,7 @@ repos: [mdformat-gfm, mdformat-frontmatter, mdformat-footnote, mdformat-gfm-alerts] - repo: https://github.com/tcort/markdown-link-check - rev: v3.11.2 + rev: v3.12.2 hooks: - id: markdown-link-check args: ["-c", "markdown_link_config.json"] @@ -57,7 +57,7 @@ repos: stages: [manual] - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.27.3 + rev: 0.28.4 hooks: - id: check-github-workflows @@ -69,10 +69,10 @@ repos: - id: rst-inline-touching-normal - repo: https://github.com/codespell-project/codespell - rev: "v2.2.6" + rev: "v2.3.0" hooks: - id: codespell - args: ["-L", "fle,re-use,merchantibility,synching,crate,nin,infinit,te"] + args: ["-L", "fle,re-use,merchantibility,synching,crate,nin,infinit,te,checkin"] exclude: | (?x)^(.*\.rst )$ diff --git a/source/retryable-writes/tests/README.md b/source/retryable-writes/tests/README.md index 749d29ee54..e883ca368d 100644 --- a/source/retryable-writes/tests/README.md +++ b/source/retryable-writes/tests/README.md @@ -1,7 +1,5 @@ # Retryable Write Tests -______________________________________________________________________ - ## Introduction The YAML and JSON files in this directory are platform-independent tests meant to exercise a driver's implementation of @@ -106,17 +104,238 @@ Drivers should test that transactions IDs are always included in commands for su The following tests ensure that retryable writes work properly with replica sets and sharded clusters. -1. Test that retryable writes raise an exception when using the MMAPv1 storage engine. For this test, execute a write - operation, such as `insertOne`, which should generate an exception. Assert that the error message is the replacement - error message: +### 1. Test that retryable writes raise an exception when using the MMAPv1 storage engine. + +For this test, execute a write operation, such as `insertOne`, which should generate an exception. Assert that the error +message is the replacement error message: + +``` +This MongoDB deployment does not support retryable writes. Please add +retryWrites=false to your connection string. +``` + +and the error code is 20. + +> [!NOTE] +> Drivers that rely on `serverStatus` to determine the storage engine in use MAY skip this test for sharded clusters, +> since `mongos` does not report this information in its `serverStatus` response. + +### 2. Test that drivers properly retry after encountering PoolClearedErrors. + +This test MUST be implemented by any driver that implements the CMAP specification. + +This test requires MongoDB 4.3.4+ for both the `errorLabels` and `blockConnection` fail point options. + +1. Create a client with maxPoolSize=1 and retryWrites=true. If testing against a sharded deployment, be sure to connect + to only a single mongos. + +2. Enable the following failpoint: + + ```javascript + { + configureFailPoint: "failCommand", + mode: { times: 1 }, + data: { + failCommands: ["insert"], + errorCode: 91, + blockConnection: true, + blockTimeMS: 1000, + errorLabels: ["RetryableWriteError"] + } + } + ``` + +3. Start two threads and attempt to perform an `insertOne` simultaneously on both. + +4. Verify that both `insertOne` attempts succeed. + +5. Via CMAP monitoring, assert that the first check out succeeds. + +6. Via CMAP monitoring, assert that a PoolClearedEvent is then emitted. + +7. Via CMAP monitoring, assert that the second check out then fails due to a connection error. + +8. Via Command Monitoring, assert that exactly three `insert` CommandStartedEvents were observed in total. +9. Disable the failpoint. + +### 3. Test that drivers return the original error after encountering a WriteConcernError with a RetryableWriteError label. + +This test MUST: + +- be implemented by any driver that implements the Command Monitoring specification, +- only run against replica sets as mongos does not propagate the NoWritesPerformed label to the drivers. +- be run against server versions 6.0 and above. + +Additionally, this test requires drivers to set a fail point after an `insertOne` operation but before the subsequent +retry. Drivers that are unable to set a failCommand after the CommandSucceededEvent SHOULD use mocking or write a unit +test to cover the same sequence of events. + +1. Create a client with `retryWrites=true`. + +2. Configure a fail point with error code `91` (ShutdownInProgress): + + ```javascript + { + configureFailPoint: "failCommand", + mode: {times: 1}, + data: { + failCommands: ["insert"], + errorLabels: ["RetryableWriteError"], + writeConcernError: { code: 91 } + } + } + ``` + +3. Via the command monitoring CommandSucceededEvent, configure a fail point with error code `10107` (NotWritablePrimary) + and a NoWritesPerformed label: + + ```javascript + { + configureFailPoint: "failCommand", + mode: {times: 1}, + data: { + failCommands: ["insert"], + errorCode: 10107, + errorLabels: ["RetryableWriteError", "NoWritesPerformed"] + } + } ``` - This MongoDB deployment does not support retryable writes. Please add - retryWrites=false to your connection string. + + Drivers SHOULD only configure the `10107` fail point command if the the succeeded event is for the `91` error + configured in step 2. + +4. Attempt an `insertOne` operation on any record for any database and collection. For the resulting error, assert that + the associated error code is `91`. + +5. Disable the fail point: + + ```javascript + { + configureFailPoint: "failCommand", + mode: "off" + } ``` - and the error code is 20. +### 4. Test that in a sharded cluster writes are retried on a different mongos when one is available. + +This test MUST be executed against a sharded cluster that has at least two mongos instances, supports +`retryWrites=true`, has enabled the `configureFailPoint` command, and supports the `errorLabels` field (MongoDB 4.3.1+). + +> [!NOTE] +> This test cannot reliably distinguish "retry on a different mongos due to server deprioritization" (the behavior +> intended to be tested) from "retry on a different mongos due to normal SDAM randomized suitable server selection". +> Verify relevant code paths are correctly executed by the tests using external means such as a logging, debugger, code +> coverage tool, etc. + +1. Create two clients `s0` and `s1` that each connect to a single mongos from the sharded cluster. They must not connect + to the same mongos. + +2. Configure the following fail point for both `s0` and `s1`: + + ```javascript + { + configureFailPoint: "failCommand", + mode: { times: 1 }, + data: { + failCommands: ["insert"], + errorCode: 6, + errorLabels: ["RetryableWriteError"] + } + } + ``` + +3. Create a client `client` with `retryWrites=true` that connects to the cluster using the same two mongoses as `s0` and + `s1`. + +4. Enable failed command event monitoring for `client`. + +5. Execute an `insert` command with `client`. Assert that the command failed. + +6. Assert that two failed command events occurred. Assert that the failed command events occurred on different mongoses. + +7. Disable the fail points on both `s0` and `s1`. + +### 5. Test that in a sharded cluster writes are retried on the same mongos when no others are available. + +This test MUST be executed against a sharded cluster that supports `retryWrites=true`, has enabled the +`configureFailPoint` command, and supports the `errorLabels` field (MongoDB 4.3.1+). + +Note: this test cannot reliably distinguish "retry on a different mongos due to server deprioritization" (the behavior +intended to be tested) from "retry on a different mongos due to normal SDAM behavior of randomized suitable server +selection". Verify relevant code paths are correctly executed by the tests using external means such as a logging, +debugger, code coverage tool, etc. + +1. Create a client `s0` that connects to a single mongos from the cluster. + +2. Configure the following fail point for `s0`: + + ```javascript + { + configureFailPoint: "failCommand", + mode: { times: 1 }, + data: { + failCommands: ["insert"], + errorCode: 6, + errorLabels: ["RetryableWriteError"], + closeConnection: true + } + } + ``` + +3. Create a client `client` with `directConnection=false` (when not set by default) and `retryWrites=true` that connects + to the cluster using the same single mongos as `s0`. + +4. Enable succeeded and failed command event monitoring for `client`. + +5. Execute an `insert` command with `client`. Assert that the command succeeded. + +6. Assert that exactly one failed command event and one succeeded command event occurred. Assert that both events + occurred on the same mongos. + +7. Disable the fail point on `s0`. + +## Changelog + +- 2024-05-30: Migrated from reStructuredText to Markdown. + +- 2024-02-27: Convert legacy retryable writes tests to unified format. + +- 2024-02-21: Update prose test 4 and 5 to workaround SDAM behavior preventing\ + execution of deprioritization code + paths. + +- 2024-01-05: Fix typo in prose test title. + +- 2024-01-03: Note server version requirements for fail point options and revise\ + tests to specify the `errorLabels` + option at the top-level instead of within `writeConcernError`. + +- 2023-08-26: Add prose tests for retrying in a sharded cluster. + +- 2022-08-30: Add prose test verifying correct error handling for errors with\ + the NoWritesPerformed label, which is to + return the original error. + +- 2022-04-22: Clarifications to `serverless` and `useMultipleMongoses`. + +- 2021-08-27: Add `serverless` to `runOn`. Clarify behavior of\ + `useMultipleMongoses` for `LoadBalanced` topologies. + +- 2021-04-23: Add `load-balanced` to test topology requirements. + +- 2021-03-24: Add prose test verifying `PoolClearedErrors` are retried. + +- 2019-10-21: Add `errorLabelsContain` and `errorLabelsContain` fields to\ + `result` + +- 2019-08-07: Add Prose Tests section + +- 2019-06-07: Mention $merge stage for aggregate alongside $out + +- 2019-03-01: Add top-level `runOn` field to denote server version and/or\ + topology requirements requirements for the + test file. Removes the `minServerVersion` and `maxServerVersion` top-level fields, which are now expressed within + `runOn` elements. - [!NOTE] - storage engine in use MAY skip this test for sharded clusters, since `mongos` does not report this information in its - `serverStatus` response. + Add test-level `useMultipleMongoses` field. diff --git a/source/server-discovery-and-monitoring/server-monitoring.md b/source/server-discovery-and-monitoring/server-monitoring.md index 2e30583a4d..640685f2aa 100644 --- a/source/server-discovery-and-monitoring/server-monitoring.md +++ b/source/server-discovery-and-monitoring/server-monitoring.md @@ -581,7 +581,7 @@ class Monitor(Thread): wait() def setUpConnection(): - # Take the mutex to avoid a data race becauase this code writes to the connection field and a concurrent + # Take the mutex to avoid a data race because this code writes to the connection field and a concurrent # cancelCheck call could be reading from it. with lock: # Server API versioning implies that the server supports hello. diff --git a/source/transactions/transactions.md b/source/transactions/transactions.md index 5c922eee58..0ad7dd559c 100644 --- a/source/transactions/transactions.md +++ b/source/transactions/transactions.md @@ -636,7 +636,7 @@ Drivers MUST unpin a ClientSession in the following situations: 1. The transaction is aborted. The session MUST be unpinned regardless of whether or the `abortTransaction` command succeeds or fails, or was executed at all. If the operation fails with a retryable error, the session MUST be unpinned before performing server selection for the retry. -2. Any operation in the transcation, including `commitTransaction` fails with a TransientTransactionError. Transient +2. Any operation in the transaction, including `commitTransaction` fails with a TransientTransactionError. Transient errors indicate that the transaction in question has already been aborted or that the pinnned mongos is down/unavailable. Unpinning the session ensures that a subsequent `abortTransaction` (or `commitTransaction`) does not block waiting on a server that is unreachable. diff --git a/source/unified-test-format/unified-test-format.md b/source/unified-test-format/unified-test-format.md index 689e281353..883c783fab 100644 --- a/source/unified-test-format/unified-test-format.md +++ b/source/unified-test-format/unified-test-format.md @@ -834,7 +834,7 @@ The structure of this object is as follows: - `ignoreResultAndError`: Optional boolean. If true, both the error and result for the operation MUST be ignored. - This field is mutally exclusive with [expectResult](#operation_expectResult), [expectError](#operation_expectError), + This field is mutually exclusive with [expectResult](#operation_expectResult), [expectError](#operation_expectError), and [saveResultAsEntity](#operation_saveResultAsEntity). This field SHOULD NOT be used for [Special Test Operations](#special-test-operations) (i.e. `object: testRunner`).