DRIVERS-2870 Fix content of retryable-writes-test readme #1590

Merged
15 changes: 3 additions & 12 deletions .github/workflows/lint.yml
@@ -14,16 +14,7 @@ jobs:
 steps:
   - uses: actions/checkout@v4
   - uses: actions/setup-python@v4
-
-  # ref: https://github.com/pre-commit/action
-  - uses: pre-commit/action@v3.0.0
-  - name: Help message if pre-commit fail
-    if: ${{ failure() }}
+  - name: "Run pre-commit"
     run: |
-      echo "You can install pre-commit hooks to automatically run formatting"
-      echo "on each commit with:"
-      echo "  pre-commit install"
-      echo "or you can run by hand on staged files with"
-      echo "  pre-commit run"
-      echo "or after-the-fact on already committed files with"
-      echo "  pre-commit run --all-files --hook-stage manual"
+      pip install -U -q pre-commit
+      pre-commit run --all-files --hook-stage manual
12 changes: 6 additions & 6 deletions .pre-commit-config.yaml
@@ -1,7 +1,7 @@

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v4.5.0
+  rev: v4.6.0
hooks:
- id: check-case-conflict
- id: check-executables-have-shebangs
@@ -15,7 +15,7 @@ repos:
# We use the Python version instead of the original version which seems to require Docker
# https://github.com/koalaman/shellcheck-precommit
- repo: https://github.com/shellcheck-py/shellcheck-py
-  rev: v0.9.0.6
+  rev: v0.10.0.1
hooks:
- id: shellcheck
name: shellcheck
@@ -43,7 +43,7 @@ repos:
[mdformat-gfm, mdformat-frontmatter, mdformat-footnote, mdformat-gfm-alerts]

- repo: https://github.com/tcort/markdown-link-check
-  rev: v3.11.2
+  rev: v3.12.2
hooks:
- id: markdown-link-check
args: ["-c", "markdown_link_config.json"]
@@ -57,7 +57,7 @@ repos:
stages: [manual]

- repo: https://github.com/python-jsonschema/check-jsonschema
-  rev: 0.27.3
+  rev: 0.28.4
hooks:
- id: check-github-workflows

@@ -69,10 +69,10 @@ repos:
- id: rst-inline-touching-normal

- repo: https://github.com/codespell-project/codespell
rev: "v2.2.6"
rev: "v2.3.0"
hooks:
- id: codespell
args: ["-L", "fle,re-use,merchantibility,synching,crate,nin,infinit,te"]
args: ["-L", "fle,re-use,merchantibility,synching,crate,nin,infinit,te,checkin"]
exclude: |
(?x)^(.*\.rst
)$
241 changes: 230 additions & 11 deletions source/retryable-writes/tests/README.md
@@ -1,7 +1,5 @@
# Retryable Write Tests


## Introduction

The YAML and JSON files in this directory are platform-independent tests meant to exercise a driver's implementation of
@@ -106,17 +104,238 @@ Drivers should test that transactions IDs are always included in commands for su

The following tests ensure that retryable writes work properly with replica sets and sharded clusters.

### 1. Test that retryable writes raise an exception when using the MMAPv1 storage engine.

For this test, execute a write operation, such as `insertOne`, which should generate an exception. Assert that the error
message is the replacement error message:

```
This MongoDB deployment does not support retryable writes. Please add
retryWrites=false to your connection string.
```

and the error code is 20.

> [!NOTE]
> Drivers that rely on `serverStatus` to determine the storage engine in use MAY skip this test for sharded clusters,
> since `mongos` does not report this information in its `serverStatus` response.
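
The assertion can be sketched as follows, assuming PyMongo and a deployment already configured to use MMAPv1 (the URI and namespace are illustrative, not prescribed by the test):

```python
from pymongo import MongoClient
from pymongo.errors import OperationFailure

# Illustrative sketch: assumes the deployment under test runs MMAPv1 and is
# reachable at this URI.
client = MongoClient("mongodb://localhost:27017/?retryWrites=true")

try:
    client.test.retryable_writes.insert_one({"x": 1})
    raise AssertionError("expected insertOne to fail against MMAPv1")
except OperationFailure as exc:
    # The driver should surface the replacement error message and code 20.
    assert exc.code == 20
    assert "This MongoDB deployment does not support retryable writes" in str(exc)
```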

### 2. Test that drivers properly retry after encountering PoolClearedErrors.

This test MUST be implemented by any driver that implements the CMAP specification.

This test requires MongoDB 4.3.4+ for both the `errorLabels` and `blockConnection` fail point options.

1. Create a client with maxPoolSize=1 and retryWrites=true. If testing against a sharded deployment, be sure to connect
to only a single mongos.

2. Enable the following failpoint:

```javascript
{
configureFailPoint: "failCommand",
mode: { times: 1 },
data: {
failCommands: ["insert"],
errorCode: 91,
blockConnection: true,
blockTimeMS: 1000,
errorLabels: ["RetryableWriteError"]
}
}
```

3. Start two threads and attempt to perform an `insertOne` simultaneously on both.

4. Verify that both `insertOne` attempts succeed.

5. Via CMAP monitoring, assert that the first check out succeeds.

6. Via CMAP monitoring, assert that a PoolClearedEvent is then emitted.

7. Via CMAP monitoring, assert that the second check out then fails due to a connection error.

8. Via Command Monitoring, assert that exactly three `insert` CommandStartedEvents were observed in total.

9. Disable the failpoint.
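
A minimal sketch of steps 1, 3, and 4, assuming PyMongo and a single host at `localhost:27017` with the fail point above already enabled (the CMAP and command monitoring assertions from steps 5-8 are omitted):

```python
import threading

from pymongo import MongoClient

# Step 1: maxPoolSize=1 forces both threads to contend for one connection.
client = MongoClient("mongodb://localhost:27017/?retryWrites=true&maxPoolSize=1")
coll = client.test.retryable_writes

results = []

def attempt_insert():
    # The thread that hits the fail point receives a retryable error, the
    # pool is cleared, and the write is retried on a fresh connection.
    results.append(coll.insert_one({"x": 1}))

# Steps 3 and 4: run both inserts simultaneously and verify both succeed.
threads = [threading.Thread(target=attempt_insert) for _ in range(2)]
for t in threads:
    t.start()
for t in threads:
    t.join()
assert len(results) == 2
```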

### 3. Test that drivers return the original error after encountering a WriteConcernError with a RetryableWriteError label.

This test MUST:

- be implemented by any driver that implements the Command Monitoring specification,
- only run against replica sets, as mongos does not propagate the NoWritesPerformed label to the drivers, and
- be run against server versions 6.0 and above.

Additionally, this test requires drivers to set a fail point after an `insertOne` operation but before the subsequent
retry. Drivers that are unable to set a failCommand after the CommandSucceededEvent SHOULD use mocking or write a unit
test to cover the same sequence of events.

1. Create a client with `retryWrites=true`.

2. Configure a fail point with error code `91` (ShutdownInProgress):

```javascript
{
configureFailPoint: "failCommand",
mode: {times: 1},
data: {
failCommands: ["insert"],
errorLabels: ["RetryableWriteError"],
writeConcernError: { code: 91 }
}
}
```

3. Via the command monitoring CommandSucceededEvent, configure a fail point with error code `10107` (NotWritablePrimary)
and a NoWritesPerformed label:

```javascript
{
configureFailPoint: "failCommand",
mode: {times: 1},
data: {
failCommands: ["insert"],
errorCode: 10107,
errorLabels: ["RetryableWriteError", "NoWritesPerformed"]
}
}
```

Drivers SHOULD only configure the `10107` fail point command if the succeeded event is for the `91` error configured in
step 2.

4. Attempt an `insertOne` operation on any record for any database and collection. For the resulting error, assert that
the associated error code is `91`.

5. Disable the fail point:

```javascript
{
configureFailPoint: "failCommand",
mode: "off"
}
```
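
One way to satisfy step 3, configuring the second fail point from within a CommandSucceededEvent handler, is sketched below, assuming PyMongo; a separate, non-monitored client issues the fail point command so the listener does not observe it:

```python
from pymongo import MongoClient, monitoring

class FailPointSetter(monitoring.CommandListener):
    """Sets the step-3 fail point once the step-2 writeConcernError is seen."""

    def __init__(self, fail_point_client):
        # A separate client, so the fail point command itself is not monitored.
        self.fail_point_client = fail_point_client

    def started(self, event):
        pass

    def succeeded(self, event):
        wce = event.reply.get("writeConcernError", {})
        # Only react to the code-91 writeConcernError configured in step 2.
        if event.command_name == "insert" and wce.get("code") == 91:
            self.fail_point_client.admin.command({
                "configureFailPoint": "failCommand",
                "mode": {"times": 1},
                "data": {
                    "failCommands": ["insert"],
                    "errorCode": 10107,
                    "errorLabels": ["RetryableWriteError", "NoWritesPerformed"],
                },
            })

    def failed(self, event):
        pass

listener = FailPointSetter(MongoClient("mongodb://localhost:27017"))
client = MongoClient(
    "mongodb://localhost:27017/?retryWrites=true", event_listeners=[listener]
)
```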

### 4. Test that in a sharded cluster writes are retried on a different mongos when one is available.

This test MUST be executed against a sharded cluster that has at least two mongos instances, supports
`retryWrites=true`, has enabled the `configureFailPoint` command, and supports the `errorLabels` field (MongoDB 4.3.1+).

> [!NOTE]
> This test cannot reliably distinguish "retry on a different mongos due to server deprioritization" (the behavior
> intended to be tested) from "retry on a different mongos due to normal SDAM randomized suitable server selection".
> Verify relevant code paths are correctly executed by the tests using external means such as logging, a debugger, or a
> code coverage tool.

1. Create two clients `s0` and `s1` that each connect to a single mongos from the sharded cluster. They must not connect
to the same mongos.

2. Configure the following fail point for both `s0` and `s1`:

```javascript
{
configureFailPoint: "failCommand",
mode: { times: 1 },
data: {
failCommands: ["insert"],
errorCode: 6,
errorLabels: ["RetryableWriteError"]
}
}
```

3. Create a client `client` with `retryWrites=true` that connects to the cluster using the same two mongoses as `s0` and
`s1`.

4. Enable failed command event monitoring for `client`.

5. Execute an `insert` command with `client`. Assert that the command failed.

6. Assert that two failed command events occurred. Assert that the failed command events occurred on different mongoses.

7. Disable the fail points on both `s0` and `s1`.
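
Steps 3-6 might be sketched as follows, assuming PyMongo, mongoses at ports 27017 and 27018, and the fail points above already set through `s0` and `s1`:

```python
from pymongo import MongoClient, monitoring
from pymongo.errors import PyMongoError

failed_addresses = []

class FailureRecorder(monitoring.CommandListener):
    def started(self, event):
        pass

    def succeeded(self, event):
        pass

    def failed(self, event):
        # connection_id is the (host, port) of the server that failed the command.
        if event.command_name == "insert":
            failed_addresses.append(event.connection_id)

# Step 3: connect to both mongoses; step 4: enable failed-event monitoring.
client = MongoClient(
    "mongodb://localhost:27017,localhost:27018/?retryWrites=true",
    event_listeners=[FailureRecorder()],
)

# Step 5: the insert fails on the first mongos and again on the retry.
try:
    client.test.retryable_writes.insert_one({"x": 1})
    raise AssertionError("expected the write to fail")
except PyMongoError:
    pass

# Step 6: two failed command events, on different mongoses.
assert len(failed_addresses) == 2
assert failed_addresses[0] != failed_addresses[1]
```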

### 5. Test that in a sharded cluster writes are retried on the same mongos when no others are available.

This test MUST be executed against a sharded cluster that supports `retryWrites=true`, has enabled the
`configureFailPoint` command, and supports the `errorLabels` field (MongoDB 4.3.1+).

> [!NOTE]
> This test cannot reliably distinguish "retry on the same mongos due to server deprioritization" (the behavior
> intended to be tested) from "retry on the same mongos due to normal SDAM behavior of randomized suitable server
> selection". Verify relevant code paths are correctly executed by the tests using external means such as logging, a
> debugger, or a code coverage tool.

1. Create a client `s0` that connects to a single mongos from the cluster.

2. Configure the following fail point for `s0`:

```javascript
{
configureFailPoint: "failCommand",
mode: { times: 1 },
data: {
failCommands: ["insert"],
errorCode: 6,
errorLabels: ["RetryableWriteError"],
closeConnection: true
}
}
```

3. Create a client `client` with `directConnection=false` (when not set by default) and `retryWrites=true` that connects
to the cluster using the same single mongos as `s0`.

4. Enable succeeded and failed command event monitoring for `client`.

5. Execute an `insert` command with `client`. Assert that the command succeeded.

6. Assert that exactly one failed command event and one succeeded command event occurred. Assert that both events
occurred on the same mongos.

7. Disable the fail point on `s0`.
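
A corresponding sketch for steps 3-6, again assuming PyMongo and a single mongos at port 27017 with the fail point above set through `s0`:

```python
from pymongo import MongoClient, monitoring

insert_events = []

class Recorder(monitoring.CommandListener):
    def started(self, event):
        pass

    def succeeded(self, event):
        if event.command_name == "insert":
            insert_events.append(("succeeded", event.connection_id))

    def failed(self, event):
        if event.command_name == "insert":
            insert_events.append(("failed", event.connection_id))

client = MongoClient(
    "mongodb://localhost:27017/?retryWrites=true&directConnection=false",
    event_listeners=[Recorder()],
)

# Step 5: the first attempt fails; the retry on the same mongos succeeds.
client.test.retryable_writes.insert_one({"x": 1})

# Step 6: exactly one failure and one success, both on the same mongos.
assert [kind for kind, _ in insert_events] == ["failed", "succeeded"]
assert insert_events[0][1] == insert_events[1][1]
```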

## Changelog

- 2024-05-30: Migrated from reStructuredText to Markdown.

- 2024-02-27: Convert legacy retryable writes tests to unified format.

- 2024-02-21: Update prose tests 4 and 5 to work around SDAM behavior preventing execution of deprioritization code
  paths.

- 2024-01-05: Fix typo in prose test title.

- 2024-01-03: Note server version requirements for fail point options and revise tests to specify the `errorLabels`
  option at the top-level instead of within `writeConcernError`.

- 2023-08-26: Add prose tests for retrying in a sharded cluster.

- 2022-08-30: Add prose test verifying correct error handling for errors with the NoWritesPerformed label, which is to
  return the original error.

- 2022-04-22: Clarifications to `serverless` and `useMultipleMongoses`.

- 2021-08-27: Add `serverless` to `runOn`. Clarify behavior of `useMultipleMongoses` for `LoadBalanced` topologies.

- 2021-04-23: Add `load-balanced` to test topology requirements.

- 2021-03-24: Add prose test verifying `PoolClearedErrors` are retried.

- 2019-10-21: Add `errorLabelsContain` and `errorLabelsOmit` fields to `result`.

- 2019-08-07: Add Prose Tests section

- 2019-06-07: Mention $merge stage for aggregate alongside $out

- 2019-03-01: Add top-level `runOn` field to denote server version and/or topology requirements for the test file.
  Removes the `minServerVersion` and `maxServerVersion` top-level fields, which are now expressed within `runOn`
  elements. Add test-level `useMultipleMongoses` field.
@@ -581,7 +581,7 @@ class Monitor(Thread):
wait()

def setUpConnection():
-        # Take the mutex to avoid a data race becauase this code writes to the connection field and a concurrent
+        # Take the mutex to avoid a data race because this code writes to the connection field and a concurrent
# cancelCheck call could be reading from it.
with lock:
# Server API versioning implies that the server supports hello.
2 changes: 1 addition & 1 deletion source/transactions/transactions.md
@@ -636,7 +636,7 @@ Drivers MUST unpin a ClientSession in the following situations:
1. The transaction is aborted. The session MUST be unpinned regardless of whether the `abortTransaction` command
   succeeds or fails, or was executed at all. If the operation fails with a retryable error, the session MUST be
   unpinned before performing server selection for the retry.
-2. Any operation in the transcation, including `commitTransaction` fails with a TransientTransactionError. Transient
+2. Any operation in the transaction, including `commitTransaction`, fails with a TransientTransactionError. Transient
   errors indicate that the transaction in question has already been aborted or that the pinned mongos is
   down/unavailable. Unpinning the session ensures that a subsequent `abortTransaction` (or `commitTransaction`) does
   not block waiting on a server that is unreachable.
2 changes: 1 addition & 1 deletion source/unified-test-format/unified-test-format.md
@@ -834,7 +834,7 @@ The structure of this object is as follows:

- `ignoreResultAndError`: Optional boolean. If true, both the error and result for the operation MUST be ignored.

-This field is mutally exclusive with [expectResult](#operation_expectResult), [expectError](#operation_expectError),
+This field is mutually exclusive with [expectResult](#operation_expectResult), [expectError](#operation_expectError),
and [saveResultAsEntity](#operation_saveResultAsEntity).

This field SHOULD NOT be used for [Special Test Operations](#special-test-operations) (i.e. `object: testRunner`).
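
For illustration, a hypothetical operation description in a JSON test file using this field might look like the following (the entity name `collection0` and the document are illustrative):

```json
{
  "name": "insertOne",
  "object": "collection0",
  "arguments": { "document": { "_id": 1 } },
  "ignoreResultAndError": true
}
```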