mongodb · blink1073 · May 31, 2024 · May 30, 2024 · May 30, 2024 · May 30, 2024
@@ -14,16 +14,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v4
-
-      # ref: https://github.com/pre-commit/action
-      - uses: pre-commit/action@v3.0.0
-      - name: Help message if pre-commit fail
-        if: ${{ failure() }}
+      - name: "Run pre-commit"
         run: |
-          echo "You can install pre-commit hooks to automatically run formatting"
-          echo "on each commit with:"
-          echo "    pre-commit install"
-          echo "or you can run by hand on staged files with"
-          echo "    pre-commit run"
-          echo "or after-the-fact on already committed files with"
-          echo "    pre-commit run --all-files --hook-stage manual"
+          pip install -U -q pre-commit
+          pre-commit run --all-files --hook-stage manual
@@ -1,7 +1,7 @@
 
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v4.5.0
+  rev: v4.6.0
   hooks:
   - id: check-case-conflict
   - id: check-executables-have-shebangs
@@ -15,7 +15,7 @@ repos:
 # We use the Python version instead of the original version which seems to require Docker
 # https://github.com/koalaman/shellcheck-precommit
 - repo: https://github.com/shellcheck-py/shellcheck-py
-  rev: v0.9.0.6
+  rev: v0.10.0.1
   hooks:
     - id: shellcheck
       name: shellcheck
@@ -43,7 +43,7 @@ repos:
         [mdformat-gfm, mdformat-frontmatter, mdformat-footnote, mdformat-gfm-alerts]
 
 - repo: https://github.com/tcort/markdown-link-check
-  rev: v3.11.2
+  rev: v3.12.2
   hooks:
     - id: markdown-link-check
       args: ["-c", "markdown_link_config.json"]
@@ -57,7 +57,7 @@ repos:
     stages: [manual]
 
 - repo: https://github.com/python-jsonschema/check-jsonschema
-  rev: 0.27.3
+  rev: 0.28.4
   hooks:
     - id: check-github-workflows
 
@@ -69,10 +69,10 @@ repos:
     - id: rst-inline-touching-normal
 
 - repo: https://github.com/codespell-project/codespell
-  rev: "v2.2.6"
+  rev: "v2.3.0"
   hooks:
   - id: codespell
-    args: ["-L", "fle,re-use,merchantibility,synching,crate,nin,infinit,te"]
+    args: ["-L", "fle,re-use,merchantibility,synching,crate,nin,infinit,te,checkin"]
     exclude: |
             (?x)^(.*\.rst
             )$
@@ -1,7 +1,5 @@
 # Retryable Write Tests
 
-______________________________________________________________________
-
 ## Introduction
 
 The YAML and JSON files in this directory are platform-independent tests meant to exercise a driver's implementation of
@@ -106,17 +104,199 @@ Drivers should test that transactions IDs are always included in commands for su
 
 The following tests ensure that retryable writes work properly with replica sets and sharded clusters.
 
-1. Test that retryable writes raise an exception when using the MMAPv1 storage engine. For this test, execute a write
-   operation, such as `insertOne`, which should generate an exception. Assert that the error message is the replacement
-   error message:
+### 1. Test that retryable writes raise an exception when using the MMAPv1 storage engine.
+
+For this test, execute a write operation, such as `insertOne`, which should generate an exception. Assert that the error
+message is the replacement error message:
+
+```
+This MongoDB deployment does not support retryable writes. Please add
+retryWrites=false to your connection string.
+```
+
+and the error code is 20.
+
+> [!NOTE]
+> Drivers that rely on `serverStatus` to determine the storage engine in use MAY skip this test for sharded clusters,
+> since `mongos` does not report this information in its `serverStatus` response.
+
+### 2. Test that drivers properly retry after encountering PoolClearedErrors.
+
+This test MUST be implemented by any driver that implements the CMAP specification.
+
+This test requires MongoDB 4.3.4+ for both the `errorLabels` and `blockConnection` fail point options.
+
+1. Create a client with maxPoolSize=1 and retryWrites=true. If testing against a sharded deployment, be sure to connect
+   to only a single mongos.
+
+2. Enable the following failpoint:
+
+   ```javascript
+   {
+       configureFailPoint: "failCommand",
+       mode: { times: 1 },
+       data: {
+           failCommands: ["insert"],
+           errorCode: 91,
+           blockConnection: true,
+           blockTimeMS: 1000,
+           errorLabels: ["RetryableWriteError"]
+       }
+   }
+   ```
+
+3. Start two threads and attempt to perform an `insertOne` simultaneously on both.
+
+4. Verify that both `insertOne` attempts succeed.
+
+5. Via CMAP monitoring, assert that the first check out succeeds.
 
+6. Via CMAP monitoring, assert that a PoolClearedEvent is then emitted.
+
+7. Via CMAP monitoring, assert that the second check out then fails due to a connection error.
+
+8. Via Command Monitoring, assert that exactly three `insert` CommandStartedEvents were observed in total.
+
+9. Disable the failpoint.
+
+### 3. Test that drivers return the original error after encountering a WriteConcernError with a RetryableWriteError label.
+
+This test MUST:
+
+- be implemented by any driver that implements the Command Monitoring specification,
+- only run against replica sets as mongos does not propagate the NoWritesPerformed label to the drivers.
+- be run against server versions 6.0 and above.
+
+Additionally, this test requires drivers to set a fail point after an `insertOne` operation but before the
+subsequent retry. Drivers that are unable to set a failCommand after the CommandSucceededEvent SHOULD use mocking or
+write a unit test to cover the same sequence of events.
+
+1. Create a client with `retryWrites=true`.
+
+2. Configure a fail point with error code `91` (ShutdownInProgress):
+
+   ```javascript
+   {
+       configureFailPoint: "failCommand",
+       mode: {times: 1},
+       data: {
+           failCommands: ["insert"],
+           errorLabels: ["RetryableWriteError"],
+           writeConcernError: { code: 91 }
+       }
+   }
    ```
-   This MongoDB deployment does not support retryable writes. Please add
-   retryWrites=false to your connection string.
+
+3. Via the command monitoring CommandSucceededEvent, configure a fail point with error code `10107` (NotWritablePrimary)
+   and a NoWritesPerformed label:
+
+   ```javascript
+   {
+       configureFailPoint: "failCommand",
+       mode: {times: 1},
+       data: {
+           failCommands: ["insert"],
+           errorCode: 10107,
+           errorLabels: ["RetryableWriteError", "NoWritesPerformed"]
+       }
+   }
    ```
 
-   and the error code is 20.
+   Drivers SHOULD only configure the `10107` fail point command if the the succeeded event is for the `91` error
+   configured in step 2.
+
+4. Attempt an `insertOne` operation on any record for any database and collection. For the resulting error, assert that
+   the associated error code is `91`.
+
+5. Disable the fail point:
+
+   ```javascript
+   {
+       configureFailPoint: "failCommand",
+       mode: "off"
+   }
+   ```
+
+### 4. Test that in a sharded cluster writes are retried on a different mongos when one is available.
+
+This test MUST be executed against a sharded cluster that has at least two mongos instances, supports
+`retryWrites=true`, has enabled the `configureFailPoint` command, and supports the `errorLabels` field (MongoDB 4.3.1+).
+
+> [!NOTE]
+> This test cannot reliably distinguish "retry on a different mongos due to server deprioritization" (the behavior
+> intended to be tested) from "retry on a different mongos due to normal SDAM randomized suitable server selection".
+> Verify relevant code paths are correctly executed by the tests using external means such as a logging, debugger, code
+> coverage tool, etc.
+
+1. Create two clients `s0` and `s1` that each connect to a single mongos from the sharded cluster. They must not connect
+   to the same mongos.
+
+2. Configure the following fail point for both `s0` and `s1`:
+
+   ```javascript
+   {
+       configureFailPoint: "failCommand",
+       mode: { times: 1 },
+       data: {
+           failCommands: ["insert"],
+           errorCode: 6,
+           errorLabels: ["RetryableWriteError"]
+       }
+   }
+   ```
+
+3. Create a client `client` with `retryWrites=true` that connects to the cluster using the same two mongoses as `s0` and
+   `s1`.
+
+4. Enable failed command event monitoring for `client`.
+
+5. Execute an `insert` command with `client`. Assert that the command failed.
+
+6. Assert that two failed command events occurred. Assert that the failed command events occurred on different mongoses.
+
+7. Disable the fail points on both `s0` and `s1`.
+
+## Changelog
+
+- 2024-05-30: Migrated from reStructuredText to Markdown.
+
+- 2024-02-27: Convert legacy retryable writes tests to unified format.
+
+- 2024-02-21: Update prose test 4 and 5 to workaround SDAM behavior preventing\
+  execution of deprioritization code
+  paths.
+
+- 2024-01-05: Fix typo in prose test title.
+
+- 2024-01-03: Note server version requirements for fail point options and revise\
+  tests to specify the `errorLabels`
+  option at the top-level instead of within `writeConcernError`.
+
+- 2023-08-26: Add prose tests for retrying in a sharded cluster.
+
+- 2022-08-30: Add prose test verifying correct error handling for errors with\
+  the NoWritesPerformed label, which is to
+  return the original error.
+
+- 2022-04-22: Clarifications to `serverless` and `useMultipleMongoses`.
+
+- 2021-08-27: Add `serverless` to `runOn`. Clarify behavior of\
+  `useMultipleMongoses` for `LoadBalanced` topologies.
+
+- 2021-04-23: Add `load-balanced` to test topology requirements.
+
+- 2021-03-24: Add prose test verifying `PoolClearedErrors` are retried.
+
+- 2019-10-21: Add `errorLabelsContain` and `errorLabelsContain` fields to\
+  `result`
+
+- 2019-08-07: Add Prose Tests section
+
+- 2019-06-07: Mention $merge stage for aggregate alongside $out
+
+- 2019-03-01: Add top-level `runOn` field to denote server version and/or\
+  topology requirements requirements for the
+  test file. Removes the `minServerVersion` and `maxServerVersion` top-level fields, which are now expressed within
+  `runOn` elements.
 
-   [!NOTE]
-   storage engine in use MAY skip this test for sharded clusters, since `mongos` does not report this information in its
-   `serverStatus` response.
+  Add test-level `useMultipleMongoses` field.
@@ -581,7 +581,7 @@ class Monitor(Thread):
           wait()
 
   def setUpConnection():
-      # Take the mutex to avoid a data race becauase this code writes to the connection field and a concurrent
+      # Take the mutex to avoid a data race because this code writes to the connection field and a concurrent
       # cancelCheck call could be reading from it.
       with lock:
           # Server API versioning implies that the server supports hello.

@@ -636,7 +636,7 @@ Drivers MUST unpin a ClientSession in the following situations:
 1. The transaction is aborted. The session MUST be unpinned regardless of whether or the `abortTransaction` command
    succeeds or fails, or was executed at all. If the operation fails with a retryable error, the session MUST be
    unpinned before performing server selection for the retry.
-2. Any operation in the transcation, including `commitTransaction` fails with a TransientTransactionError. Transient
+2. Any operation in the transaction, including `commitTransaction` fails with a TransientTransactionError. Transient
    errors indicate that the transaction in question has already been aborted or that the pinnned mongos is
    down/unavailable. Unpinning the session ensures that a subsequent `abortTransaction` (or `commitTransaction`) does
    not block waiting on a server that is unreachable.

@@ -834,7 +834,7 @@ The structure of this object is as follows:
 
 - `ignoreResultAndError`: Optional boolean. If true, both the error and result for the operation MUST be ignored.
 
-  This field is mutally exclusive with [expectResult](#operation_expectResult), [expectError](#operation_expectError),
+  This field is mutually exclusive with [expectResult](#operation_expectResult), [expectError](#operation_expectError),
   and [saveResultAsEntity](#operation_saveResultAsEntity).
 
   This field SHOULD NOT be used for [Special Test Operations](#special-test-operations) (i.e. `object: testRunner`).