From f3d26ba60af7980cf13a9052f57a9bc9f6ad534d Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Tue, 23 Sep 2025 11:35:53 -0700 Subject: [PATCH 01/14] DRIVERS-2884: CSOT avoid connection churn when operations timeout --- .../tests/pending-response.json | 661 +++++++++ .../tests/pending-response.yml | 349 +++++ .../connection-monitoring-and-pooling.md | 294 +++- .../tests/README.md | 68 + .../logging/connection-logging-csot.json | 578 ++++++++ .../tests/logging/connection-logging-csot.yml | 254 ++++ .../tests/logging/connection-logging.yml | 1 + source/unified-test-format/schema-1.28.json | 1264 +++++++++++++++++ source/unified-test-format/schema-latest.json | 22 + .../unified-test-format.md | 31 +- 10 files changed, 3513 insertions(+), 9 deletions(-) create mode 100644 source/client-side-operations-timeout/tests/pending-response.json create mode 100644 source/client-side-operations-timeout/tests/pending-response.yml create mode 100644 source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.json create mode 100644 source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.yml create mode 100644 source/unified-test-format/schema-1.28.json diff --git a/source/client-side-operations-timeout/tests/pending-response.json b/source/client-side-operations-timeout/tests/pending-response.json new file mode 100644 index 0000000000..278368da57 --- /dev/null +++ b/source/client-side-operations-timeout/tests/pending-response.json @@ -0,0 +1,661 @@ +{ + "description": "Connection churn is prevented by reading pending responses during connection checkout", + "schemaVersion": "1.27", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "topologies": [ + "single", + "replicaset" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "failPointClient", + "useMultipleMongoses": false + } + }, + { + "client": { + "id": "client", + "uriOptions": { + "maxPoolSize": 1, + "minPoolSize": 1 + }, + "awaitMinPoolSizeMS": 10000, + "useMultipleMongoses": false, + "observeEvents": [ + "commandFailedEvent", + "commandSucceededEvent", + "connectionCheckedOutEvent", + "connectionCheckedInEvent", + "connectionClosedEvent", + "connectionPendingResponseSucceeded", + "connectionPendingResponseStarted", + "connectionPendingResponseFailed" + ] + } + }, + { + "database": { + "id": "test", + "client": "client", + "databaseName": "test" + } + }, + { + "collection": { + "id": "coll", + "database": "test", + "collectionName": "coll" + } + } + ], + "initialData": [ + { + "collectionName": "coll", + "databaseName": "test", + "documents": [] + } + ], + "tests": [ + { + "description": "non-timeout op with response and no error", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 100 + } + } + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 75, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "find", + "object": "coll", + "arguments": { + "filter": { + "_id": 1 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "find" + } + } + ] + }, + { + "client": "client", + "eventType": "cmap", + "events": [ + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionPendingResponseStarted": {} + }, + { + "connectionPendingResponseSucceeded": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + } + ] + } + ] + }, + { + "description": "non-timeout op with no response", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 3100 + } + } + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "find", + "object": "coll", + "arguments": { + "filter": { + "_id": 1 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "find" + } + } + ] + }, + { + "client": "client", + "eventType": "cmap", + "events": [ + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionPendingResponseStarted": {} + }, + { + "connectionPendingResponseFailed": { + "reason": "timeout" + } + }, + { + "connectionClosedEvent": { + "reason": "error" + } + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + } + ] + } + ] + }, + { + "description": "timeout op with response and no error", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 250 + } + } + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 75, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "find", + "object": "coll", + "arguments": { + "timeoutMS": 200, + "filter": { + "_id": 1 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "find" + } + } + ] + }, + { + "client": "client", + "eventType": "cmap", + "events": [ + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionPendingResponseStarted": {} + }, + { + "connectionPendingResponseSucceeded": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + } + ] + } + ] + }, + { + "description": "multiple calls to drain buffer", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 500 + } + } + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "find", + "object": "coll", + "arguments": { + "timeoutMS": 50, + "filter": { + "_id": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "find", + "object": "coll", + "arguments": { + "filter": { + "_id": 1 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "find" + } + } + ] + }, + { + "client": "client", + "eventType": "cmap", + "events": [ + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionPendingResponseStarted": {} + }, + { + "connectionPendingResponseFailed": { + "reason": "timeout" + } + }, + { + "connectionPendingResponseStarted": {} + }, + { + "connectionPendingResponseSucceeded": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + } + ] + } + ] + }, + { + "description": "read op retries when connection closes server-side while draining response", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 500, + "closeConnection": true + } + } + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "wait", + "object": "testRunner", + "arguments": { + "ms": 500 + } + }, + { + "name": "find", + "object": "coll", + "arguments": { + "timeoutMS": 50, + "filter": { + "_id": 1 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "find" + } + } + ] + }, + { + "client": "client", + "eventType": "cmap", + "events": [ + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionPendingResponseStarted": {} + }, + { + "connectionPendingResponseFailed": { + "reason": "error" + } + }, + { + "connectionClosedEvent": { + "reason": "error" + } + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + } + ] + } + ] + }, + { + "description": "write op retries when connection closes server-side while draining response", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 500, + "closeConnection": true + } + } + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "wait", + "object": "testRunner", + "arguments": { + "ms": 500 + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "insert" + } + } + ] + }, + { + "client": "client", + "eventType": "cmap", + "events": [ + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionPendingResponseStarted": {} + }, + { + "connectionPendingResponseFailed": { + "reason": "error" + } + }, + { + "connectionClosedEvent": { + "reason": "error" + } + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + } + ] + } + ] + } + ] +} diff --git a/source/client-side-operations-timeout/tests/pending-response.yml b/source/client-side-operations-timeout/tests/pending-response.yml new file mode 100644 index 0000000000..4cc2f613fb --- /dev/null +++ b/source/client-side-operations-timeout/tests/pending-response.yml @@ -0,0 +1,349 @@ +description: "Connection churn is prevented by reading pending responses during connection checkout" +schemaVersion: "1.27" +runOnRequirements: + - minServerVersion: "4.4" + # TODO(SERVER-96344): When using failpoints, mongos returns MaxTimeMSExpired + # after maxTimeMS, whereas mongod returns it after + # max(blockTimeMS, maxTimeMS). Until this ticket is resolved, these tests + # will not pass on sharded clusters. + topologies: ["single", "replicaset"] +createEntities: + - client: + id: &failPointClient failPointClient + useMultipleMongoses: false + - client: + id: &client client + uriOptions: + maxPoolSize: 1 + minPoolSize: 1 + awaitMinPoolSizeMS: 10000 + useMultipleMongoses: false + observeEvents: + - commandFailedEvent + - commandSucceededEvent + - connectionCheckedOutEvent + - connectionCheckedInEvent + - connectionClosedEvent + - connectionPendingResponseSucceeded + - connectionPendingResponseStarted + - connectionPendingResponseFailed + - database: + id: &database test + client: *client + databaseName: *database + - collection: + id: &collection coll + database: *database + collectionName: *collection +initialData: + - collectionName: *collection + databaseName: *database + documents: [] +tests: + # Attempting a pending response read on a non-timeout operation that can + # immediately read from the TCP buffer should complete the pending read and + # the connection should be checked out. + - description: "non-timeout op with response and no error" + operations: + # Create a failpoint to block the first operation. + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: {times: 1} + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 100 + # Execute operation with timeout less than block time. + - name: insertOne + object: *collection + arguments: + timeoutMS: 75 + document: {_id: 3, x: 1} + expectError: + isTimeoutError: true + # Execute a subsequent operation to complete the read. + - name: find + object: *collection + arguments: + filter: {_id: 1} + expectEvents: + - client: *client + events: + - commandFailedEvent: + commandName: insert + - commandSucceededEvent: + commandName: find + - client: *client + eventType: cmap + events: + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Insert fails. + - connectionPendingResponseStarted: {} + - connectionPendingResponseSucceeded: {} # Find operation drains connection. + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Find succeeds. + # Attempting a pending response read on a non-timeout operation that gets no + # response from the server after 3s should close the connection. + - description: "non-timeout op with no response" + operations: + # Create a failpoint to block the first operation. + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: {times: 1} + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 3100 + # Execute operation with timeout less than block time. + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: {_id: 3, x: 1} + expectError: + isTimeoutError: true + # Execute a subsequent operation to complete the read. We expect failure + # in the pending read, resulting in a closed connection. However, the + # find should retry and succeed. + - name: find + object: *collection + arguments: + filter: {_id: 1} + expectEvents: + - client: *client + events: + - commandFailedEvent: + commandName: insert + - commandSucceededEvent: + commandName: find + - client: *client + eventType: cmap + events: + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Insert fails. + - connectionPendingResponseStarted: {} + - connectionPendingResponseFailed: + reason: timeout + - connectionClosedEvent: + reason: error + - connectionCheckedOutEvent: {} # Find's retry starts and succeeds + - connectionCheckedInEvent: {} + # Attempting a pending response read on a realistic timeout operation that can + # immediately read from the TCP buffer should complete the pending read and + # the connection should be checked out. + - description: "timeout op with response and no error" + operations: + # Create a failpoint to block the first operation. + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: {times: 1} + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 250 + # Execute operation with timeout less than block time. + - name: insertOne + object: *collection + arguments: + timeoutMS: 75 + document: {_id: 3, x: 1} + expectError: + isTimeoutError: true + # Execute a subsequent operation to complete the read. + - name: find + object: *collection + arguments: + timeoutMS: 200 + filter: {_id: 1} + expectEvents: + - client: *client + events: + - commandFailedEvent: + commandName: insert + - commandSucceededEvent: + commandName: find + - client: *client + eventType: cmap + events: + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Insert fails. + - connectionPendingResponseStarted: {} + - connectionPendingResponseSucceeded: {} + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Find succeeds. + # It may take multiple calls to the pending response handler to drain the + # inbound buffer. + - description: "multiple calls to drain buffer" + operations: + # Create a failpoint to block the first and second operation. + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: {times: 1} + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 500 + # Execute operation with timeout less than block time. + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: {_id: 3, x: 1} + expectError: + isTimeoutError: true + # Execute a subsequent operation with a timeout less than the block time. + - name: find + object: *collection + arguments: + timeoutMS: 50 + filter: {_id: 1} + expectError: + isTimeoutError: true + # Execute a final operation to drain the buffer. + - name: find + object: *collection + arguments: + filter: {_id: 1} + expectEvents: + - client: *client + events: + - commandFailedEvent: + commandName: insert + - commandSucceededEvent: + commandName: find + - client: *client + eventType: cmap + events: + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Insert fails. + - connectionPendingResponseStarted: {} # First find fails + - connectionPendingResponseFailed: + reason: timeout + - connectionPendingResponseStarted: {} # Second find drains the buffer. + - connectionPendingResponseSucceeded: {} + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Second find succeeds. + # If the connection is closed server-side while draining the response, the + # driver must retry with a different connection. + - description: "read op retries when connection closes server-side while draining response" + operations: + # Create a failpoint to block the first and second operation. + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: {times: 1} + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 500 + closeConnection: true + # Execute operation with timeout less than block time. + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: {_id: 3, x: 1} + expectError: + isTimeoutError: true + - name: wait + object: testRunner + arguments: + ms: 500 + # Execute a subsequent operation with a timeout less than the block time. + - name: find + object: *collection + arguments: + timeoutMS: 50 + filter: {_id: 1} + expectEvents: + - client: *client + events: + - commandFailedEvent: + commandName: insert + - commandSucceededEvent: + commandName: find + - client: *client + eventType: cmap + events: + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Insert fails. + - connectionPendingResponseStarted: {} # Pending read fails on first find + - connectionPendingResponseFailed: + reason: error + - connectionClosedEvent: + reason: error + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Find finishes. + # If the connection is closed server-side while draining the response, the + # driver must retry with a different connection. + - description: "write op retries when connection closes server-side while draining response" + operations: + # Create a failpoint to block the first and second operation. + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: {times: 1} + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 500 + closeConnection: true + # Execute operation with timeout less than block time. + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: {_id: 3, x: 1} + expectError: + isTimeoutError: true + - name: wait + object: testRunner + arguments: + ms: 500 + # Execute a subsequent operation with a timeout less than the block time. + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: {_id: 3, x: 1} + expectEvents: + - client: *client + events: + - commandFailedEvent: + commandName: insert + - commandSucceededEvent: + commandName: insert + - client: *client + eventType: cmap + events: + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Insert fails. + - connectionPendingResponseStarted: {} # Pending read fails on first find + - connectionPendingResponseFailed: + reason: error + - connectionClosedEvent: + reason: error + - connectionCheckedOutEvent: {} + - connectionCheckedInEvent: {} # Find finishes. diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index b6e5ebc81b..c631275494 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -217,17 +217,19 @@ interface Connection { * The current state of the Connection. * * Possible values are the following: - * - "pending": The Connection has been created but has not yet been established. Contributes to - * totalConnectionCount and pendingConnectionCount. + * - "pending": The Connection has been created but has not yet been established. Contributes to + * totalConnectionCount and pendingConnectionCount. * - * - "available": The Connection has been established and is waiting in the pool to be checked - * out. Contributes to both totalConnectionCount and availableConnectionCount. + * - "pending response": The Connection is expecting a response for an operation that was timed out, but server's response was not fully read from the connection. * - * - "in use": The Connection has been established, checked out from the pool, and has yet - * to be checked back in. Contributes to totalConnectionCount. + * - "available": The Connection has been established and is waiting in the pool to be checked + * out. Contributes to both totalConnectionCount and availableConnectionCount. * - * - "closed": The Connection has had its socket closed and cannot be used for any future - * operations. Does not contribute to any connection counts. + * - "in use": The Connection has been established, checked out from the pool, and has yet + * to be checked back in. Contributes to totalConnectionCount. + * + * - "closed": The Connection has had its socket closed and cannot be used for any future + * operations. Does not contribute to any connection counts. * * Note: this field is mainly used for the purposes of describing state * in this specification. It is not required that drivers @@ -576,6 +578,87 @@ other threads from checking out [Connections](#connection) while establishing a Before a given [Connection](#connection) is returned from checkOut, it must be marked as "in use", and the pool's availableConnectionCount MUST be decremented. +##### Draining of Pending Response (drivers that support CSOT) + +If an operation times out while awaiting a server response: + +- **Non-exhaust commands**: If the command was not sent with the `exhaustAllowed` OP_MSG bit flag, the driver MUST: + 1. Put the connection into a "pending response" state. + 2. Record the current time in a way that can be updated while draining of pending response. +- **Exhaust commands**: If the command was sent with the `exhaustAllowed` bit, the driver MUST NOT transition the + connection to a "pending response" state and MUST instead close the connection. + +The next time connection in the "pending response" state is checked out, the driver MUST ensure that any remaining +response data is drained and the connection is ready to send the next command. + +1. **Persist and update timestamp**: The connection must record the current time immediately after the original timeout. + This timestamp MUST be updated to the current time whenever any bytes are successfully read, received, or consumed + while draining of the pending response. +2. **Aliveness check**: If driver uses synchronous read from the socket and the timestamp is older than 3 seconds, the + driver MUST perform aliveness check either performing a non-blocking read or using the minimal possible timeout to + check if at least one byte can be read/received. If at least one byte can be read the timestamp must be updated and + connection SHOULD be drained. If no bytes can be read, the connection MUST be closed. +3. **User-provided timeout**: If a user-provided timeout is specified for the operation, the driver MUST use the minimum + of (a) the remaining time before the 3 second "pending response" window elapses and (b) the remaining of + user-provided timeout as the effective timeout for the draining. +4. **Default timeout**: If no user-provided timeout is specified, the driver MUST use the minimum of (a) the remaining 3 + second "pending response" window and (b) the `waitQueueTimeoutMS` (if not supported by the driver remaining of + `serverSelectionTimeoutMS` should be used instead) as the effective timeout for the draining. +5. **On Timeout**: if timeout occurred while draining the pending response and the 3 seconds pending-response window was + not elapsed, Driver SHOULD return the connection to the pool, so the next operation can continue draining of the + pending response. +6. **Error or over-age**: If any error other than timeout occurs while draining the pending response, or if the draining + exceeds the 3 seconds pending-response window, the driver MUST close the connection. Driver SHOULD try to make + another checkout attempt accordingly to the Checking Out a Connection behavior. +7. **Clear pending state on success**: If the pending response is fully drained, the pending response state MUST be + cleared and connection can be used to execute the next command. + +```mermaid +sequenceDiagram + participant Driver + participant Pool + participant Conn as Connection (*) + participant Server + + Driver->>Pool: Checkout Connection (*) + Pool->>Driver: Return connection (*) + Driver->>Conn: Send operation (1) (CSOT enabled, maxTimeMS > 0, exhaustAllowed = false) + Conn->>Server: Send command + Server-->>Conn: (No response, socket times out) + + Conn->>Conn: Transition connection to "pending response" state, record current time + Conn->>Pool: Check connection back into pool + Pool->>Driver: Error + + Driver->>Pool: Checkout Connection (*) + Pool->>Conn: Draining of pending response from operation (1) + Conn->>Conn: Update pending read timestamp if bytes read + alt No error + Conn->>Conn: Clear pending response state + Conn->>Pool: Draining of pending response succeeded + Pool->>Driver: Return connection + else Error + alt Non-Timeout Error + Conn->>Conn: Close connection + Conn->>Pool: Draining of pending response failed + Pool->>Pool: Checkout next connection + else Timeout Error + alt Timeout window exceeded + Conn->>Conn: Close connection + Conn->>Pool: Draining of pending response failed + Pool->>Driver: Error + else Timeout window not exceeded + Conn->>Pool: Check connection back into pool + Pool->>Driver: Error + end + end + end +``` + +##### Pseudocode + +This subsection outlines the pseudocode steps for acquiring a connection from the pool. + ```text connection = Null tConnectionCheckOutStarted = current instant (use a monotonic clock if possible) @@ -592,6 +675,18 @@ try: if connection is perished: close connection connection = Null + if connection is in "pending response" state: + drain the pending response + if error: + if non-timeout error: + close connection + connection = Null + else if timeout error and last read timestamp on connection > 3 seconds old + close connection + connection = Null + else + check in the connection + connection = Null else if totalConnectionCount < maxPoolSize: if pendingConnectionCount < maxConnecting: connection = create connection @@ -633,6 +728,7 @@ if connection state is "pending": decrement pendingConnectionCount else: decrement availableConnectionCount + set connection state to "in use" # If there is no background thread, the pool MUST ensure that @@ -997,6 +1093,96 @@ interface ConnectionCheckedInEvent { */ connectionId: int64; } + +/** + * Emitted when the connection being checked out is attempting to read and + * discard a pending server response. + */ +interface PendingResponseStarted { + /** + * The ServerAddress of the Endpoint the pool is attempting to connect to. + */ + address: string; + + /** + * The ID of the Connection. + */ + connectionId: int64; + + /** + * The driver-generated request ID associated with the network timeout. + */ + requestId: int64; +} + +/** + * Emitted when the connection successfully read the pending read and is ready + * to be checked out. + */ +interface PendingResponseSucceeded { + /** + * The ServerAddress of the Endpoint the pool is attempting to connect to. + */ + address: string; + + /** + * The ID of the Connection. + */ + connectionId: int64; + + /** + * The time it took to complete the pending read. Measured from when the + * `PendingResponseStarted` event is emitted as part of the connection + * checkout. + * + * Drivers SHOULD choose an idiomatic duration type. If the type doesn't + * include units (e.g. `int64`), include them in the name (for example, + * `durationMS`). + */ + duration: Duration; + + /** + * The driver-generated request ID associated with the network timeout. + */ + requestId: int64; +} + +/** + * Emitted when the connection being checked out failed to complete the pending + * read. + */ +interface PendingResponseFailed { + /** + * The ServerAddress of the Endpoint the pool is attempting to connect to. + */ + address: string; + + /** + * The ID of the Connection. + */ + connectionId: int64; + + /** + * The driver-generated request ID associated with the network timeout. + */ + requestId: int64; + + /** + * Duration until the pending response drain failed. Measured from when the + * `PendingResponseStarted` event is emitted as part of the same connection + * checkout. + * + * Drivers SHOULD choose an idiomatic duration type. If the type doesn't + * include units (e.g. `int64`), include them in the name (for example, + * `durationMS`). + */ + duration: Duration; + + /** + * The reason for why the pending read failed. + */ + reason: string; +} ``` ### Connection Pool Logging @@ -1194,6 +1380,57 @@ placeholders as appropriate: > Connection checked in: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}} +#### Connection Pending Response Started + +In addition to the common fields defined above, this message MUST contain the following key-value pairs: + +| Key | Suggested Type | Value | +| ------------------ | -------------- | ------------------------------------------------------------------- | +| message | string | "Pending response started" | +| driverConnectionId | int64 | The driver-generated ID for the connection | +| requestId | int64 | The driver-generated request ID associated with the network timeout | + +The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in +placeholders as appropriate: + +> Pending response started: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}}, request +> ID={{requestId}} + +#### Connection Pending Response Succeeded + +In addition to the common fields defined above, this message MUST contain the following key-value pairs: + +| Key | Suggested Type | Value | +| ------------------ | ------------------ | -------------------------------------------------------------------- | +| message | string | "Pending response succeeded" | +| driverConnectionId | int64 | The driver-generated ID for the connection. | +| requestId | int64 | The driver-generated request ID associated with the network timeout. | +| durationMS | Int32/Int64/Double | `PendingResponseSucceeded.duration` converted to milliseconds. | + +The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in +placeholders as appropriate: + +> Pending response started: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}}, request +> ID={{requestId}}, duration={{durationMS}} ms + +#### Connection Pending Response Failed + +In addition to the common fields defined above, this message MUST contain the following key-value pairs: + +| Key | Suggested Type | Value | +| ------------------ | ------------------ | ------------------------------------------------------------------- | +| message | string | "Pending response failed" | +| driverConnectionId | int64 | The driver-generated ID for the connection | +| requestId | int64 | The driver-generated request ID associated with the network timeout | +| reason | string | The reason for why the pending response read failed | +| durationMS | Int32/Int64/Double | `PendingResponseFailed.duration` converted to milliseconds. | + +The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in +placeholders as appropriate: + +> Pending response started: address={{serverHost}}:{{serverPort}}, driver-generated ID={{driverConnectionId}}, request +> ID={{requestId}}, reason={{reason}}, duration={{durationMS}} ms + ### Connection Pool Errors A connection pool throws errors in specific circumstances. These Errors MUST be emitted by the pool. Errors SHOULD be @@ -1339,6 +1576,47 @@ some equivalent configuration, but this configuration will also require target f 5.0. The advantage of using Background Thread to manage perished connections is that it will work regardless of environment setup. +### Why is the pending response timeout not calculated dynamically? Why is it specifically 3 seconds? + +Using a dynamic timeout introduces additional complexity. In particular, RTT is an unreliable metric for predicting +future operation latency as both server and network conditions are unpredictable. Benchmarks demonstrate that even in +slow, high-latency scenarios (e.g. draining a 16MiB document over cross-country), reads reliably complete in under 1 +second. The 3-second timeout ensures that every realistic pending response draining will not result in premature +connection closure, but will still close in pathological conditions (e.g., a dead server or true network outage). + +### Why is the pending response read timeout not configurable? + +Because of the +["no knobs" mantra](https://github.com/mongodb/specifications/blob/master/source/driver-mantras.md#no-knobs). We can +always reconsider this in the future. + +### Why does the pending response timeout include the time the connection is idle in the pool? + +The pending response timeout includes idle time in the pool so that stale or unusable connections (e.g., if the socket +is dead) are detected and closed promptly instead of incurring an additional 3-second wait upon checkout. By tracking +the timeout during idle periods, we ensure that the driver can quickly determine if the connection should be closed with +a fast, non-blocking check as soon as it's checked out, avoiding introducing unnecessary latency while still protecting +connection availability. This approach maintains a balance between minimizing connection churn and ensuring users don't +encounter avoidable delays. + +### Why do we refresh the pending response timeout each time we successfully drain bytes from the TCP stream? + +By refreshing the timeout after each successful draining, we acknowledge that progress is being made, and we provide a +new time window for the next segment of data to arrive. Refreshing is less costly than re-establishing a connection +since there is no reason to believe that a new connection would reduce latency. + +### Why are exhaust cursors prohibited from transitioning a connection to the "pending response" state? + +Exhaust cursors are incompatible with the "pending response" connection state due to the non-deterministic nature of the +connection's completion, which occurs only when `moreToCome=0` is received. Consequently, discarding one of these +responses does not restore the connection to a reusable state. + +### Async IO Considerations for Awaiting a Pending Response + +Several drivers (e.g., event-loop or background-read designs) perform socket I/O asynchronously. After a socket times +out, the server's reply may be drained while the connection is idle in the pool. Therefore, the aliveness check only +applies to an undrained connection that has exceeded the pending-response window without progress. + ## Backwards Compatibility As mentioned in [Deprecated Options](#deprecated-options), some drivers currently implement the options `waitQueueSize` diff --git a/source/connection-monitoring-and-pooling/tests/README.md b/source/connection-monitoring-and-pooling/tests/README.md index 3d8aee40aa..fd8f0db706 100644 --- a/source/connection-monitoring-and-pooling/tests/README.md +++ b/source/connection-monitoring-and-pooling/tests/README.md @@ -21,6 +21,74 @@ The following tests have not yet been automated, but MUST still be tested: 5. When a check out attempt fails because connection set up throws an error, assert that a ConnectionCheckOutFailedEvent with reason="connectionError" is emitted. +### Pending Response + +Due to the complexity of managing a proxy layer, the following qualifying tests should only be run for non-ssl, +non-auth, and non-compression connections. + +#### Recover Partially Read Response + +This test verifies that if only part of a response was read before the timeout, the driver can drain the rest of the +response and reuse the connection for the next operation. + +1. Connect to the proxy server with `maxPoolSize=1` and `direct=true`, subscribing to the following CMAP events: + - `PendingResponseStarted` + - `PendingResponseSucceeded` + - `PendingResponseFailed` + - `ConnectionClosed` +2. Send a command (e.g. an insert) with a 200 millisecond timeout and the following `proxyTest` actions: + - `sendBytes`: We have 3 possible states here: + 1. Message size was partially read: random value between 1 and 3 inclusive + 2. Message size was read, body was not read at all: use 4 + 3. Message size was read, body read partially: random value between 5 and 100 inclusive + - `delayMS`: 400 ( to exceed the 200 ms timeout) + - `sendAll`: `true` +3. Assert that the operation failed with timeout error. +4. Issue another operation (e.g. another insert) and assert that it does not return an error. +5. Verify that the following sequence of events was observed: + - `ConnectionPendingResponseStarted` + - `ConnectionPendingResponseSucceeded` +6. Verify that NONE the following events was observed: + - `ConnectionPendingResponseFailed` + - `ConnectionClosed` + +#### Non-destructive Aliveness Check + +This test verifies that if a connection idles past the pending response window (3 seconds) after a partial header, the +aliveness check does not attempt to discard bytes from the TCP stream. + +1. Connect to the proxy server with `maxPoolSize=1` and `direct=true`, subscribing to the following CMAP events: + - `PendingResponseStarted` + - `PendingResponseSucceeded` + - `PendingResponseFailed` + - `ConnectionClosed` +2. Send a command (e.g. an insert) with a 200 millisecond timeout and the following `proxyTest` actions: + - `sendBytes`: any value between 1 and 3 + - `delayMS`: 400 ( to exceed the 200 ms timeout) + - `sendAll`: `true` +3. Assert that the operation failed with timeout error. +4. Sleep for 3 seconds +5. Issue another operation (e.g. another insert) and assert that it does not return an error. +6. Verify that the following sequence of events was observed: + - `ConnectionPendingResponseStarted` + - `ConnectionPendingResponseFailed` + - `ConnectionPendingResponseStarted` + - `ConnectionPendingResponseSucceeded` +7. Verify that NONE the following events was observed: + - `ConnectionClosed` + +#### Exhaust Cursors + +Drivers that support the `exhaustAllowed` `OP_MSG` bit flag must ensure that responses which contain `moreToCome` will +not result in a connection being put into a "pending response" state. Drivers that don't support this behavior can skip +this prose test. + +1. Configure a failpoint to block `getMore` for 500ms. +2. Insert > 2 records into the collection. +3. Create an exhaust cursor using `find` and iterate one `getMore` using `batchSize=1`. +4. Call a subsequent `getMore` on the exhaust cursor with a client-side timeout of 100ms. +5. Ensure that the `ConnectionClosed` event is emitted due to timeout. + ## Logging Tests Tests for connection pool logging can be found in the `/logging` subdirectory and are written in the diff --git a/source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.json b/source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.json new file mode 100644 index 0000000000..7992911e85 --- /dev/null +++ b/source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.json @@ -0,0 +1,578 @@ +{ + "description": "connection-logging", + "schemaVersion": "1.24", + "runOnRequirements": [ + { + "topologies": [ + "single" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "failPointClient" + } + } + ], + "tests": [ + { + "description": "force a pending response read, fail first try, succeed second try", + "operations": [ + { + "name": "createEntities", + "object": "testRunner", + "arguments": { + "entities": [ + { + "client": { + "id": "client", + "uriOptions": { + "retryReads": false, + "appname": "clientAppName", + "maxPoolSize": 1, + "heartbeatFrequencyMS": 10000 + }, + "observeLogMessages": { + "connection": "debug" + } + } + }, + { + "database": { + "id": "test", + "client": "client", + "databaseName": "test" + } + }, + { + "collection": { + "id": "coll", + "database": "test", + "collectionName": "coll" + } + } + ] + } + }, + { + "name": "runCommand", + "object": "test", + "arguments": { + "command": { + "ping": 1 + }, + "commandName": "ping" + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 200 + } + } + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "find", + "object": "coll", + "arguments": { + "filter": { + "_id": 1 + }, + "timeoutMS": 50 + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "wait", + "object": "testRunner", + "arguments": { + "ms": 150 + } + }, + { + "name": "find", + "object": "coll", + "arguments": { + "filter": { + "_id": 1 + } + } + } + ], + "expectLogMessages": [ + { + "client": "client", + "ignoreExtraMessages": true, + "messages": [ + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection pool created", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection pool ready", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checkout started", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection created", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection ready", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "durationMS": { + "$$type": [ + "double", + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checked out", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "durationMS": { + "$$type": [ + "double", + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checked in", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checkout started", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checked out", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "durationMS": { + "$$type": [ + "double", + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checked in", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checkout started", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Pending response started", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "requestId": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Pending response failed", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "requestId": { + "$$type": [ + "int", + "long" + ] + }, + "reason": "timeout" + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checkout failed", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checkout started", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Pending response started", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "requestId": { + "$$type": [ + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Pending response succeeded", + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "requestId": { + "$$type": [ + "int", + "long" + ] + }, + "durationMS": { + "$$type": [ + "double", + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checked out", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + }, + "durationMS": { + "$$type": [ + "double", + "int", + "long" + ] + } + } + }, + { + "level": "debug", + "component": "connection", + "data": { + "message": "Connection checked in", + "driverConnectionId": { + "$$type": [ + "int", + "long" + ] + }, + "serverHost": { + "$$type": "string" + }, + "serverPort": { + "$$type": [ + "int", + "long" + ] + } + } + } + ] + } + ] + } + ] +} diff --git a/source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.yml b/source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.yml new file mode 100644 index 0000000000..9153d35519 --- /dev/null +++ b/source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.yml @@ -0,0 +1,254 @@ +description: "connection-logging" + +schemaVersion: "1.24" + +runOnRequirements: + - topologies: + - single # The number of log messages is different for each topology since there is a connection pool per host. + +createEntities: + - client: + id: &failPointClient failPointClient + +tests: + - description: "force a pending response read, fail first try, succeed second try" + operations: + - name: createEntities + object: testRunner + arguments: + entities: + - client: + id: &client client + uriOptions: + retryReads: false + appname: &clientAppName clientAppName + maxPoolSize: 1 + # use a high heartbeatFrequencyMS to avoid a successful monitor check marking the pool as + # ready (and emitting another event) during the course of test execution. + heartbeatFrequencyMS: 10000 + observeLogMessages: + connection: debug + - database: + id: &database test + client: *client + databaseName: *database + - collection: + id: &collection coll + database: *database + collectionName: *collection + + # Run a ping command to pre-load the pool with a connection. + - name: runCommand + object: *database + arguments: + command: + ping: 1 + commandName: ping + + # Create a failpoint to block the first operation + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: { times: 1 } + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 300 + + # Execute operation with timeout less than block time + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: { _id: 3, x: 1 } + expectError: + isTimeoutError: true + + # Execute a subsequent operation which should time out during the + # pending response read attempt. + - name: find + object: *collection + arguments: + filter: { _id: 1 } + timeoutMS: 75 + expectError: + isTimeoutError: true + + # Wait for the blocked connection to free up. + - name: wait + # operation. + object: testRunner + arguments: + ms: 200 + + # Execute a subsequent operation which should completed the pending read. + - name: find + object: *collection + arguments: + filter: { _id: 1 } + + expectLogMessages: + - client: *client + ignoreExtraMessages: true + messages: + - level: debug + component: connection + data: + message: "Connection pool created" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Connection pool ready" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + # Ping + - level: debug + component: connection + data: + message: "Connection checkout started" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Connection created" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Connection ready" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + durationMS: { $$type: [double, int, long] } + + - level: debug + component: connection + data: + message: "Connection checked out" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + durationMS: { $$type: [double, int, long] } + + - level: debug + component: connection + data: + message: "Connection checked in" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + # Insert + - level: debug + component: connection + data: + message: "Connection checkout started" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Connection checked out" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + durationMS: { $$type: [double, int, long] } + + - level: debug + component: connection + data: + message: "Connection checked in" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + # First Find + - level: debug + component: connection + data: + message: "Connection checkout started" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Pending response started" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + driverConnectionId: { $$type: [int, long] } + requestId: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Pending response failed" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + driverConnectionId: { $$type: [int, long] } + requestId: { $$type: [int, long] } + reason: "timeout" + + - level: debug + component: connection + data: + message: "Connection checkout failed" + serverHost: { $$type: string } + serverPort: { $$type: [ int, long ] } + + # Second Find + - level: debug + component: connection + data: + message: "Connection checkout started" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Pending response started" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + driverConnectionId: { $$type: [int, long] } + requestId: { $$type: [int, long] } + + - level: debug + component: connection + data: + message: "Pending response succeeded" + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + driverConnectionId: { $$type: [int, long] } + requestId: { $$type: [int, long] } + durationMS: { $$type: [double, int, long] } + + - level: debug + component: connection + data: + message: "Connection checked out" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } + durationMS: { $$type: [double, int, long] } + + - level: debug + component: connection + data: + message: "Connection checked in" + driverConnectionId: { $$type: [int, long] } + serverHost: { $$type: string } + serverPort: { $$type: [int, long] } diff --git a/source/connection-monitoring-and-pooling/tests/logging/connection-logging.yml b/source/connection-monitoring-and-pooling/tests/logging/connection-logging.yml index 49868a062e..5a697edcdf 100644 --- a/source/connection-monitoring-and-pooling/tests/logging/connection-logging.yml +++ b/source/connection-monitoring-and-pooling/tests/logging/connection-logging.yml @@ -222,3 +222,4 @@ tests: reason: "An error occurred while trying to establish a new connection" error: { $$exists: true } durationMS: { $$type: [double, int, long] } + diff --git a/source/unified-test-format/schema-1.28.json b/source/unified-test-format/schema-1.28.json new file mode 100644 index 0000000000..c678104db8 --- /dev/null +++ b/source/unified-test-format/schema-1.28.json @@ -0,0 +1,1264 @@ +{ + "$schema": "https://json-schema.org/draft/2019-09/schema#", + "title": "Unified Test Format", + "type": "object", + "additionalProperties": false, + "required": [ + "description", + "schemaVersion", + "tests" + ], + "properties": { + "description": { + "type": "string" + }, + "schemaVersion": { + "$ref": "#/definitions/version" + }, + "runOnRequirements": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/runOnRequirement" + } + }, + "createEntities": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/entity" + } + }, + "initialData": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/collectionData" + } + }, + "tests": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/test" + } + }, + "_yamlAnchors": { + "type": "object", + "additionalProperties": true + } + }, + "definitions": { + "version": { + "type": "string", + "pattern": "^[0-9]+(\\.[0-9]+){1,2}$" + }, + "runOnRequirement": { + "type": "object", + "additionalProperties": false, + "minProperties": 1, + "properties": { + "maxServerVersion": { + "$ref": "#/definitions/version" + }, + "minServerVersion": { + "$ref": "#/definitions/version" + }, + "topologies": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "enum": [ + "single", + "replicaset", + "sharded", + "sharded-replicaset", + "load-balanced" + ] + } + }, + "serverless": { + "type": "string", + "enum": [ + "require", + "forbid", + "allow" + ] + }, + "serverParameters": { + "type": "object", + "minProperties": 1 + }, + "auth": { + "type": "boolean" + }, + "authMechanism": { + "type": "string" + }, + "csfle": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "object", + "additionalProperties": false, + "minProperties": 1, + "properties": { + "minLibmongocryptVersion": { + "$ref": "#/definitions/version" + } + } + } + ] + } + } + }, + "entity": { + "type": "object", + "additionalProperties": false, + "maxProperties": 1, + "minProperties": 1, + "properties": { + "client": { + "type": "object", + "additionalProperties": false, + "required": [ + "id" + ], + "properties": { + "id": { + "type": "string" + }, + "uriOptions": { + "type": "object" + }, + "useMultipleMongoses": { + "type": "boolean" + }, + "observeEvents": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "enum": [ + "commandStartedEvent", + "commandSucceededEvent", + "commandFailedEvent", + "poolCreatedEvent", + "poolReadyEvent", + "poolClearedEvent", + "poolClosedEvent", + "connectionCreatedEvent", + "connectionReadyEvent", + "connectionClosedEvent", + "connectionCheckOutStartedEvent", + "connectionCheckOutFailedEvent", + "connectionCheckedOutEvent", + "connectionCheckedInEvent", + "connectionPendingResponseSucceeded", + "connectionPendingResponseStarted", + "connectionPendingResponseFailed", + "serverDescriptionChangedEvent", + "topologyDescriptionChangedEvent", + "topologyOpeningEvent", + "topologyClosedEvent" + ] + } + }, + "ignoreCommandMonitoringEvents": { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + }, + "storeEventsAsEntities": { + "deprecated": true, + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/storeEventsAsEntity" + } + }, + "observeLogMessages": { + "type": "object", + "minProperties": 1, + "additionalProperties": false, + "properties": { + "command": { + "$ref": "#/definitions/logSeverityLevel" + }, + "topology": { + "$ref": "#/definitions/logSeverityLevel" + }, + "serverSelection": { + "$ref": "#/definitions/logSeverityLevel" + }, + "connection": { + "$ref": "#/definitions/logSeverityLevel" + } + } + }, + "observeTracingMessages": { + "type": "object", + "additionalProperties": false, + "properties": { + "enableCommandPayload": { + "type": "boolean" + } + } + }, + "serverApi": { + "$ref": "#/definitions/serverApi" + }, + "observeSensitiveCommands": { + "type": "boolean" + }, + "autoEncryptOpts": { + "type": "object", + "additionalProperties": false, + "required": [ + "keyVaultNamespace", + "kmsProviders" + ], + "properties": { + "keyVaultNamespace": { + "type": "string" + }, + "bypassAutoEncryption": { + "type": "boolean" + }, + "kmsProviders": { + "$ref": "#/definitions/kmsProviders" + }, + "schemaMap": { + "type": "object", + "additionalProperties": { + "type": "object" + } + }, + "extraOptions": { + "type": "object" + }, + "encryptedFieldsMap": { + "type": "object", + "additionalProperties": { + "type": "object" + } + }, + "bypassQueryAnalysis": { + "type": "boolean" + }, + "keyExpirationMS": { + "type": "integer" + } + } + }, + "awaitMinPoolSizeMS": { + "type": "integer" + } + } + }, + "clientEncryption": { + "type": "object", + "additionalProperties": false, + "required": [ + "id", + "clientEncryptionOpts" + ], + "properties": { + "id": { + "type": "string" + }, + "clientEncryptionOpts": { + "$ref": "#/definitions/clientEncryptionOpts" + } + } + }, + "database": { + "type": "object", + "additionalProperties": false, + "required": [ + "id", + "client", + "databaseName" + ], + "properties": { + "id": { + "type": "string" + }, + "client": { + "type": "string" + }, + "databaseName": { + "type": "string" + }, + "databaseOptions": { + "$ref": "#/definitions/collectionOrDatabaseOptions" + } + } + }, + "collection": { + "type": "object", + "additionalProperties": false, + "required": [ + "id", + "database", + "collectionName" + ], + "properties": { + "id": { + "type": "string" + }, + "database": { + "type": "string" + }, + "collectionName": { + "type": "string" + }, + "collectionOptions": { + "$ref": "#/definitions/collectionOrDatabaseOptions" + } + } + }, + "session": { + "type": "object", + "additionalProperties": false, + "required": [ + "id", + "client" + ], + "properties": { + "id": { + "type": "string" + }, + "client": { + "type": "string" + }, + "sessionOptions": { + "type": "object" + } + } + }, + "bucket": { + "type": "object", + "additionalProperties": false, + "required": [ + "id", + "database" + ], + "properties": { + "id": { + "type": "string" + }, + "database": { + "type": "string" + }, + "bucketOptions": { + "type": "object" + } + } + }, + "thread": { + "type": "object", + "additionalProperties": false, + "required": [ + "id" + ], + "properties": { + "id": { + "type": "string" + } + } + } + } + }, + "logComponent": { + "type": "string", + "enum": [ + "command", + "topology", + "serverSelection", + "connection" + ] + }, + "spanComponent": { + "type": "object", + "additionalProperties": false, + "required": [ + "name", + "attributes" + ], + "properties": { + "name": { + "type": "string" + }, + "attributes": { + "type": "object" + }, + "nested": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/spanComponent" + } + } + } + }, + "logSeverityLevel": { + "type": "string", + "enum": [ + "emergency", + "alert", + "critical", + "error", + "warning", + "notice", + "info", + "debug", + "trace" + ] + }, + "clientEncryptionOpts": { + "type": "object", + "additionalProperties": false, + "required": [ + "keyVaultClient", + "keyVaultNamespace", + "kmsProviders" + ], + "properties": { + "keyVaultClient": { + "type": "string" + }, + "keyVaultNamespace": { + "type": "string" + }, + "kmsProviders": { + "$ref": "#/definitions/kmsProviders" + }, + "keyExpirationMS": { + "type": "integer" + } + } + }, + "kmsProviders": { + "$defs": { + "stringOrPlaceholder": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "object", + "additionalProperties": false, + "required": [ + "$$placeholder" + ], + "properties": { + "$$placeholder": {} + } + } + ] + } + }, + "type": "object", + "additionalProperties": false, + "patternProperties": { + "^aws(:[a-zA-Z0-9_]+)?$": { + "type": "object", + "additionalProperties": false, + "properties": { + "accessKeyId": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "secretAccessKey": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "sessionToken": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + } + } + }, + "^azure(:[a-zA-Z0-9_]+)?$": { + "type": "object", + "additionalProperties": false, + "properties": { + "tenantId": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "clientId": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "clientSecret": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "identityPlatformEndpoint": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + } + } + }, + "^gcp(:[a-zA-Z0-9_]+)?$": { + "type": "object", + "additionalProperties": false, + "properties": { + "email": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "privateKey": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + }, + "endpoint": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + } + } + }, + "^kmip(:[a-zA-Z0-9_]+)?$": { + "type": "object", + "additionalProperties": false, + "properties": { + "endpoint": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + } + } + }, + "^local(:[a-zA-Z0-9_]+)?$": { + "type": "object", + "additionalProperties": false, + "properties": { + "key": { + "$ref": "#/definitions/kmsProviders/$defs/stringOrPlaceholder" + } + } + } + } + }, + "storeEventsAsEntity": { + "deprecated": true, + "type": "object", + "additionalProperties": false, + "required": [ + "id", + "events" + ], + "properties": { + "id": { + "type": "string" + }, + "events": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "enum": [ + "PoolCreatedEvent", + "PoolReadyEvent", + "PoolClearedEvent", + "PoolClosedEvent", + "ConnectionCreatedEvent", + "ConnectionReadyEvent", + "ConnectionClosedEvent", + "ConnectionCheckOutStartedEvent", + "ConnectionCheckOutFailedEvent", + "ConnectionCheckedOutEvent", + "ConnectionCheckedInEvent", + "CommandStartedEvent", + "CommandSucceededEvent", + "CommandFailedEvent", + "ServerDescriptionChangedEvent", + "TopologyDescriptionChangedEvent" + ] + } + } + } + }, + "collectionData": { + "type": "object", + "additionalProperties": false, + "required": [ + "collectionName", + "databaseName", + "documents" + ], + "properties": { + "collectionName": { + "type": "string" + }, + "databaseName": { + "type": "string" + }, + "createOptions": { + "type": "object", + "properties": { + "writeConcern": false + } + }, + "documents": { + "type": "array", + "items": { + "type": "object" + } + } + } + }, + "expectedEventsForClient": { + "type": "object", + "additionalProperties": false, + "required": [ + "client", + "events" + ], + "properties": { + "client": { + "type": "string" + }, + "eventType": { + "type": "string", + "enum": [ + "command", + "cmap", + "sdam" + ] + }, + "events": { + "type": "array" + }, + "ignoreExtraEvents": { + "type": "boolean" + } + }, + "oneOf": [ + { + "required": [ + "eventType" + ], + "properties": { + "eventType": { + "const": "command" + }, + "events": { + "type": "array", + "items": { + "$ref": "#/definitions/expectedCommandEvent" + } + } + } + }, + { + "required": [ + "eventType" + ], + "properties": { + "eventType": { + "const": "cmap" + }, + "events": { + "type": "array", + "items": { + "$ref": "#/definitions/expectedCmapEvent" + } + } + } + }, + { + "required": [ + "eventType" + ], + "properties": { + "eventType": { + "const": "sdam" + }, + "events": { + "type": "array", + "items": { + "$ref": "#/definitions/expectedSdamEvent" + } + } + } + }, + { + "additionalProperties": false, + "properties": { + "client": { + "type": "string" + }, + "events": { + "type": "array", + "items": { + "$ref": "#/definitions/expectedCommandEvent" + } + }, + "ignoreExtraEvents": { + "type": "boolean" + } + } + } + ] + }, + "expectedCommandEvent": { + "type": "object", + "additionalProperties": false, + "maxProperties": 1, + "minProperties": 1, + "properties": { + "commandStartedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "command": { + "type": "object" + }, + "commandName": { + "type": "string" + }, + "databaseName": { + "type": "string" + }, + "hasServiceId": { + "type": "boolean" + }, + "hasServerConnectionId": { + "type": "boolean" + } + } + }, + "commandSucceededEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "reply": { + "type": "object" + }, + "commandName": { + "type": "string" + }, + "databaseName": { + "type": "string" + }, + "hasServiceId": { + "type": "boolean" + }, + "hasServerConnectionId": { + "type": "boolean" + } + } + }, + "commandFailedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "commandName": { + "type": "string" + }, + "databaseName": { + "type": "string" + }, + "hasServiceId": { + "type": "boolean" + }, + "hasServerConnectionId": { + "type": "boolean" + } + } + } + } + }, + "expectedCmapEvent": { + "type": "object", + "additionalProperties": false, + "maxProperties": 1, + "minProperties": 1, + "properties": { + "poolCreatedEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "poolReadyEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "poolClearedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "hasServiceId": { + "type": "boolean" + }, + "interruptInUseConnections": { + "type": "boolean" + } + } + }, + "poolClosedEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionCreatedEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionReadyEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionClosedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "reason": { + "type": "string" + } + } + }, + "connectionCheckOutStartedEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionCheckOutFailedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "reason": { + "type": "string" + } + } + }, + "connectionCheckedOutEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionCheckedInEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionPendingResponseStarted": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionPendingResponseSucceeded": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionPendingResponseFailed": { + "type": "object", + "additionalProperties": false, + "properties": { + "reason": { + "type": "string" + } + } + } + } + }, + "expectedSdamEvent": { + "type": "object", + "additionalProperties": false, + "maxProperties": 1, + "minProperties": 1, + "properties": { + "serverDescriptionChangedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "previousDescription": { + "$ref": "#/definitions/serverDescription" + }, + "newDescription": { + "$ref": "#/definitions/serverDescription" + } + } + }, + "topologyDescriptionChangedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "previousDescription": { + "$ref": "#/definitions/topologyDescription" + }, + "newDescription": { + "$ref": "#/definitions/topologyDescription" + } + } + }, + "serverHeartbeatStartedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "awaited": { + "type": "boolean" + } + } + }, + "serverHeartbeatSucceededEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "awaited": { + "type": "boolean" + } + } + }, + "serverHeartbeatFailedEvent": { + "type": "object", + "additionalProperties": false, + "properties": { + "awaited": { + "type": "boolean" + } + } + }, + "topologyOpeningEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "topologyClosedEvent": { + "type": "object", + "additionalProperties": false, + "properties": {} + } + } + }, + "serverDescription": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { + "type": "string", + "enum": [ + "Standalone", + "Mongos", + "PossiblePrimary", + "RSPrimary", + "RSSecondary", + "RSOther", + "RSArbiter", + "RSGhost", + "LoadBalancer", + "Unknown" + ] + } + } + }, + "topologyDescription": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { + "type": "string", + "enum": [ + "Single", + "Unknown", + "ReplicaSetNoPrimary", + "ReplicaSetWithPrimary", + "Sharded", + "LoadBalanced" + ] + } + } + }, + "expectedLogMessagesForClient": { + "type": "object", + "additionalProperties": false, + "required": [ + "client", + "messages" + ], + "properties": { + "client": { + "type": "string" + }, + "messages": { + "type": "array", + "items": { + "$ref": "#/definitions/expectedLogMessage" + } + }, + "ignoreExtraMessages": { + "type": "boolean" + }, + "ignoreMessages": { + "type": "array", + "items": { + "$ref": "#/definitions/expectedLogMessage" + } + } + } + }, + "expectTracingMessagesForClient": { + "additionalProperties": false, + "type": "object", + "required": [ + "client", + "spans" + ], + "properties": { + "client": { + "type": "string" + }, + "ignoreExtraSpans": { + "type": "boolean" + }, + "spans": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/spanComponent" + } + } + } + }, + "expectedLogMessage": { + "type": "object", + "additionalProperties": false, + "required": [ + "level", + "component", + "data" + ], + "properties": { + "level": { + "$ref": "#/definitions/logSeverityLevel" + }, + "component": { + "$ref": "#/definitions/logComponent" + }, + "data": { + "type": "object" + }, + "failureIsRedacted": { + "type": "boolean" + } + } + }, + "collectionOrDatabaseOptions": { + "type": "object", + "additionalProperties": false, + "properties": { + "readConcern": { + "type": "object" + }, + "readPreference": { + "type": "object" + }, + "writeConcern": { + "type": "object" + }, + "timeoutMS": { + "type": "integer" + } + } + }, + "serverApi": { + "type": "object", + "additionalProperties": false, + "required": [ + "version" + ], + "properties": { + "version": { + "type": "string" + }, + "strict": { + "type": "boolean" + }, + "deprecationErrors": { + "type": "boolean" + } + } + }, + "operation": { + "type": "object", + "additionalProperties": false, + "required": [ + "name", + "object" + ], + "properties": { + "name": { + "type": "string" + }, + "object": { + "type": "string" + }, + "arguments": { + "type": "object" + }, + "ignoreResultAndError": { + "type": "boolean" + }, + "expectError": { + "$ref": "#/definitions/expectedError" + }, + "expectResult": {}, + "saveResultAsEntity": { + "type": "string" + } + }, + "allOf": [ + { + "not": { + "required": [ + "expectError", + "expectResult" + ] + } + }, + { + "not": { + "required": [ + "expectError", + "saveResultAsEntity" + ] + } + }, + { + "not": { + "required": [ + "ignoreResultAndError", + "expectResult" + ] + } + }, + { + "not": { + "required": [ + "ignoreResultAndError", + "expectError" + ] + } + }, + { + "not": { + "required": [ + "ignoreResultAndError", + "saveResultAsEntity" + ] + } + } + ] + }, + "expectedError": { + "type": "object", + "additionalProperties": false, + "minProperties": 1, + "properties": { + "isError": { + "type": "boolean", + "const": true + }, + "isClientError": { + "type": "boolean" + }, + "isTimeoutError": { + "type": "boolean" + }, + "errorContains": { + "type": "string" + }, + "errorCode": { + "type": "integer" + }, + "errorCodeName": { + "type": "string" + }, + "errorLabelsContain": { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + }, + "errorLabelsOmit": { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + } + }, + "writeErrors": { + "type": "object" + }, + "writeConcernErrors": { + "type": "array", + "items": { + "type": "object" + } + }, + "errorResponse": { + "type": "object" + }, + "expectResult": {} + } + }, + "test": { + "type": "object", + "additionalProperties": false, + "required": [ + "description", + "operations" + ], + "properties": { + "description": { + "type": "string" + }, + "runOnRequirements": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/runOnRequirement" + } + }, + "skipReason": { + "type": "string" + }, + "operations": { + "type": "array", + "items": { + "$ref": "#/definitions/operation" + } + }, + "expectEvents": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/expectedEventsForClient" + } + }, + "expectLogMessages": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/expectedLogMessagesForClient" + } + }, + "expectTracingMessages": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/expectTracingMessagesForClient" + } + }, + "outcome": { + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/collectionData" + } + } + } + } + } +} diff --git a/source/unified-test-format/schema-latest.json b/source/unified-test-format/schema-latest.json index d195ba5af0..c678104db8 100644 --- a/source/unified-test-format/schema-latest.json +++ b/source/unified-test-format/schema-latest.json @@ -157,6 +157,9 @@ "connectionCheckOutFailedEvent", "connectionCheckedOutEvent", "connectionCheckedInEvent", + "connectionPendingResponseSucceeded", + "connectionPendingResponseStarted", + "connectionPendingResponseFailed", "serverDescriptionChangedEvent", "topologyDescriptionChangedEvent", "topologyOpeningEvent", @@ -837,6 +840,25 @@ "type": "object", "additionalProperties": false, "properties": {} + }, + "connectionPendingResponseStarted": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionPendingResponseSucceeded": { + "type": "object", + "additionalProperties": false, + "properties": {} + }, + "connectionPendingResponseFailed": { + "type": "object", + "additionalProperties": false, + "properties": { + "reason": { + "type": "string" + } + } } } }, diff --git a/source/unified-test-format/unified-test-format.md b/source/unified-test-format/unified-test-format.md index 59700ff510..0032aa9df3 100644 --- a/source/unified-test-format/unified-test-format.md +++ b/source/unified-test-format/unified-test-format.md @@ -2,7 +2,7 @@ - Status: Accepted - Minimum Server Version: N/A -- Current Schema Version: 1.27.0 +- Current Schema Version: 1.28.0 ______________________________________________________________________ @@ -486,6 +486,9 @@ The structure of this object is as follows: - [connectionCheckOutFailedEvent](#expectedEvent_connectionCheckOutFailedEvent) - [connectionCheckedOutEvent](#expectedEvent_connectionCheckedOutEvent) - [connectionCheckedInEvent](#expectedEvent_connectionCheckedInEvent) + - [connectionPendingResponseStarted](#expectedEvent_connectionPendingResponseStarted) + - [connectionPendingResponseSucceeded](#expectedEvent_connectionPendingResponseSucceeded) + - [connectionPendingResponseFailed](#expectedEvent_connectionPendingResponseFailed) - [serverDescriptionChangedEvent](#expectedEvent_serverDescriptionChangedEvent) - [serverHeartbeatStartedEvent](#expectedEvent_serverHeartbeatStartedEvent) - [serverHeartbeatSucceededEvent](#expectedEvent_serverHeartbeatSucceededEvent) @@ -1133,6 +1136,27 @@ The structure of this object is as follows: - `connectionCheckedInEvent`: Optional object. If present, this object MUST be an empty document as all fields in this event are non-deterministic. + + +- `connectionPendingResponseStarted`: Optional object. If present, this object MUST be an empty document as all fields + in this event are non-deterministic. + + + +- `connectionPendingResponseSucceeded`: Optional object. If present, this object MUST be an empty document as all fields + in this event are non-deterministic. + + + +- `connectionPendingResponseFailed`: Optional object. Assertions for one or more + [connectionPendingResponseFailed](../connection-monitoring-and-pooling/connection-monitoring-and-pooling.md#events) + fields. + + The structure of this object is as follows: + + - `reason`: Optional string. Test runners MUST assert that the reason in the published event matches this value. Valid + values for this field are defined in the CMAP spec. + ##### expectedSdamEvent The structure of this object is as follows: @@ -3459,6 +3483,11 @@ other specs *and* collating spec changes developed in parallel or during the sam ## Changelog +- 2025-09-25: **Schema version 1.28**. + + Add draining of pending response events to `expectEvents` and `observeEvents` for client entities. These include + `PendingResponseStartedEvent`, `PendingResponseSucceededEvent`, and `PendingResponseFailedEvent`. + - 2025-09-17: **Schema version 1.27.** Add `observeTracingMessages` configuration for clients and `expectTracingMessages` for test expectations. This allows From 3e1f8e8f827de1d0340d3af27b2cc610edd62d0a Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Thu, 25 Sep 2025 17:39:07 -0700 Subject: [PATCH 02/14] pr --- .../connection-monitoring-and-pooling.md | 57 +++++++++---------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index c631275494..04dd3ea95a 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -1095,8 +1095,7 @@ interface ConnectionCheckedInEvent { } /** - * Emitted when the connection being checked out is attempting to read and - * discard a pending server response. + * Emitted when the connection is attempting to drain of the pending response. */ interface PendingResponseStarted { /** @@ -1110,14 +1109,13 @@ interface PendingResponseStarted { connectionId: int64; /** - * The driver-generated request ID associated with the network timeout. + * The driver-generated request ID of the operation that caused the pending response state. */ requestId: int64; } /** - * Emitted when the connection successfully read the pending read and is ready - * to be checked out. + * Emitted when the connection successfully drained of the pending response. */ interface PendingResponseSucceeded { /** @@ -1131,7 +1129,7 @@ interface PendingResponseSucceeded { connectionId: int64; /** - * The time it took to complete the pending read. Measured from when the + * The time it took to complete the pending response drainig. Measured from when the * `PendingResponseStarted` event is emitted as part of the connection * checkout. * @@ -1142,14 +1140,13 @@ interface PendingResponseSucceeded { duration: Duration; /** - * The driver-generated request ID associated with the network timeout. + * The driver-generated request ID of the operation that caused the pending response state. */ requestId: int64; } /** - * Emitted when the connection being checked out failed to complete the pending - * read. + * Emitted when the connection failed to drain of the pending response. */ interface PendingResponseFailed { /** @@ -1163,13 +1160,13 @@ interface PendingResponseFailed { connectionId: int64; /** - * The driver-generated request ID associated with the network timeout. + * The driver-generated request ID of the operation that caused the pending response state. */ requestId: int64; /** - * Duration until the pending response drain failed. Measured from when the - * `PendingResponseStarted` event is emitted as part of the same connection + * Duration until the pending response draining failed. Measured from when the + * `PendingResponseStarted` event is emitted as part of the connection * checkout. * * Drivers SHOULD choose an idiomatic duration type. If the type doesn't @@ -1384,11 +1381,11 @@ placeholders as appropriate: In addition to the common fields defined above, this message MUST contain the following key-value pairs: -| Key | Suggested Type | Value | -| ------------------ | -------------- | ------------------------------------------------------------------- | -| message | string | "Pending response started" | -| driverConnectionId | int64 | The driver-generated ID for the connection | -| requestId | int64 | The driver-generated request ID associated with the network timeout | +| Key | Suggested Type | Value | +| ------------------ | -------------- | ---------------------------------------------------------------------------------------- | +| message | string | "Pending response started" | +| driverConnectionId | int64 | The driver-generated ID for the connection | +| requestId | int64 | The driver-generated request ID of the operation that caused the pending response state. | The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in placeholders as appropriate: @@ -1400,12 +1397,12 @@ placeholders as appropriate: In addition to the common fields defined above, this message MUST contain the following key-value pairs: -| Key | Suggested Type | Value | -| ------------------ | ------------------ | -------------------------------------------------------------------- | -| message | string | "Pending response succeeded" | -| driverConnectionId | int64 | The driver-generated ID for the connection. | -| requestId | int64 | The driver-generated request ID associated with the network timeout. | -| durationMS | Int32/Int64/Double | `PendingResponseSucceeded.duration` converted to milliseconds. | +| Key | Suggested Type | Value | +| ------------------ | ------------------ | ---------------------------------------------------------------------------------------- | +| message | string | "Pending response succeeded" | +| driverConnectionId | int64 | The driver-generated ID for the connection. | +| requestId | int64 | The driver-generated request ID of the operation that caused the pending response state. | +| durationMS | Int32/Int64/Double | `PendingResponseSucceeded.duration` converted to milliseconds. | The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in placeholders as appropriate: @@ -1417,13 +1414,13 @@ placeholders as appropriate: In addition to the common fields defined above, this message MUST contain the following key-value pairs: -| Key | Suggested Type | Value | -| ------------------ | ------------------ | ------------------------------------------------------------------- | -| message | string | "Pending response failed" | -| driverConnectionId | int64 | The driver-generated ID for the connection | -| requestId | int64 | The driver-generated request ID associated with the network timeout | -| reason | string | The reason for why the pending response read failed | -| durationMS | Int32/Int64/Double | `PendingResponseFailed.duration` converted to milliseconds. | +| Key | Suggested Type | Value | +| ------------------ | ------------------ | ---------------------------------------------------------------------------------------- | +| message | string | "Pending response failed" | +| driverConnectionId | int64 | The driver-generated ID for the connection | +| requestId | int64 | The driver-generated request ID of the operation that caused the pending response state. | +| reason | string | The reason for why the pending response read failed | +| durationMS | Int32/Int64/Double | `PendingResponseFailed.duration` converted to milliseconds. | The unstructured form SHOULD be as follows, using the values defined in the structured format above to fill in placeholders as appropriate: From 250c1231993e415084954189cfc41e3c78d83bd4 Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Thu, 25 Sep 2025 20:09:39 -0700 Subject: [PATCH 03/14] Fix unified spec tests --- .../tests/pending-response.json | 2 +- .../tests/pending-response.yml | 2 +- .../tests/logging/connection-logging-csot.json | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/source/client-side-operations-timeout/tests/pending-response.json b/source/client-side-operations-timeout/tests/pending-response.json index 278368da57..8a015344f3 100644 --- a/source/client-side-operations-timeout/tests/pending-response.json +++ b/source/client-side-operations-timeout/tests/pending-response.json @@ -1,6 +1,6 @@ { "description": "Connection churn is prevented by reading pending responses during connection checkout", - "schemaVersion": "1.27", + "schemaVersion": "1.28", "runOnRequirements": [ { "minServerVersion": "4.4", diff --git a/source/client-side-operations-timeout/tests/pending-response.yml b/source/client-side-operations-timeout/tests/pending-response.yml index 4cc2f613fb..b8e8679fa9 100644 --- a/source/client-side-operations-timeout/tests/pending-response.yml +++ b/source/client-side-operations-timeout/tests/pending-response.yml @@ -1,5 +1,5 @@ description: "Connection churn is prevented by reading pending responses during connection checkout" -schemaVersion: "1.27" +schemaVersion: "1.28" runOnRequirements: - minServerVersion: "4.4" # TODO(SERVER-96344): When using failpoints, mongos returns MaxTimeMSExpired diff --git a/source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.json b/source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.json index 7992911e85..f39aaf9b76 100644 --- a/source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.json +++ b/source/connection-monitoring-and-pooling/tests/logging/connection-logging-csot.json @@ -80,7 +80,7 @@ "insert" ], "blockConnection": true, - "blockTimeMS": 200 + "blockTimeMS": 300 } } } @@ -106,7 +106,7 @@ "filter": { "_id": 1 }, - "timeoutMS": 50 + "timeoutMS": 75 }, "expectError": { "isTimeoutError": true @@ -116,7 +116,7 @@ "name": "wait", "object": "testRunner", "arguments": { - "ms": 150 + "ms": 200 } }, { From 1fcc0997b1296979c81aaae4536d276209360e6f Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov <31327136+sanych-sun@users.noreply.github.com> Date: Fri, 3 Oct 2025 12:29:48 -0700 Subject: [PATCH 04/14] Update source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md Co-authored-by: Bailey Pearson --- .../connection-monitoring-and-pooling.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index 04dd3ea95a..82b87447e6 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -602,8 +602,7 @@ response data is drained and the connection is ready to send the next command. of (a) the remaining time before the 3 second "pending response" window elapses and (b) the remaining of user-provided timeout as the effective timeout for the draining. 4. **Default timeout**: If no user-provided timeout is specified, the driver MUST use the minimum of (a) the remaining 3 - second "pending response" window and (b) the `waitQueueTimeoutMS` (if not supported by the driver remaining of - `serverSelectionTimeoutMS` should be used instead) as the effective timeout for the draining. + second "pending response" window and (b) remaining timeout for connection checkout. 5. **On Timeout**: if timeout occurred while draining the pending response and the 3 seconds pending-response window was not elapsed, Driver SHOULD return the connection to the pool, so the next operation can continue draining of the pending response. From d97a995944d4147c083e991f316f9ba635c9d3ba Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov <31327136+sanych-sun@users.noreply.github.com> Date: Fri, 3 Oct 2025 12:33:01 -0700 Subject: [PATCH 05/14] Update source/client-side-operations-timeout/tests/pending-response.yml Co-authored-by: Bailey Pearson --- .../client-side-operations-timeout/tests/pending-response.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/client-side-operations-timeout/tests/pending-response.yml b/source/client-side-operations-timeout/tests/pending-response.yml index b8e8679fa9..a649e2db19 100644 --- a/source/client-side-operations-timeout/tests/pending-response.yml +++ b/source/client-side-operations-timeout/tests/pending-response.yml @@ -294,7 +294,7 @@ tests: - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Find finishes. # If the connection is closed server-side while draining the response, the - # driver must retry with a different connection. + # driver must retry with a different connection (write operation) - description: "write op retries when connection closes server-side while draining response" operations: # Create a failpoint to block the first and second operation. From f442ccec4b6a439f4a5892e959a248408d5f0472 Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Mon, 6 Oct 2025 14:26:56 -0700 Subject: [PATCH 06/14] PR --- .../tests/pending-response.json | 104 +++++++++++++++--- .../tests/pending-response.yml | 70 ++++++++---- .../connection-monitoring-and-pooling.md | 19 +++- source/unified-test-format/schema-1.28.json | 12 +- source/unified-test-format/schema-latest.json | 12 +- .../unified-test-format.md | 24 ++-- 6 files changed, 174 insertions(+), 67 deletions(-) diff --git a/source/client-side-operations-timeout/tests/pending-response.json b/source/client-side-operations-timeout/tests/pending-response.json index 8a015344f3..7c6c969ffb 100644 --- a/source/client-side-operations-timeout/tests/pending-response.json +++ b/source/client-side-operations-timeout/tests/pending-response.json @@ -29,12 +29,16 @@ "observeEvents": [ "commandFailedEvent", "commandSucceededEvent", + "connectionCheckOutStartedEvent", "connectionCheckedOutEvent", + "ConnectionCheckOutFailedEvent", "connectionCheckedInEvent", + "connectionCreatedEvent", + "connectionReadyEvent", "connectionClosedEvent", - "connectionPendingResponseSucceeded", - "connectionPendingResponseStarted", - "connectionPendingResponseFailed" + "connectionPendingResponseSucceededEvent", + "connectionPendingResponseStartedEvent", + "connectionPendingResponseFailedEvent" ] } }, @@ -128,6 +132,9 @@ "client": "client", "eventType": "cmap", "events": [ + { + "connectionCheckOutStartedEvent": {} + }, { "connectionCheckedOutEvent": {} }, @@ -135,10 +142,13 @@ "connectionCheckedInEvent": {} }, { - "connectionPendingResponseStarted": {} + "connectionCheckOutStartedEvent": {} + }, + { + "connectionPendingResponseStartedEvent": {} }, { - "connectionPendingResponseSucceeded": {} + "connectionPendingResponseSucceededEvent": {} }, { "connectionCheckedOutEvent": {} @@ -217,6 +227,9 @@ "client": "client", "eventType": "cmap", "events": [ + { + "connectionCheckOutStartedEvent": {} + }, { "connectionCheckedOutEvent": {} }, @@ -224,10 +237,13 @@ "connectionCheckedInEvent": {} }, { - "connectionPendingResponseStarted": {} + "connectionCheckOutStartedEvent": {} }, { - "connectionPendingResponseFailed": { + "connectionPendingResponseStartedEvent": {} + }, + { + "connectionPendingResponseFailedEvent": { "reason": "timeout" } }, @@ -236,6 +252,18 @@ "reason": "error" } }, + { + "connectionCheckOutFailedEvent": {} + }, + { + "connectionCheckOutStartedEvent": {} + }, + { + "connectionCreatedEvent": {} + }, + { + "connectionReadyEvent": {} + }, { "connectionCheckedOutEvent": {} }, @@ -314,6 +342,9 @@ "client": "client", "eventType": "cmap", "events": [ + { + "connectionCheckOutStartedEvent": {} + }, { "connectionCheckedOutEvent": {} }, @@ -321,10 +352,13 @@ "connectionCheckedInEvent": {} }, { - "connectionPendingResponseStarted": {} + "connectionCheckOutStartedEvent": {} + }, + { + "connectionPendingResponseStartedEvent": {} }, { - "connectionPendingResponseSucceeded": {} + "connectionPendingResponseSucceededEvent": {} }, { "connectionCheckedOutEvent": {} @@ -416,6 +450,9 @@ "client": "client", "eventType": "cmap", "events": [ + { + "connectionCheckOutStartedEvent": {} + }, { "connectionCheckedOutEvent": {} }, @@ -423,18 +460,27 @@ "connectionCheckedInEvent": {} }, { - "connectionPendingResponseStarted": {} + "connectionCheckOutStartedEvent": {} }, { - "connectionPendingResponseFailed": { + "connectionPendingResponseStartedEvent": {} + }, + { + "connectionPendingResponseFailedEvent": { "reason": "timeout" } }, { - "connectionPendingResponseStarted": {} + "connectionCheckOutFailedEvent": {} + }, + { + "connectionCheckOutStartedEvent": {} + }, + { + "connectionPendingResponseStartedEvent": {} }, { - "connectionPendingResponseSucceeded": {} + "connectionPendingResponseSucceededEvent": {} }, { "connectionCheckedOutEvent": {} @@ -522,6 +568,9 @@ "client": "client", "eventType": "cmap", "events": [ + { + "connectionCheckOutStartedEvent": {} + }, { "connectionCheckedOutEvent": {} }, @@ -529,10 +578,13 @@ "connectionCheckedInEvent": {} }, { - "connectionPendingResponseStarted": {} + "connectionCheckOutStartedEvent": {} }, { - "connectionPendingResponseFailed": { + "connectionPendingResponseStartedEvent": {} + }, + { + "connectionPendingResponseFailedEvent": { "reason": "error" } }, @@ -541,6 +593,12 @@ "reason": "error" } }, + { + "connectionCreatedEvent": {} + }, + { + "connectionReadyEvent": {} + }, { "connectionCheckedOutEvent": {} }, @@ -628,6 +686,9 @@ "client": "client", "eventType": "cmap", "events": [ + { + "connectionCheckOutStartedEvent": {} + }, { "connectionCheckedOutEvent": {} }, @@ -635,10 +696,13 @@ "connectionCheckedInEvent": {} }, { - "connectionPendingResponseStarted": {} + "connectionCheckOutStartedEvent": {} }, { - "connectionPendingResponseFailed": { + "connectionPendingResponseStartedEvent": {} + }, + { + "connectionPendingResponseFailedEvent": { "reason": "error" } }, @@ -647,6 +711,12 @@ "reason": "error" } }, + { + "connectionCreatedEvent": {} + }, + { + "connectionReadyEvent": {} + }, { "connectionCheckedOutEvent": {} }, diff --git a/source/client-side-operations-timeout/tests/pending-response.yml b/source/client-side-operations-timeout/tests/pending-response.yml index a649e2db19..cfd3a70eb3 100644 --- a/source/client-side-operations-timeout/tests/pending-response.yml +++ b/source/client-side-operations-timeout/tests/pending-response.yml @@ -21,12 +21,16 @@ createEntities: observeEvents: - commandFailedEvent - commandSucceededEvent + - connectionCheckOutStartedEvent - connectionCheckedOutEvent + - ConnectionCheckOutFailedEvent - connectionCheckedInEvent + - connectionCreatedEvent + - connectionReadyEvent - connectionClosedEvent - - connectionPendingResponseSucceeded - - connectionPendingResponseStarted - - connectionPendingResponseFailed + - connectionPendingResponseSucceededEvent + - connectionPendingResponseStartedEvent + - connectionPendingResponseFailedEvent - database: id: &database test client: *client @@ -80,10 +84,12 @@ tests: - client: *client eventType: cmap events: + - connectionCheckOutStartedEvent: {} - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Insert fails. - - connectionPendingResponseStarted: {} - - connectionPendingResponseSucceeded: {} # Find operation drains connection. + - connectionCheckOutStartedEvent: {} + - connectionPendingResponseStartedEvent: {} + - connectionPendingResponseSucceededEvent: {} # Find operation drains connection. - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Find succeeds. # Attempting a pending response read on a non-timeout operation that gets no @@ -127,14 +133,20 @@ tests: - client: *client eventType: cmap events: + - connectionCheckOutStartedEvent: {} - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Insert fails. - - connectionPendingResponseStarted: {} - - connectionPendingResponseFailed: + - connectionCheckOutStartedEvent: {} + - connectionPendingResponseStartedEvent: {} + - connectionPendingResponseFailedEvent: reason: timeout - connectionClosedEvent: reason: error - - connectionCheckedOutEvent: {} # Find's retry starts and succeeds + - connectionCheckOutFailedEvent: {} + - connectionCheckOutStartedEvent: {} # Find's retry starts and succeeds + - connectionCreatedEvent: {} + - connectionReadyEvent: {} + - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Attempting a pending response read on a realistic timeout operation that can # immediately read from the TCP buffer should complete the pending read and @@ -177,10 +189,12 @@ tests: - client: *client eventType: cmap events: + - connectionCheckOutStartedEvent: {} - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Insert fails. - - connectionPendingResponseStarted: {} - - connectionPendingResponseSucceeded: {} + - connectionCheckOutStartedEvent: {} + - connectionPendingResponseStartedEvent: {} + - connectionPendingResponseSucceededEvent: {} - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Find succeeds. # It may take multiple calls to the pending response handler to drain the @@ -230,13 +244,17 @@ tests: - client: *client eventType: cmap events: + - connectionCheckOutStartedEvent: {} - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Insert fails. - - connectionPendingResponseStarted: {} # First find fails - - connectionPendingResponseFailed: + - connectionCheckOutStartedEvent: {} + - connectionPendingResponseStartedEvent: {} # First find fails + - connectionPendingResponseFailedEvent: reason: timeout - - connectionPendingResponseStarted: {} # Second find drains the buffer. - - connectionPendingResponseSucceeded: {} + - connectionCheckOutFailedEvent: {} + - connectionCheckOutStartedEvent: {} + - connectionPendingResponseStartedEvent: {} # Second find drains the buffer. + - connectionPendingResponseSucceededEvent: {} - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Second find succeeds. # If the connection is closed server-side while draining the response, the @@ -284,13 +302,17 @@ tests: - client: *client eventType: cmap events: + - connectionCheckOutStartedEvent: {} - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Insert fails. - - connectionPendingResponseStarted: {} # Pending read fails on first find - - connectionPendingResponseFailed: + - connectionCheckOutStartedEvent: {} + - connectionPendingResponseStartedEvent: {} # Pending read fails on first find + - connectionPendingResponseFailedEvent: reason: error - connectionClosedEvent: reason: error + - connectionCreatedEvent: { } + - connectionReadyEvent: { } - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Find finishes. # If the connection is closed server-side while draining the response, the @@ -338,12 +360,16 @@ tests: - client: *client eventType: cmap events: - - connectionCheckedOutEvent: {} - - connectionCheckedInEvent: {} # Insert fails. - - connectionPendingResponseStarted: {} # Pending read fails on first find - - connectionPendingResponseFailed: + - connectionCheckOutStartedEvent: { } + - connectionCheckedOutEvent: { } + - connectionCheckedInEvent: { } # Insert fails. + - connectionCheckOutStartedEvent: { } + - connectionPendingResponseStartedEvent: { } # Pending read fails on first find + - connectionPendingResponseFailedEvent: reason: error - connectionClosedEvent: reason: error - - connectionCheckedOutEvent: {} - - connectionCheckedInEvent: {} # Find finishes. + - connectionCreatedEvent: { } + - connectionReadyEvent: { } + - connectionCheckedOutEvent: { } + - connectionCheckedInEvent: { } # Find finishes. diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index 82b87447e6..3e08de65ac 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -235,7 +235,7 @@ interface Connection { * in this specification. It is not required that drivers * actually include this field in their implementations of Connection. */ - state: "pending" | "available" | "in use" | "closed"; + state: "pending" | "pending response" | "available" | "in use" | "closed"; } ``` @@ -675,17 +675,26 @@ try: close connection connection = Null if connection is in "pending response" state: + tConnectionDrainingStarted = current instant (use a monotonic clock if possible) + emit PendingResponseStartedEvent and equivalent log message drain the pending response if error: + tConnectionDrainingFailed = current instant (use a monotonic clock if possible) if non-timeout error: + emit PendingResponseFailedEvent(reason="error", duration = tConnectionDrainingFailed - tConnectionDrainingStarted) and equivalent log message close connection connection = Null else if timeout error and last read timestamp on connection > 3 seconds old + emit PendingResponseFailedEvent(reason="timeout", duration = tConnectionDrainingFailed - tConnectionDrainingStarted) and equivalent log message close connection connection = Null else + emit PendingResponseFailedEvent(reason="timeout", duration = tConnectionDrainingFailed - tConnectionDrainingStarted) and equivalent log message check in the connection connection = Null + else + tConnectionDrainingSucceeded = current instant (use a monotonic clock if possible) + emit PendingResponseSucceededEvent(duration = tConnectionDrainingSucceeded - tConnectionDrainingStarted) and equivalent log message else if totalConnectionCount < maxPoolSize: if pendingConnectionCount < maxConnecting: connection = create connection @@ -1096,7 +1105,7 @@ interface ConnectionCheckedInEvent { /** * Emitted when the connection is attempting to drain of the pending response. */ -interface PendingResponseStarted { +interface PendingResponseStartedEvent { /** * The ServerAddress of the Endpoint the pool is attempting to connect to. */ @@ -1116,7 +1125,7 @@ interface PendingResponseStarted { /** * Emitted when the connection successfully drained of the pending response. */ -interface PendingResponseSucceeded { +interface PendingResponseSucceededEvent { /** * The ServerAddress of the Endpoint the pool is attempting to connect to. */ @@ -1147,7 +1156,7 @@ interface PendingResponseSucceeded { /** * Emitted when the connection failed to drain of the pending response. */ -interface PendingResponseFailed { +interface PendingResponseFailedEvent { /** * The ServerAddress of the Endpoint the pool is attempting to connect to. */ @@ -1649,6 +1658,8 @@ to close and remove from its pool a [Connection](#connection) which has unread e ## Changelog +- 2025-09-25: Added pending response draining behavior. + - 2025-01-22: Clarify durationMS in logs may be Int32/Int64/Double. - 2024-11-27: Relaxed the WaitQueue fairness requirement. diff --git a/source/unified-test-format/schema-1.28.json b/source/unified-test-format/schema-1.28.json index c678104db8..f3525c1160 100644 --- a/source/unified-test-format/schema-1.28.json +++ b/source/unified-test-format/schema-1.28.json @@ -157,9 +157,9 @@ "connectionCheckOutFailedEvent", "connectionCheckedOutEvent", "connectionCheckedInEvent", - "connectionPendingResponseSucceeded", - "connectionPendingResponseStarted", - "connectionPendingResponseFailed", + "connectionPendingResponseSucceededEvent", + "connectionPendingResponseStartedEvent", + "connectionPendingResponseFailedEvent", "serverDescriptionChangedEvent", "topologyDescriptionChangedEvent", "topologyOpeningEvent", @@ -841,17 +841,17 @@ "additionalProperties": false, "properties": {} }, - "connectionPendingResponseStarted": { + "connectionPendingResponseStartedEvent": { "type": "object", "additionalProperties": false, "properties": {} }, - "connectionPendingResponseSucceeded": { + "connectionPendingResponseSucceededEvent": { "type": "object", "additionalProperties": false, "properties": {} }, - "connectionPendingResponseFailed": { + "connectionPendingResponseFailedEvent": { "type": "object", "additionalProperties": false, "properties": { diff --git a/source/unified-test-format/schema-latest.json b/source/unified-test-format/schema-latest.json index c678104db8..f3525c1160 100644 --- a/source/unified-test-format/schema-latest.json +++ b/source/unified-test-format/schema-latest.json @@ -157,9 +157,9 @@ "connectionCheckOutFailedEvent", "connectionCheckedOutEvent", "connectionCheckedInEvent", - "connectionPendingResponseSucceeded", - "connectionPendingResponseStarted", - "connectionPendingResponseFailed", + "connectionPendingResponseSucceededEvent", + "connectionPendingResponseStartedEvent", + "connectionPendingResponseFailedEvent", "serverDescriptionChangedEvent", "topologyDescriptionChangedEvent", "topologyOpeningEvent", @@ -841,17 +841,17 @@ "additionalProperties": false, "properties": {} }, - "connectionPendingResponseStarted": { + "connectionPendingResponseStartedEvent": { "type": "object", "additionalProperties": false, "properties": {} }, - "connectionPendingResponseSucceeded": { + "connectionPendingResponseSucceededEvent": { "type": "object", "additionalProperties": false, "properties": {} }, - "connectionPendingResponseFailed": { + "connectionPendingResponseFailedEvent": { "type": "object", "additionalProperties": false, "properties": { diff --git a/source/unified-test-format/unified-test-format.md b/source/unified-test-format/unified-test-format.md index 0032aa9df3..6893a4fe47 100644 --- a/source/unified-test-format/unified-test-format.md +++ b/source/unified-test-format/unified-test-format.md @@ -486,9 +486,9 @@ The structure of this object is as follows: - [connectionCheckOutFailedEvent](#expectedEvent_connectionCheckOutFailedEvent) - [connectionCheckedOutEvent](#expectedEvent_connectionCheckedOutEvent) - [connectionCheckedInEvent](#expectedEvent_connectionCheckedInEvent) - - [connectionPendingResponseStarted](#expectedEvent_connectionPendingResponseStarted) - - [connectionPendingResponseSucceeded](#expectedEvent_connectionPendingResponseSucceeded) - - [connectionPendingResponseFailed](#expectedEvent_connectionPendingResponseFailed) + - [connectionPendingResponseStartedEvent](#expectedEvent_connectionPendingResponseStartedEvent) + - [connectionPendingResponseSucceededEvent](#expectedEvent_connectionPendingResponseSucceededEvent) + - [connectionPendingResponseFailedEvent](#expectedEvent_connectionPendingResponseFailedEvent) - [serverDescriptionChangedEvent](#expectedEvent_serverDescriptionChangedEvent) - [serverHeartbeatStartedEvent](#expectedEvent_serverHeartbeatStartedEvent) - [serverHeartbeatSucceededEvent](#expectedEvent_serverHeartbeatSucceededEvent) @@ -1136,20 +1136,20 @@ The structure of this object is as follows: - `connectionCheckedInEvent`: Optional object. If present, this object MUST be an empty document as all fields in this event are non-deterministic. - + -- `connectionPendingResponseStarted`: Optional object. If present, this object MUST be an empty document as all fields - in this event are non-deterministic. +- `connectionPendingResponseStartedEvent`: Optional object. If present, this object MUST be an empty document as all + fields in this event are non-deterministic. - + -- `connectionPendingResponseSucceeded`: Optional object. If present, this object MUST be an empty document as all fields - in this event are non-deterministic. +- `connectionPendingResponseSucceededEvent`: Optional object. If present, this object MUST be an empty document as all + fields in this event are non-deterministic. - + -- `connectionPendingResponseFailed`: Optional object. Assertions for one or more - [connectionPendingResponseFailed](../connection-monitoring-and-pooling/connection-monitoring-and-pooling.md#events) +- `connectionPendingResponseFailedEvent`: Optional object. Assertions for one or more + [connectionPendingResponseFailedEvent](../connection-monitoring-and-pooling/connection-monitoring-and-pooling.md#events) fields. The structure of this object is as follows: From e44e04c2946ca109e4b95c73bf72eead7be9aaec Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Mon, 6 Oct 2025 15:32:19 -0700 Subject: [PATCH 07/14] Fix test --- .../client-side-operations-timeout/tests/pending-response.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/client-side-operations-timeout/tests/pending-response.yml b/source/client-side-operations-timeout/tests/pending-response.yml index cfd3a70eb3..1d5f0a1ff4 100644 --- a/source/client-side-operations-timeout/tests/pending-response.yml +++ b/source/client-side-operations-timeout/tests/pending-response.yml @@ -23,7 +23,7 @@ createEntities: - commandSucceededEvent - connectionCheckOutStartedEvent - connectionCheckedOutEvent - - ConnectionCheckOutFailedEvent + - connectionCheckOutFailedEvent - connectionCheckedInEvent - connectionCreatedEvent - connectionReadyEvent From fca4dc0f75cc10115d7449fc139715cd13db8fec Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Mon, 6 Oct 2025 15:33:15 -0700 Subject: [PATCH 08/14] Fix test --- .../client-side-operations-timeout/tests/pending-response.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/client-side-operations-timeout/tests/pending-response.json b/source/client-side-operations-timeout/tests/pending-response.json index 7c6c969ffb..c9677dd7b3 100644 --- a/source/client-side-operations-timeout/tests/pending-response.json +++ b/source/client-side-operations-timeout/tests/pending-response.json @@ -31,7 +31,7 @@ "commandSucceededEvent", "connectionCheckOutStartedEvent", "connectionCheckedOutEvent", - "ConnectionCheckOutFailedEvent", + "connectionCheckOutFailedEvent", "connectionCheckedInEvent", "connectionCreatedEvent", "connectionReadyEvent", From 473503fae251f0082408c764c74393d517a460ae Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Tue, 7 Oct 2025 10:16:56 -0700 Subject: [PATCH 09/14] pr --- .../tests/pending-response.json | 35 +++++-------------- .../tests/pending-response.yml | 28 +++++---------- .../connection-monitoring-and-pooling.md | 24 ++++++------- 3 files changed, 29 insertions(+), 58 deletions(-) diff --git a/source/client-side-operations-timeout/tests/pending-response.json b/source/client-side-operations-timeout/tests/pending-response.json index c9677dd7b3..98b46c411b 100644 --- a/source/client-side-operations-timeout/tests/pending-response.json +++ b/source/client-side-operations-timeout/tests/pending-response.json @@ -252,12 +252,6 @@ "reason": "error" } }, - { - "connectionCheckOutFailedEvent": {} - }, - { - "connectionCheckOutStartedEvent": {} - }, { "connectionCreatedEvent": {} }, @@ -292,7 +286,7 @@ "insert" ], "blockConnection": true, - "blockTimeMS": 250 + "blockTimeMS": 100 } } } @@ -315,7 +309,7 @@ "name": "find", "object": "coll", "arguments": { - "timeoutMS": 200, + "timeoutMS": 100, "filter": { "_id": 1 } @@ -388,7 +382,7 @@ "insert" ], "blockConnection": true, - "blockTimeMS": 500 + "blockTimeMS": 200 } } } @@ -424,6 +418,7 @@ "name": "find", "object": "coll", "arguments": { + "timeoutMS": 200, "filter": { "_id": 1 } @@ -510,7 +505,7 @@ "insert" ], "blockConnection": true, - "blockTimeMS": 500, + "blockTimeMS": 200, "closeConnection": true } } @@ -530,18 +525,11 @@ "isTimeoutError": true } }, - { - "name": "wait", - "object": "testRunner", - "arguments": { - "ms": 500 - } - }, { "name": "find", "object": "coll", "arguments": { - "timeoutMS": 50, + "timeoutMS": 1000, "filter": { "_id": 1 } @@ -627,7 +615,7 @@ "insert" ], "blockConnection": true, - "blockTimeMS": 500, + "blockTimeMS": 200, "closeConnection": true } } @@ -647,18 +635,11 @@ "isTimeoutError": true } }, - { - "name": "wait", - "object": "testRunner", - "arguments": { - "ms": 500 - } - }, { "name": "insertOne", "object": "coll", "arguments": { - "timeoutMS": 50, + "timeoutMS": 1000, "document": { "_id": 3, "x": 1 diff --git a/source/client-side-operations-timeout/tests/pending-response.yml b/source/client-side-operations-timeout/tests/pending-response.yml index 1d5f0a1ff4..93ccbda963 100644 --- a/source/client-side-operations-timeout/tests/pending-response.yml +++ b/source/client-side-operations-timeout/tests/pending-response.yml @@ -142,8 +142,6 @@ tests: reason: timeout - connectionClosedEvent: reason: error - - connectionCheckOutFailedEvent: {} - - connectionCheckOutStartedEvent: {} # Find's retry starts and succeeds - connectionCreatedEvent: {} - connectionReadyEvent: {} - connectionCheckedOutEvent: {} @@ -164,7 +162,7 @@ tests: data: failCommands: ["insert"] blockConnection: true - blockTimeMS: 250 + blockTimeMS: 100 # Execute operation with timeout less than block time. - name: insertOne object: *collection @@ -177,7 +175,7 @@ tests: - name: find object: *collection arguments: - timeoutMS: 200 + timeoutMS: 100 filter: {_id: 1} expectEvents: - client: *client @@ -212,7 +210,7 @@ tests: data: failCommands: ["insert"] blockConnection: true - blockTimeMS: 500 + blockTimeMS: 200 # Execute operation with timeout less than block time. - name: insertOne object: *collection @@ -233,7 +231,9 @@ tests: - name: find object: *collection arguments: + timeoutMS: 200 filter: {_id: 1} + expectEvents: - client: *client events: @@ -272,7 +272,7 @@ tests: data: failCommands: ["insert"] blockConnection: true - blockTimeMS: 500 + blockTimeMS: 200 closeConnection: true # Execute operation with timeout less than block time. - name: insertOne @@ -282,15 +282,10 @@ tests: document: {_id: 3, x: 1} expectError: isTimeoutError: true - - name: wait - object: testRunner - arguments: - ms: 500 - # Execute a subsequent operation with a timeout less than the block time. - name: find object: *collection arguments: - timeoutMS: 50 + timeoutMS: 1000 filter: {_id: 1} expectEvents: - client: *client @@ -330,7 +325,7 @@ tests: data: failCommands: ["insert"] blockConnection: true - blockTimeMS: 500 + blockTimeMS: 200 closeConnection: true # Execute operation with timeout less than block time. - name: insertOne @@ -340,15 +335,10 @@ tests: document: {_id: 3, x: 1} expectError: isTimeoutError: true - - name: wait - object: testRunner - arguments: - ms: 500 - # Execute a subsequent operation with a timeout less than the block time. - name: insertOne object: *collection arguments: - timeoutMS: 50 + timeoutMS: 1000 document: {_id: 3, x: 1} expectEvents: - client: *client diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index 3e08de65ac..98a7cccac9 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -677,22 +677,22 @@ try: if connection is in "pending response" state: tConnectionDrainingStarted = current instant (use a monotonic clock if possible) emit PendingResponseStartedEvent and equivalent log message - drain the pending response - if error: + try: + drain the pending response + except timeout: tConnectionDrainingFailed = current instant (use a monotonic clock if possible) - if non-timeout error: - emit PendingResponseFailedEvent(reason="error", duration = tConnectionDrainingFailed - tConnectionDrainingStarted) and equivalent log message + emit PendingResponseFailedEvent(reason="timeout", duration = tConnectionDrainingFailed - tConnectionDrainingStarted) and quivalent log message + if last read timestamp on connection > 3 seconds old close connection - connection = Null - else if timeout error and last read timestamp on connection > 3 seconds old - emit PendingResponseFailedEvent(reason="timeout", duration = tConnectionDrainingFailed - tConnectionDrainingStarted) and equivalent log message - close connection - connection = Null else - emit PendingResponseFailedEvent(reason="timeout", duration = tConnectionDrainingFailed - tConnectionDrainingStarted) and equivalent log message check in the connection - connection = Null - else + connection = Null + except: + tConnectionDrainingFailed = current instant (use a monotonic clock if possible) + emit PendingResponseFailedEvent(reason="error", duration = tConnectionDrainingFailed - tConnectionDrainingStarted) and equivalent log message + close connection + connection = Null + else: tConnectionDrainingSucceeded = current instant (use a monotonic clock if possible) emit PendingResponseSucceededEvent(duration = tConnectionDrainingSucceeded - tConnectionDrainingStarted) and equivalent log message else if totalConnectionCount < maxPoolSize: From 8677cbcb48bf62f55bead4513d42132a902f095e Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Tue, 7 Oct 2025 18:10:48 -0700 Subject: [PATCH 10/14] pr --- .../tests/README.md | 119 ++++++++++++------ 1 file changed, 78 insertions(+), 41 deletions(-) diff --git a/source/connection-monitoring-and-pooling/tests/README.md b/source/connection-monitoring-and-pooling/tests/README.md index fd8f0db706..61654a8568 100644 --- a/source/connection-monitoring-and-pooling/tests/README.md +++ b/source/connection-monitoring-and-pooling/tests/README.md @@ -23,59 +23,96 @@ The following tests have not yet been automated, but MUST still be tested: ### Pending Response -Due to the complexity of managing a proxy layer, the following qualifying tests should only be run for non-ssl, -non-auth, and non-compression connections. +We use mongoproxy to emulate the network delays. See +[mongoproxy/README.md](https://github.com/mongodb-labs/drivers-evergreen-tools/blob/master/.evergreen/mongoproxy/README.md) +for more details. Due to the complexity of managing a proxy layer, the following qualifying tests should only be run for +non-ssl, non-auth, and non-compression connections. #### Recover Partially Read Response This test verifies that if only part of a response was read before the timeout, the driver can drain the rest of the -response and reuse the connection for the next operation. - -1. Connect to the proxy server with `maxPoolSize=1` and `direct=true`, subscribing to the following CMAP events: - - `PendingResponseStarted` - - `PendingResponseSucceeded` - - `PendingResponseFailed` - - `ConnectionClosed` -2. Send a command (e.g. an insert) with a 200 millisecond timeout and the following `proxyTest` actions: - - `sendBytes`: We have 3 possible states here: +response and reuse the connection for the next operation. This test has 3 variation, see step 4, `sendBytes` parameter. + +1. Setup mongoproxy. +2. Connect to the proxy server with `maxPoolSize=1`, `direct=true`, disable read and write retryability, subscribing to + the following CMAP events: + - `PendingResponseStartedEvent` + - `PendingResponseSucceededEvent` + - `PendingResponseFailedEvent` + - `ConnectionCreatedEvent` + - `ConnectionClosedEvent` +3. Execute `ping` command to populate the connection pool. +4. Send a command (e.g. an insert) with a 200 millisecond timeout and the following `proxyTest` actions: + - `sendBytes`: There are 3 test cases here: 1. Message size was partially read: random value between 1 and 3 inclusive 2. Message size was read, body was not read at all: use 4 3. Message size was read, body read partially: random value between 5 and 100 inclusive - - `delayMS`: 400 ( to exceed the 200 ms timeout) - - `sendAll`: `true` -3. Assert that the operation failed with timeout error. -4. Issue another operation (e.g. another insert) and assert that it does not return an error. -5. Verify that the following sequence of events was observed: - - `ConnectionPendingResponseStarted` - - `ConnectionPendingResponseSucceeded` -6. Verify that NONE the following events was observed: - - `ConnectionPendingResponseFailed` - - `ConnectionClosed` + - `delayMs`: 400 ( to exceed the 200 ms timeout) + - `sendAll`: `true` Example of run command payload: + ``` + { + insert: "coll", + documents: [{ _id : 42 }], + proxyTest: { + actions: [ + { sendBytes : 2 }, + { delayMs : 400 }, + { sendAll : true }, + ] + } + } + ``` +5. Assert that the operation failed with timeout error. +6. Issue another operation (e.g. another insert) and assert that it does not return an error. +7. Verify that the following sequence of events was observed: + - `ConnectionCreatedEvent` + - `ConnectionPendingResponseStartedEvent` + - `ConnectionPendingResponseSucceededEvent` +8. Verify that NONE the following events was observed: + - `ConnectionPendingResponseFailedEvent` + - `ConnectionClosedEvent` #### Non-destructive Aliveness Check This test verifies that if a connection idles past the pending response window (3 seconds) after a partial header, the aliveness check does not attempt to discard bytes from the TCP stream. -1. Connect to the proxy server with `maxPoolSize=1` and `direct=true`, subscribing to the following CMAP events: - - `PendingResponseStarted` - - `PendingResponseSucceeded` - - `PendingResponseFailed` - - `ConnectionClosed` -2. Send a command (e.g. an insert) with a 200 millisecond timeout and the following `proxyTest` actions: - - `sendBytes`: any value between 1 and 3 - - `delayMS`: 400 ( to exceed the 200 ms timeout) - - `sendAll`: `true` -3. Assert that the operation failed with timeout error. -4. Sleep for 3 seconds -5. Issue another operation (e.g. another insert) and assert that it does not return an error. -6. Verify that the following sequence of events was observed: - - `ConnectionPendingResponseStarted` - - `ConnectionPendingResponseFailed` - - `ConnectionPendingResponseStarted` - - `ConnectionPendingResponseSucceeded` -7. Verify that NONE the following events was observed: - - `ConnectionClosed` +1. Setup mongoproxy. +2. Connect to the proxy server with `maxPoolSize=1`, `direct=true`, disable read and write retryability, subscribing to + the following CMAP events: + - `PendingResponseStartedEvent` + - `PendingResponseSucceededEvent` + - `PendingResponseFailedEvent` + - `ConnectionCreatedEvent` + - `ConnectionClosedEvent` +3. Execute `ping` command to populate the connection pool. +4. Send a command (e.g. an insert) with a 200 millisecond timeout and the following `proxyTest` actions: + - `sendBytes`: random value between 1 and 3 inclusive + - `delayMs`: 400 ( to exceed the 200 ms timeout) + - `sendAll`: `true` Example of run command payload: + ``` + { + insert: "coll", + documents: [{ _id : 42 }], + proxyTest: { + actions: [ + { sendBytes : 2 }, + { delayMs : 400 }, + { sendAll : true }, + ] + } + } + ``` +5. Assert that the operation failed with timeout error. +6. Sleep for 4 seconds (to pass the 3-seconds expiration window) +7. Issue another operation (e.g. another insert) and assert that it does not return an error. +8. Verify that the following sequence of events was observed: + - `ConnectionCreatedEvent` + - `ConnectionPendingResponseStartedEvent` + - `ConnectionPendingResponseSucceededEvent` +9. Verify that NONE the following events was observed: + - `ConnectionClosedEvent` + - `ConnectionPendingResponseFailedEvent` #### Exhaust Cursors @@ -87,7 +124,7 @@ this prose test. 2. Insert > 2 records into the collection. 3. Create an exhaust cursor using `find` and iterate one `getMore` using `batchSize=1`. 4. Call a subsequent `getMore` on the exhaust cursor with a client-side timeout of 100ms. -5. Ensure that the `ConnectionClosed` event is emitted due to timeout. +5. Ensure that the `ConnectionClosedEvent` event is emitted due to timeout. ## Logging Tests From d33c5c21e48d485627b6cec90b39e801ce0c008d Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Wed, 8 Oct 2025 15:24:22 -0700 Subject: [PATCH 11/14] Move closed connection tests into separate file. --- .../pending-response-close-connection.json | 180 ++++++++++++++ .../pending-response-close-connection.yml | 98 ++++++++ .../tests/pending-response.json | 221 ------------------ .../tests/pending-response.yml | 107 --------- 4 files changed, 278 insertions(+), 328 deletions(-) create mode 100644 source/client-side-operations-timeout/tests/pending-response-close-connection.json create mode 100644 source/client-side-operations-timeout/tests/pending-response-close-connection.yml diff --git a/source/client-side-operations-timeout/tests/pending-response-close-connection.json b/source/client-side-operations-timeout/tests/pending-response-close-connection.json new file mode 100644 index 0000000000..84de8306c5 --- /dev/null +++ b/source/client-side-operations-timeout/tests/pending-response-close-connection.json @@ -0,0 +1,180 @@ +{ + "description": "Connection churn is prevented by reading pending responses during connection checkout", + "schemaVersion": "1.28", + "runOnRequirements": [ + { + "minServerVersion": "5.0", + "topologies": [ + "single", + "replicaset" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "failPointClient", + "useMultipleMongoses": false + } + }, + { + "client": { + "id": "client", + "uriOptions": { + "maxPoolSize": 1, + "minPoolSize": 1 + }, + "awaitMinPoolSizeMS": 10000, + "useMultipleMongoses": false, + "observeEvents": [ + "commandFailedEvent", + "commandSucceededEvent", + "connectionCheckOutStartedEvent", + "connectionCheckedOutEvent", + "connectionCheckOutFailedEvent", + "connectionCheckedInEvent", + "connectionCreatedEvent", + "connectionReadyEvent", + "connectionClosedEvent", + "connectionPendingResponseSucceededEvent", + "connectionPendingResponseStartedEvent", + "connectionPendingResponseFailedEvent" + ] + } + }, + { + "database": { + "id": "test", + "client": "client", + "databaseName": "test" + } + }, + { + "collection": { + "id": "coll", + "database": "test", + "collectionName": "coll" + } + } + ], + "initialData": [ + { + "collectionName": "coll", + "databaseName": "test", + "documents": [] + } + ], + "tests": [ + { + "description": "write op retries when connection closes server-side while draining response", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "blockConnection": true, + "blockTimeMS": 200, + "closeConnection": true + } + } + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 50, + "document": { + "_id": 3, + "x": 1 + } + }, + "expectError": { + "isTimeoutError": true + } + }, + { + "name": "insertOne", + "object": "coll", + "arguments": { + "timeoutMS": 1000, + "document": { + "_id": 3, + "x": 1 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "insert" + } + } + ] + }, + { + "client": "client", + "eventType": "cmap", + "events": [ + { + "connectionCheckOutStartedEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionCheckOutStartedEvent": {} + }, + { + "connectionPendingResponseStartedEvent": {} + }, + { + "connectionPendingResponseFailedEvent": { + "reason": "error" + } + }, + { + "connectionClosedEvent": { + "reason": "error" + } + }, + { + "connectionCreatedEvent": {} + }, + { + "connectionReadyEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + } + ] + } + ] + } + ] +} diff --git a/source/client-side-operations-timeout/tests/pending-response-close-connection.yml b/source/client-side-operations-timeout/tests/pending-response-close-connection.yml new file mode 100644 index 0000000000..20f5375c19 --- /dev/null +++ b/source/client-side-operations-timeout/tests/pending-response-close-connection.yml @@ -0,0 +1,98 @@ +description: "Connection churn is prevented by reading pending responses during connection checkout" +schemaVersion: "1.28" +runOnRequirements: + - minServerVersion: "5.0" + # FailPoint on Server before 5.0 do not wait for blockTimeMS before closing the connection + topologies: ["single", "replicaset"] +createEntities: + - client: + id: &failPointClient failPointClient + useMultipleMongoses: false + - client: + id: &client client + uriOptions: + maxPoolSize: 1 + minPoolSize: 1 + awaitMinPoolSizeMS: 10000 + useMultipleMongoses: false + observeEvents: + - commandFailedEvent + - commandSucceededEvent + - connectionCheckOutStartedEvent + - connectionCheckedOutEvent + - connectionCheckOutFailedEvent + - connectionCheckedInEvent + - connectionCreatedEvent + - connectionReadyEvent + - connectionClosedEvent + - connectionPendingResponseSucceededEvent + - connectionPendingResponseStartedEvent + - connectionPendingResponseFailedEvent + - database: + id: &database test + client: *client + databaseName: *database + - collection: + id: &collection coll + database: *database + collectionName: *collection +initialData: + - collectionName: *collection + databaseName: *database + documents: [] +tests: + # If the connection is closed server-side while draining the response, the + # driver must retry with a different connection. + - description: "write op retries when connection closes server-side while draining response" + operations: + # Create a failpoint to block the first and second operation. + - name: failPoint + object: testRunner + arguments: + client: *failPointClient + failPoint: + configureFailPoint: failCommand + mode: {times: 1} + data: + failCommands: ["insert"] + blockConnection: true + blockTimeMS: 200 + closeConnection: true + # Execute operation with timeout less than block time. + - name: insertOne + object: *collection + arguments: + timeoutMS: 50 + document: {_id: 3, x: 1} + expectError: + isTimeoutError: true + # Draining pending response should failure because of closed connection, + # but another connection should be established within the checkout and operation should succeed. + - name: insertOne + object: *collection + arguments: + timeoutMS: 1000 + document: {_id: 3, x: 1} + expectEvents: + - client: *client + events: + - commandFailedEvent: + commandName: insert + - commandSucceededEvent: + commandName: insert + - client: *client + eventType: cmap + events: + - connectionCheckOutStartedEvent: { } + - connectionCheckedOutEvent: { } + - connectionCheckedInEvent: { } # Insert fails. + - connectionCheckOutStartedEvent: { } + - connectionPendingResponseStartedEvent: { } + - connectionPendingResponseFailedEvent: # Pending response failed. + reason: error + - connectionClosedEvent: + reason: error + - connectionCreatedEvent: { } # New connection should be created and checked out. + - connectionReadyEvent: { } + - connectionCheckedOutEvent: { } + - connectionCheckedInEvent: { } # Find finishes. diff --git a/source/client-side-operations-timeout/tests/pending-response.json b/source/client-side-operations-timeout/tests/pending-response.json index 98b46c411b..4e69ebe44c 100644 --- a/source/client-side-operations-timeout/tests/pending-response.json +++ b/source/client-side-operations-timeout/tests/pending-response.json @@ -486,227 +486,6 @@ ] } ] - }, - { - "description": "read op retries when connection closes server-side while draining response", - "operations": [ - { - "name": "failPoint", - "object": "testRunner", - "arguments": { - "client": "failPointClient", - "failPoint": { - "configureFailPoint": "failCommand", - "mode": { - "times": 1 - }, - "data": { - "failCommands": [ - "insert" - ], - "blockConnection": true, - "blockTimeMS": 200, - "closeConnection": true - } - } - } - }, - { - "name": "insertOne", - "object": "coll", - "arguments": { - "timeoutMS": 50, - "document": { - "_id": 3, - "x": 1 - } - }, - "expectError": { - "isTimeoutError": true - } - }, - { - "name": "find", - "object": "coll", - "arguments": { - "timeoutMS": 1000, - "filter": { - "_id": 1 - } - } - } - ], - "expectEvents": [ - { - "client": "client", - "events": [ - { - "commandFailedEvent": { - "commandName": "insert" - } - }, - { - "commandSucceededEvent": { - "commandName": "find" - } - } - ] - }, - { - "client": "client", - "eventType": "cmap", - "events": [ - { - "connectionCheckOutStartedEvent": {} - }, - { - "connectionCheckedOutEvent": {} - }, - { - "connectionCheckedInEvent": {} - }, - { - "connectionCheckOutStartedEvent": {} - }, - { - "connectionPendingResponseStartedEvent": {} - }, - { - "connectionPendingResponseFailedEvent": { - "reason": "error" - } - }, - { - "connectionClosedEvent": { - "reason": "error" - } - }, - { - "connectionCreatedEvent": {} - }, - { - "connectionReadyEvent": {} - }, - { - "connectionCheckedOutEvent": {} - }, - { - "connectionCheckedInEvent": {} - } - ] - } - ] - }, - { - "description": "write op retries when connection closes server-side while draining response", - "operations": [ - { - "name": "failPoint", - "object": "testRunner", - "arguments": { - "client": "failPointClient", - "failPoint": { - "configureFailPoint": "failCommand", - "mode": { - "times": 1 - }, - "data": { - "failCommands": [ - "insert" - ], - "blockConnection": true, - "blockTimeMS": 200, - "closeConnection": true - } - } - } - }, - { - "name": "insertOne", - "object": "coll", - "arguments": { - "timeoutMS": 50, - "document": { - "_id": 3, - "x": 1 - } - }, - "expectError": { - "isTimeoutError": true - } - }, - { - "name": "insertOne", - "object": "coll", - "arguments": { - "timeoutMS": 1000, - "document": { - "_id": 3, - "x": 1 - } - } - } - ], - "expectEvents": [ - { - "client": "client", - "events": [ - { - "commandFailedEvent": { - "commandName": "insert" - } - }, - { - "commandSucceededEvent": { - "commandName": "insert" - } - } - ] - }, - { - "client": "client", - "eventType": "cmap", - "events": [ - { - "connectionCheckOutStartedEvent": {} - }, - { - "connectionCheckedOutEvent": {} - }, - { - "connectionCheckedInEvent": {} - }, - { - "connectionCheckOutStartedEvent": {} - }, - { - "connectionPendingResponseStartedEvent": {} - }, - { - "connectionPendingResponseFailedEvent": { - "reason": "error" - } - }, - { - "connectionClosedEvent": { - "reason": "error" - } - }, - { - "connectionCreatedEvent": {} - }, - { - "connectionReadyEvent": {} - }, - { - "connectionCheckedOutEvent": {} - }, - { - "connectionCheckedInEvent": {} - } - ] - } - ] } ] } diff --git a/source/client-side-operations-timeout/tests/pending-response.yml b/source/client-side-operations-timeout/tests/pending-response.yml index 93ccbda963..4324e97476 100644 --- a/source/client-side-operations-timeout/tests/pending-response.yml +++ b/source/client-side-operations-timeout/tests/pending-response.yml @@ -233,7 +233,6 @@ tests: arguments: timeoutMS: 200 filter: {_id: 1} - expectEvents: - client: *client events: @@ -257,109 +256,3 @@ tests: - connectionPendingResponseSucceededEvent: {} - connectionCheckedOutEvent: {} - connectionCheckedInEvent: {} # Second find succeeds. - # If the connection is closed server-side while draining the response, the - # driver must retry with a different connection. - - description: "read op retries when connection closes server-side while draining response" - operations: - # Create a failpoint to block the first and second operation. - - name: failPoint - object: testRunner - arguments: - client: *failPointClient - failPoint: - configureFailPoint: failCommand - mode: {times: 1} - data: - failCommands: ["insert"] - blockConnection: true - blockTimeMS: 200 - closeConnection: true - # Execute operation with timeout less than block time. - - name: insertOne - object: *collection - arguments: - timeoutMS: 50 - document: {_id: 3, x: 1} - expectError: - isTimeoutError: true - - name: find - object: *collection - arguments: - timeoutMS: 1000 - filter: {_id: 1} - expectEvents: - - client: *client - events: - - commandFailedEvent: - commandName: insert - - commandSucceededEvent: - commandName: find - - client: *client - eventType: cmap - events: - - connectionCheckOutStartedEvent: {} - - connectionCheckedOutEvent: {} - - connectionCheckedInEvent: {} # Insert fails. - - connectionCheckOutStartedEvent: {} - - connectionPendingResponseStartedEvent: {} # Pending read fails on first find - - connectionPendingResponseFailedEvent: - reason: error - - connectionClosedEvent: - reason: error - - connectionCreatedEvent: { } - - connectionReadyEvent: { } - - connectionCheckedOutEvent: {} - - connectionCheckedInEvent: {} # Find finishes. - # If the connection is closed server-side while draining the response, the - # driver must retry with a different connection (write operation) - - description: "write op retries when connection closes server-side while draining response" - operations: - # Create a failpoint to block the first and second operation. - - name: failPoint - object: testRunner - arguments: - client: *failPointClient - failPoint: - configureFailPoint: failCommand - mode: {times: 1} - data: - failCommands: ["insert"] - blockConnection: true - blockTimeMS: 200 - closeConnection: true - # Execute operation with timeout less than block time. - - name: insertOne - object: *collection - arguments: - timeoutMS: 50 - document: {_id: 3, x: 1} - expectError: - isTimeoutError: true - - name: insertOne - object: *collection - arguments: - timeoutMS: 1000 - document: {_id: 3, x: 1} - expectEvents: - - client: *client - events: - - commandFailedEvent: - commandName: insert - - commandSucceededEvent: - commandName: insert - - client: *client - eventType: cmap - events: - - connectionCheckOutStartedEvent: { } - - connectionCheckedOutEvent: { } - - connectionCheckedInEvent: { } # Insert fails. - - connectionCheckOutStartedEvent: { } - - connectionPendingResponseStartedEvent: { } # Pending read fails on first find - - connectionPendingResponseFailedEvent: - reason: error - - connectionClosedEvent: - reason: error - - connectionCreatedEvent: { } - - connectionReadyEvent: { } - - connectionCheckedOutEvent: { } - - connectionCheckedInEvent: { } # Find finishes. From b2c575257e405e2072ce74fe1691404fe8359a43 Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Thu, 23 Oct 2025 15:38:00 -0700 Subject: [PATCH 12/14] pr --- .../pending-response-close-connection.yml | 4 +- .../connection-monitoring-and-pooling.md | 94 +++++++++++-------- .../tests/README.md | 45 +++++++-- 3 files changed, 93 insertions(+), 50 deletions(-) diff --git a/source/client-side-operations-timeout/tests/pending-response-close-connection.yml b/source/client-side-operations-timeout/tests/pending-response-close-connection.yml index 20f5375c19..73d088ef98 100644 --- a/source/client-side-operations-timeout/tests/pending-response-close-connection.yml +++ b/source/client-side-operations-timeout/tests/pending-response-close-connection.yml @@ -43,7 +43,7 @@ initialData: tests: # If the connection is closed server-side while draining the response, the # driver must retry with a different connection. - - description: "write op retries when connection closes server-side while draining response" + - description: "op retries when connection closes server-side while draining response" operations: # Create a failpoint to block the first and second operation. - name: failPoint @@ -66,7 +66,7 @@ tests: document: {_id: 3, x: 1} expectError: isTimeoutError: true - # Draining pending response should failure because of closed connection, + # Draining pending response should fail because of closed connection, # but another connection should be established within the checkout and operation should succeed. - name: insertOne object: *collection diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index 98a7cccac9..ba16c7ae20 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -662,48 +662,56 @@ This subsection outlines the pseudocode steps for acquiring a connection from th connection = Null tConnectionCheckOutStarted = current instant (use a monotonic clock if possible) emit ConnectionCheckOutStartedEvent and equivalent log message +checkout_timeout = timeoutMS is set ? remaining computedServerSelectionTimeout : waitQueueTimeoutMS try: - enter WaitQueue - wait until at top of wait queue - # Note that in a lock-based implementation of the wait queue would - # only allow one thread in the following block at a time while connection is Null: - if a connection is available: - while connection is Null and a connection is available: - connection = next available connection - if connection is perished: - close connection - connection = Null - if connection is in "pending response" state: - tConnectionDrainingStarted = current instant (use a monotonic clock if possible) - emit PendingResponseStartedEvent and equivalent log message - try: - drain the pending response - except timeout: - tConnectionDrainingFailed = current instant (use a monotonic clock if possible) - emit PendingResponseFailedEvent(reason="timeout", duration = tConnectionDrainingFailed - tConnectionDrainingStarted) and quivalent log message - if last read timestamp on connection > 3 seconds old + try: + enter WaitQueue + wait until at top of wait queue + # Note that in a lock-based implementation of the wait queue would + # only allow one thread in the following block at a time + while connection is Null: + if a connection is available: + while connection is Null and a connection is available: + connection = next available connection + if connection is perished: close connection - else - check in the connection - connection = Null - except: - tConnectionDrainingFailed = current instant (use a monotonic clock if possible) - emit PendingResponseFailedEvent(reason="error", duration = tConnectionDrainingFailed - tConnectionDrainingStarted) and equivalent log message - close connection - connection = Null + connection = Null + else if totalConnectionCount < maxPoolSize: + if pendingConnectionCount < maxConnecting: + connection = create connection else: - tConnectionDrainingSucceeded = current instant (use a monotonic clock if possible) - emit PendingResponseSucceededEvent(duration = tConnectionDrainingSucceeded - tConnectionDrainingStarted) and equivalent log message - else if totalConnectionCount < maxPoolSize: - if pendingConnectionCount < maxConnecting: - connection = create connection - else: - # this waiting MUST NOT prevent other threads from checking Connections - # back in to the pool. - wait until pendingConnectionCount < maxConnecting or a connection is available - continue - + # this waiting MUST NOT prevent other threads from checking Connections + # back in to the pool. + wait until pendingConnectionCount < maxConnecting or a connection is available + continue + finally: + # This must be done in all drivers + leave wait queue + + # If the Connection has pending response state it should be drained before it is returned. + # This MUST NOT block other threads from acquiring connections. + if connection is in "pending response" state: + tConnectionDrainingStarted = current instant (use a monotonic clock if possible) + emit PendingResponseStartedEvent and equivalent log message + try: + draining_timeout = min(remaining checkout_timeout, remaining expiration window) + drain the pending response + tConnectionDrainingSucceeded = current instant (use a monotonic clock if possible) + emit PendingResponseSucceededEvent(duration = tConnectionDrainingSucceeded - tConnectionDrainingStarted) and equivalent log message + except timeout: + tConnectionDrainingFailed = current instant (use a monotonic clock if possible) + emit PendingResponseFailedEvent(reason="timeout", duration = tConnectionDrainingFailed - tConnectionDrainingStarted) and equivalent log message + if last read timestamp on connection > 3 seconds old + close connection + else + return the connection to pool + connection = Null + except: + tConnectionDrainingFailed = current instant (use a monotonic clock if possible) + emit PendingResponseFailedEvent(reason="error", duration = tConnectionDrainingFailed - tConnectionDrainingStarted) and equivalent log message + close connection + connection = Null except pool is "closed": tConnectionCheckOutFailed = current instant (use a monotonic clock if possible) emit ConnectionCheckOutFailedEvent(reason="poolClosed", duration = tConnectionCheckOutFailed - tConnectionCheckOutStarted) and equivalent log message @@ -716,9 +724,6 @@ except timeout: tConnectionCheckOutFailed = current instant (use a monotonic clock if possible) emit ConnectionCheckOutFailedEvent(reason="timeout", duration = tConnectionCheckOutFailed - tConnectionCheckOutStarted) and equivalent log message throw WaitQueueTimeoutError -finally: - # This must be done in all drivers - leave wait queue # If the Connection has not been established yet (TCP, TLS, # handshake, compression, and auth), it must be established @@ -1581,6 +1586,13 @@ some equivalent configuration, but this configuration will also require target f 5.0. The advantage of using Background Thread to manage perished connections is that it will work regardless of environment setup. +### Why introduce the draining pending responses? + +Draining pending responses is necessary to address issues when connections are being closed client-side in cases when +the configured maxTimeMS does not allow enough time for the driver to read the MaxTimeMSExpired error and cases where +the server or network delays the response. Since establishing a new connection can be an expensive and time-consuming +operation it makes sense try to complete the previous read instead. + ### Why is the pending response timeout not calculated dynamically? Why is it specifically 3 seconds? Using a dynamic timeout introduces additional complexity. In particular, RTT is an unreliable metric for predicting diff --git a/source/connection-monitoring-and-pooling/tests/README.md b/source/connection-monitoring-and-pooling/tests/README.md index 61654a8568..67f54155dc 100644 --- a/source/connection-monitoring-and-pooling/tests/README.md +++ b/source/connection-monitoring-and-pooling/tests/README.md @@ -34,21 +34,31 @@ This test verifies that if only part of a response was read before the timeout, response and reuse the connection for the next operation. This test has 3 variation, see step 4, `sendBytes` parameter. 1. Setup mongoproxy. + 2. Connect to the proxy server with `maxPoolSize=1`, `direct=true`, disable read and write retryability, subscribing to the following CMAP events: + - `PendingResponseStartedEvent` - `PendingResponseSucceededEvent` - `PendingResponseFailedEvent` - `ConnectionCreatedEvent` - `ConnectionClosedEvent` + 3. Execute `ping` command to populate the connection pool. + 4. Send a command (e.g. an insert) with a 200 millisecond timeout and the following `proxyTest` actions: - - `sendBytes`: There are 3 test cases here: - 1. Message size was partially read: random value between 1 and 3 inclusive - 2. Message size was read, body was not read at all: use 4 - 3. Message size was read, body read partially: random value between 5 and 100 inclusive + + - `sendBytes`: Depending on implementation, we have 4 test cases here (some driver could read message size only to + determine the message size, other could read entire message header): + 1. Message size was partially read: 2 + 2. Message size was read: 4 + 3. Message header was read, body was not read: 16 + 4. Message header was read, body read partially: 20 - `delayMs`: 400 ( to exceed the 200 ms timeout) - - `sendAll`: `true` Example of run command payload: + - `sendAll`: `true` + + Example of run command payload: + ``` { insert: "coll", @@ -62,13 +72,19 @@ response and reuse the connection for the next operation. This test has 3 variat } } ``` + 5. Assert that the operation failed with timeout error. + 6. Issue another operation (e.g. another insert) and assert that it does not return an error. + 7. Verify that the following sequence of events was observed: + - `ConnectionCreatedEvent` - `ConnectionPendingResponseStartedEvent` - `ConnectionPendingResponseSucceededEvent` + 8. Verify that NONE the following events was observed: + - `ConnectionPendingResponseFailedEvent` - `ConnectionClosedEvent` @@ -78,18 +94,26 @@ This test verifies that if a connection idles past the pending response window ( aliveness check does not attempt to discard bytes from the TCP stream. 1. Setup mongoproxy. + 2. Connect to the proxy server with `maxPoolSize=1`, `direct=true`, disable read and write retryability, subscribing to the following CMAP events: + - `PendingResponseStartedEvent` - `PendingResponseSucceededEvent` - `PendingResponseFailedEvent` - `ConnectionCreatedEvent` - `ConnectionClosedEvent` + 3. Execute `ping` command to populate the connection pool. + 4. Send a command (e.g. an insert) with a 200 millisecond timeout and the following `proxyTest` actions: - - `sendBytes`: random value between 1 and 3 inclusive + + - `sendBytes`: 2 - `delayMs`: 400 ( to exceed the 200 ms timeout) - - `sendAll`: `true` Example of run command payload: + - `sendAll`: `true` + + Example of run command payload: + ``` { insert: "coll", @@ -103,14 +127,21 @@ aliveness check does not attempt to discard bytes from the TCP stream. } } ``` + 5. Assert that the operation failed with timeout error. + 6. Sleep for 4 seconds (to pass the 3-seconds expiration window) + 7. Issue another operation (e.g. another insert) and assert that it does not return an error. + 8. Verify that the following sequence of events was observed: + - `ConnectionCreatedEvent` - `ConnectionPendingResponseStartedEvent` - `ConnectionPendingResponseSucceededEvent` + 9. Verify that NONE the following events was observed: + - `ConnectionClosedEvent` - `ConnectionPendingResponseFailedEvent` From 131bcf0549575f3c159912bcd0af6bc0816dd45b Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Thu, 23 Oct 2025 15:48:41 -0700 Subject: [PATCH 13/14] Pr --- .../tests/pending-response-close-connection.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/client-side-operations-timeout/tests/pending-response-close-connection.json b/source/client-side-operations-timeout/tests/pending-response-close-connection.json index 84de8306c5..cdf687de81 100644 --- a/source/client-side-operations-timeout/tests/pending-response-close-connection.json +++ b/source/client-side-operations-timeout/tests/pending-response-close-connection.json @@ -66,7 +66,7 @@ ], "tests": [ { - "description": "write op retries when connection closes server-side while draining response", + "description": "op retries when connection closes server-side while draining response", "operations": [ { "name": "failPoint", From 207c5cd2798f64a48945b1a563ab2b374ce5646d Mon Sep 17 00:00:00 2001 From: Oleksandr Poliakov Date: Wed, 12 Nov 2025 13:58:44 -0800 Subject: [PATCH 14/14] pr --- .../connection-monitoring-and-pooling.md | 22 ++++++++++++------- .../tests/README.md | 2 +- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md index ba16c7ae20..1eb08ab39a 100644 --- a/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md +++ b/source/connection-monitoring-and-pooling/connection-monitoring-and-pooling.md @@ -594,13 +594,13 @@ response data is drained and the connection is ready to send the next command. 1. **Persist and update timestamp**: The connection must record the current time immediately after the original timeout. This timestamp MUST be updated to the current time whenever any bytes are successfully read, received, or consumed while draining of the pending response. -2. **Aliveness check**: If driver uses synchronous read from the socket and the timestamp is older than 3 seconds, the - driver MUST perform aliveness check either performing a non-blocking read or using the minimal possible timeout to - check if at least one byte can be read/received. If at least one byte can be read the timestamp must be updated and - connection SHOULD be drained. If no bytes can be read, the connection MUST be closed. +2. **Aliveness check**: If driver uses synchronous read from the socket and the timestamp is older than 3 seconds, then + the driver MUST do an aliveness check by either performing a non-blocking read or using the minimal possible + timeout to check if at least one byte can be read/received. If at least one byte can be read, then the timestamp + must be updated and the connection SHOULD be drained. If no bytes can be read, the connection MUST be closed. 3. **User-provided timeout**: If a user-provided timeout is specified for the operation, the driver MUST use the minimum - of (a) the remaining time before the 3 second "pending response" window elapses and (b) the remaining of - user-provided timeout as the effective timeout for the draining. + of (a) the remaining time before the 3 second "pending response" window elapses and (b) the remaining user-provided + timeout as the effective timeout for the draining. 4. **Default timeout**: If no user-provided timeout is specified, the driver MUST use the minimum of (a) the remaining 3 second "pending response" window and (b) remaining timeout for connection checkout. 5. **On Timeout**: if timeout occurred while draining the pending response and the 3 seconds pending-response window was @@ -662,7 +662,10 @@ This subsection outlines the pseudocode steps for acquiring a connection from th connection = Null tConnectionCheckOutStarted = current instant (use a monotonic clock if possible) emit ConnectionCheckOutStartedEvent and equivalent log message -checkout_timeout = timeoutMS is set ? remaining computedServerSelectionTimeout : waitQueueTimeoutMS +if timeoutMS is not set and waitQueueTimeoutMS is supported and configured: + checkout_timeout = waitQueueTimeoutMS +else: + checkout_timeout = remaining computedServerSelectionTimeout try: while connection is Null: try: @@ -1620,7 +1623,10 @@ encounter avoidable delays. By refreshing the timeout after each successful draining, we acknowledge that progress is being made, and we provide a new time window for the next segment of data to arrive. Refreshing is less costly than re-establishing a connection -since there is no reason to believe that a new connection would reduce latency. +since there is no reason to believe that a new connection would reduce latency. Such approach might cause a problem in +case of extremely slow server response (1 byte each 3 seconds) what could lead to constant moving of the pending +response expiration window. The decision was made to ignore the "very slowly receiving data" problem for now under the +assumption that it's extremely unlikely to occur. ### Why are exhaust cursors prohibited from transitioning a connection to the "pending response" state? diff --git a/source/connection-monitoring-and-pooling/tests/README.md b/source/connection-monitoring-and-pooling/tests/README.md index 67f54155dc..bad5a97bd9 100644 --- a/source/connection-monitoring-and-pooling/tests/README.md +++ b/source/connection-monitoring-and-pooling/tests/README.md @@ -31,7 +31,7 @@ non-ssl, non-auth, and non-compression connections. #### Recover Partially Read Response This test verifies that if only part of a response was read before the timeout, the driver can drain the rest of the -response and reuse the connection for the next operation. This test has 3 variation, see step 4, `sendBytes` parameter. +response and reuse the connection for the next operation. This test has 3 variations, see step 4, `sendBytes` parameter. 1. Setup mongoproxy.