From fcb886fbeef84d7b1842ed6adcfe546d6788a40a Mon Sep 17 00:00:00 2001 From: Prem Palanisamy Date: Mon, 13 Apr 2026 02:14:52 +0100 Subject: [PATCH 1/3] feat: add upsertWithCounts() returning matched/modified/upserted separately MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing upsert() method returns only MongoDB's raw `n` field (matched + upserted), which loses the distinction callers need to report "actually inserted" counts — useful for skipDuplicates-style flows in consumers like utopia-php/database. - Adds Client::upsertWithCounts() returning {matched, modified, upserted[]} where matched excludes upserts (derived as n - count(upserted)) - Threads a $returnRaw flag through query() / send() / receive() / parseResponse() so the caller can opt into getting the full stdClass response instead of the extracted $result->n short-circuit - Adds three tests: mixed (existing + new), all-new, all-existing --- src/Client.php | 99 ++++++++++++++++++++++++++++++++++++++++--- tests/MongoTest.php | 101 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 193 insertions(+), 7 deletions(-) diff --git a/src/Client.php b/src/Client.php index 7481249..ee16755 100644 --- a/src/Client.php +++ b/src/Client.php @@ -243,10 +243,14 @@ public function createUuid(): string * * @param array $command Command to execute * @param string|null $db Database name + * @param bool $returnRaw When true, skip the int/extracted-field short-circuits + * in the response parser and return the full stdClass + * result — use when the caller needs fields beyond `n` + * (e.g. the `upserted` array from an update command). * @return stdClass|array|int Query result * @throws Exception */ - public function query(array $command, ?string $db = null): stdClass|array|int + public function query(array $command, ?string $db = null, bool $returnRaw = false): stdClass|array|int { // Validate connection state before each operation $this->validateConnection(); @@ -351,7 +355,7 @@ public function query(array $command, ?string $db = null): stdClass|array|int $sections = Document::fromPHP($params); $message = pack('V*', 21 + strlen($sections), $this->id, 0, 2013, 0) . "\0" . $sections; - $result = $this->send($message); + $result = $this->send($message, $returnRaw); $this->updateCausalConsistency($result); @@ -371,7 +375,7 @@ public function query(array $command, ?string $db = null): stdClass|array|int * @return stdClass|array|int * @throws Exception */ - public function send(mixed $data): stdClass|array|int + public function send(mixed $data, bool $returnRaw = false): stdClass|array|int { // Check if connection is alive, connect if not if (!$this->client->isConnected()) { @@ -390,14 +394,14 @@ public function send(mixed $data): stdClass|array|int } } - return $this->receive(); + return $this->receive($returnRaw); } /** * Receive a message from connection. * @throws Exception */ - private function receive(): stdClass|array|int + private function receive(bool $returnRaw = false): stdClass|array|int { $chunks = []; $receivedLength = 0; @@ -461,7 +465,7 @@ private function receive(): stdClass|array|int $res = \implode('', $chunks); - return $this->parseResponse($res, $responseLength); + return $this->parseResponse($res, $responseLength, $returnRaw); } /** @@ -899,6 +903,81 @@ public function upsert(string $collection, array $operations, array $options = [ ); } + /** + * Execute a batch of upserts and return detailed counts from the response. + * + * Unlike {@see upsert()}, which returns the raw `n` field (matched + upserted), + * this method separates matched-existing from newly-upserted documents so the + * caller can tell exactly which operations produced new docs. Useful for + * `skipDuplicates`-style callers that need to report "actually inserted" counts. + * + * @param string $collection + * @param array, update: array, multi?: bool}> $operations + * @param array $options + * + * @return array{ + * matched: int, + * modified: int, + * upserted: array + * } Response counts. `matched` is the number of pre-existing docs matched + * by a filter (derived as `n - count(upserted)` since MongoDB's `n` + * includes upserts). `modified` is docs whose contents changed. `upserted` + * is the list of newly-created docs, each carrying the source operation's + * `index` and the assigned `_id`. + * + * @throws Exception + */ + public function upsertWithCounts(string $collection, array $operations, array $options = []): array + { + $updates = []; + + foreach ($operations as $op) { + $updates[] = [ + 'q' => $op['filter'], + 'u' => $this->toObject($op['update']), + 'upsert' => true, + 'multi' => $op['multi'] ?? false, + ]; + } + + $result = $this->query( + array_merge( + [ + self::COMMAND_UPDATE => $collection, + 'updates' => $updates, + ], + $options + ), + null, + true // $returnRaw — we need the full response, not just $result->n + ); + + if (!$result instanceof stdClass) { + throw new Exception('Unexpected upsertWithCounts response: expected stdClass, got ' . \gettype($result)); + } + + $upserted = []; + if (\property_exists($result, 'upserted') && \is_iterable($result->upserted)) { + foreach ($result->upserted as $entry) { + $upserted[] = [ + 'index' => (int) $entry->index, + '_id' => $entry->_id, + ]; + } + } + + // MongoDB's `n` in the update response = matched-existing + upserted-new. + // Subtract the upserts so `matched` reflects only pre-existing docs. + $n = (int) ($result->n ?? 0); + $matched = \max(0, $n - \count($upserted)); + + return [ + 'matched' => $matched, + 'modified' => (int) ($result->nModified ?? 0), + 'upserted' => $upserted, + ]; + } + /** * Find document(s) with full transaction and session support. @@ -1584,7 +1663,7 @@ public function close(): void * @return stdClass|array|int Parsed response * @throws Exception */ - private function parseResponse(string $response, int $responseLength): stdClass|array|int + private function parseResponse(string $response, int $responseLength, bool $returnRaw = false): stdClass|array|int { /* * The first 21 bytes of the MongoDB wire protocol response consist of: @@ -1648,6 +1727,12 @@ private function parseResponse(string $response, int $responseLength): stdClass| ); } + // Callers that need the full response object (e.g. to read the + // `upserted` array from an update command) opt in with $returnRaw. + if ($returnRaw && $result->ok === 1.0) { + return $result; + } + // Check for operation success if (\property_exists($result, 'n') && $result->ok === 1.0) { return $result->n; diff --git a/tests/MongoTest.php b/tests/MongoTest.php index 2d48716..07bfd1f 100644 --- a/tests/MongoTest.php +++ b/tests/MongoTest.php @@ -271,6 +271,107 @@ public function testUpsert() self::assertEquals('English', $documents[1]->language); } + public function testUpsertWithCountsMixed() + { + $collection = 'upsert_counts_mixed'; + $this->getDatabase()->createCollection($collection); + try { + // Seed one document — it will be matched (no-op) by the first op. + $this->getDatabase()->insert($collection, [ + '_id' => 'existing', + 'name' => 'Original', + ]); + + $result = $this->getDatabase()->upsertWithCounts($collection, [ + [ + 'filter' => ['_id' => 'existing'], + // $setOnInsert is a no-op for existing docs — leaves 'Original' intact. + 'update' => ['$setOnInsert' => ['name' => 'ShouldNotWrite']], + ], + [ + 'filter' => ['_id' => 'fresh'], + 'update' => ['$setOnInsert' => ['name' => 'Fresh']], + ], + ]); + + // One pre-existing match + one upsert = n:2, matched (derived):1, upserted:1. + self::assertIsArray($result); + self::assertArrayHasKey('matched', $result); + self::assertArrayHasKey('modified', $result); + self::assertArrayHasKey('upserted', $result); + + self::assertSame(1, $result['matched'], 'matched should exclude upserts'); + self::assertSame(0, $result['modified']); + self::assertCount(1, $result['upserted']); + self::assertSame(1, $result['upserted'][0]['index']); + self::assertSame('fresh', $result['upserted'][0]['_id']); + + // Existing doc untouched, fresh doc created. + $existing = $this->getDatabase()->find($collection, ['_id' => 'existing'])->cursor->firstBatch ?? []; + self::assertCount(1, $existing); + self::assertEquals('Original', $existing[0]->name); + + $fresh = $this->getDatabase()->find($collection, ['_id' => 'fresh'])->cursor->firstBatch ?? []; + self::assertCount(1, $fresh); + self::assertEquals('Fresh', $fresh[0]->name); + } finally { + $this->getDatabase()->dropCollection($collection); + } + } + + public function testUpsertWithCountsAllNew() + { + $collection = 'upsert_counts_new'; + $this->getDatabase()->createCollection($collection); + try { + $result = $this->getDatabase()->upsertWithCounts($collection, [ + ['filter' => ['_id' => 'a'], 'update' => ['$setOnInsert' => ['name' => 'A']]], + ['filter' => ['_id' => 'b'], 'update' => ['$setOnInsert' => ['name' => 'B']]], + ['filter' => ['_id' => 'c'], 'update' => ['$setOnInsert' => ['name' => 'C']]], + ]); + + self::assertSame(0, $result['matched'], 'No docs matched — all were upserts'); + self::assertSame(0, $result['modified']); + self::assertCount(3, $result['upserted']); + + $ids = \array_column($result['upserted'], '_id'); + \sort($ids); + self::assertSame(['a', 'b', 'c'], $ids); + } finally { + $this->getDatabase()->dropCollection($collection); + } + } + + public function testUpsertWithCountsAllExisting() + { + $collection = 'upsert_counts_existing'; + $this->getDatabase()->createCollection($collection); + try { + $this->getDatabase()->insertMany($collection, [ + ['_id' => 'a', 'name' => 'A'], + ['_id' => 'b', 'name' => 'B'], + ]); + + $result = $this->getDatabase()->upsertWithCounts($collection, [ + ['filter' => ['_id' => 'a'], 'update' => ['$setOnInsert' => ['name' => 'NopeA']]], + ['filter' => ['_id' => 'b'], 'update' => ['$setOnInsert' => ['name' => 'NopeB']]], + ]); + + self::assertSame(2, $result['matched']); + self::assertSame(0, $result['modified']); + self::assertSame([], $result['upserted']); + + // Existing values preserved. + $docs = $this->getDatabase()->find($collection)->cursor->firstBatch ?? []; + self::assertCount(2, $docs); + foreach ($docs as $doc) { + self::assertContains($doc->name, ['A', 'B']); + } + } finally { + $this->getDatabase()->dropCollection($collection); + } + } + public function testToArrayWithNestedDocumentFromMongo() { $client = $this->getDatabase(); From 25d0a8c02b1a39d3f4c5b2a9cbb76171319c7ba5 Mon Sep 17 00:00:00 2001 From: Prem Palanisamy Date: Mon, 13 Apr 2026 02:32:46 +0100 Subject: [PATCH 2/3] Address review feedback on upsertWithCounts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Short-circuit upsertWithCounts() when $operations is empty (CodeRabbit P1): MongoDB rejects an update command with an empty updates array, so dynamic batch builders need a defensive guard. Returns the zero-count shape. - Add explicit index assertions to testUpsertWithCountsAllNew (Greptile P2) for parity with the mixed-case test. - Add testUpsertWithCountsModifiesExisting to exercise modified > 0 via $set/$inc on an existing doc — previously all tests had modified == 0 because they only used $setOnInsert (CodeRabbit nitpick). - Add testUpsertWithCountsEmpty covering the new short-circuit path. --- src/Client.php | 10 +++++++++ tests/MongoTest.php | 49 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/src/Client.php b/src/Client.php index ee16755..c3cc13a 100644 --- a/src/Client.php +++ b/src/Client.php @@ -929,6 +929,16 @@ public function upsert(string $collection, array $operations, array $options = [ */ public function upsertWithCounts(string $collection, array $operations, array $options = []): array { + // MongoDB rejects an `update` command with an empty `updates` array, so + // short-circuit dynamically-built batches before they hit the wire. + if (empty($operations)) { + return [ + 'matched' => 0, + 'modified' => 0, + 'upserted' => [], + ]; + } + $updates = []; foreach ($operations as $op) { diff --git a/tests/MongoTest.php b/tests/MongoTest.php index 07bfd1f..1cff68b 100644 --- a/tests/MongoTest.php +++ b/tests/MongoTest.php @@ -337,11 +337,60 @@ public function testUpsertWithCountsAllNew() $ids = \array_column($result['upserted'], '_id'); \sort($ids); self::assertSame(['a', 'b', 'c'], $ids); + + $indexes = \array_column($result['upserted'], 'index'); + \sort($indexes); + self::assertSame([0, 1, 2], $indexes); } finally { $this->getDatabase()->dropCollection($collection); } } + public function testUpsertWithCountsModifiesExisting() + { + $collection = 'upsert_counts_modify'; + $this->getDatabase()->createCollection($collection); + try { + $this->getDatabase()->insert($collection, [ + '_id' => 'exists', + 'name' => 'Old', + 'counter' => 1, + ]); + + // Use $set (not $setOnInsert) so the matched doc is actually modified. + $result = $this->getDatabase()->upsertWithCounts($collection, [ + [ + 'filter' => ['_id' => 'exists'], + 'update' => [ + '$set' => ['name' => 'New'], + '$inc' => ['counter' => 1], + ], + ], + ]); + + self::assertSame(1, $result['matched']); + self::assertSame(1, $result['modified'], 'modified should reflect nModified from MongoDB'); + self::assertSame([], $result['upserted']); + + // Verify the doc was actually updated on the server. + $docs = $this->getDatabase()->find($collection, ['_id' => 'exists'])->cursor->firstBatch ?? []; + self::assertCount(1, $docs); + self::assertEquals('New', $docs[0]->name); + self::assertEquals(2, $docs[0]->counter); + } finally { + $this->getDatabase()->dropCollection($collection); + } + } + + public function testUpsertWithCountsEmpty() + { + // Should short-circuit without hitting the wire — MongoDB rejects an + // update command with an empty `updates` array. + $result = $this->getDatabase()->upsertWithCounts('does_not_matter', []); + + self::assertSame(['matched' => 0, 'modified' => 0, 'upserted' => []], $result); + } + public function testUpsertWithCountsAllExisting() { $collection = 'upsert_counts_existing'; From b373e9552366c7b393bcbacc0a8de8874f307c6a Mon Sep 17 00:00:00 2001 From: Prem Palanisamy Date: Mon, 13 Apr 2026 03:57:10 +0100 Subject: [PATCH 3/3] Address review: validate response in upsertWithCounts, harden empty-batch test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review feedback raised two concerns: 1. Silent zero-counts under unacknowledged writes: if mongod ever omits `n` from the response, the previous code coerced it to 0 and reported "matched 0 / modified 0 / upserted []" for writes that may have applied. Add a defense-in-depth check that throws when `n` is missing. In practice this client always gets `n` over OP_MSG even for w:0 (send() does not set moreToCome), so the check is rarely tripped today — documented inline. 2. Empty-batch test relied on indirect proof: it only asserted the return value, not that the wire was untouched. Strengthen the comment to document the indirect proof (MongoDB rejects an `update` command with an empty `updates` array, which would propagate as an Exception), and add a separate test that explicitly verifies that server-side rejection so future MongoDB versions changing the behaviour are caught. Removes the writeConcern: { w: 0 } test that was added speculatively — the validation it covers is not reachable with this client's transport. --- src/Client.php | 25 ++++++++++++++++++++++--- tests/MongoTest.php | 39 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 58 insertions(+), 6 deletions(-) diff --git a/src/Client.php b/src/Client.php index c3cc13a..3815282 100644 --- a/src/Client.php +++ b/src/Client.php @@ -911,6 +911,12 @@ public function upsert(string $collection, array $operations, array $options = [ * caller can tell exactly which operations produced new docs. Useful for * `skipDuplicates`-style callers that need to report "actually inserted" counts. * + * Counts are only meaningful for acknowledged writes. In practice mongod + * still returns `n` for `writeConcern: { w: 0 }` over OP_MSG, so this method + * normally works regardless — but if a future protocol change ever causes + * `n` to be omitted, the method raises an Exception rather than silently + * returning zeros for writes that may have applied. + * * @param string $collection * @param array, update: array, multi?: bool}> $operations * @param array $options @@ -925,7 +931,8 @@ public function upsert(string $collection, array $operations, array $options = [ * is the list of newly-created docs, each carrying the source operation's * `index` and the assigned `_id`. * - * @throws Exception + * @throws Exception When the response is missing the `n` field — typically + * the result of an unacknowledged write concern. */ public function upsertWithCounts(string $collection, array $operations, array $options = []): array { @@ -966,6 +973,19 @@ public function upsertWithCounts(string $collection, array $operations, array $o throw new Exception('Unexpected upsertWithCounts response: expected stdClass, got ' . \gettype($result)); } + // Defense-in-depth: the counts in this method are only meaningful for + // acknowledged writes. In practice mongod always replies with `n` over + // OP_MSG (even for writeConcern: { w: 0 }) because send() does not set + // the moreToCome flag, so this check is rarely tripped today — but if + // a future protocol change ever causes `n` to go missing, refuse loudly + // instead of silently returning zeros for writes that may have applied. + if (!\property_exists($result, 'n')) { + throw new Exception( + 'upsertWithCounts() requires acknowledged writes — the response did not include `n`. ' + . 'Do not pass writeConcern: { w: 0 } when calling this method.' + ); + } + $upserted = []; if (\property_exists($result, 'upserted') && \is_iterable($result->upserted)) { foreach ($result->upserted as $entry) { @@ -978,8 +998,7 @@ public function upsertWithCounts(string $collection, array $operations, array $o // MongoDB's `n` in the update response = matched-existing + upserted-new. // Subtract the upserts so `matched` reflects only pre-existing docs. - $n = (int) ($result->n ?? 0); - $matched = \max(0, $n - \count($upserted)); + $matched = \max(0, (int) $result->n - \count($upserted)); return [ 'matched' => $matched, diff --git a/tests/MongoTest.php b/tests/MongoTest.php index 1cff68b..ea6632a 100644 --- a/tests/MongoTest.php +++ b/tests/MongoTest.php @@ -384,13 +384,46 @@ public function testUpsertWithCountsModifiesExisting() public function testUpsertWithCountsEmpty() { - // Should short-circuit without hitting the wire — MongoDB rejects an - // update command with an empty `updates` array. - $result = $this->getDatabase()->upsertWithCounts('does_not_matter', []); + // The guard should short-circuit before any wire traffic. Indirect proof: + // if the guard regressed, this would build an `update` command with an + // empty `updates` array, which MongoDB rejects with + // "Failed to parse: updates: array is empty" + // — that error would propagate as an Exception and fail this assertion. + // We also use a non-existent collection name to ensure the guard runs + // before any collection lookup happens. + $result = $this->getDatabase()->upsertWithCounts('this_collection_does_not_exist', []); self::assertSame(['matched' => 0, 'modified' => 0, 'upserted' => []], $result); } + public function testUpsertWithCountsRejectsEmptyUpdatesAtServer() + { + // Documents the underlying MongoDB behavior the empty-batch guard + // relies on. If a future MongoDB version stops rejecting empty + // `updates` arrays, the empty-batch test above becomes a weaker check + // and we should add a stronger guarantee. + $collection = 'upsert_counts_empty_proof'; + $this->getDatabase()->createCollection($collection); + try { + $this->expectException(Exception::class); + + // Bypass upsertWithCounts() entirely and send an `update` command + // with an empty `updates` array directly through query(). + $this->getDatabase()->query([ + 'update' => $collection, + 'updates' => [], + ]); + } finally { + $this->getDatabase()->dropCollection($collection); + } + } + + // Note: there is no test for the "missing `n` in response" defensive check. + // Triggering it would require either a moreToCome OP_MSG flag (not used by + // this client) or a stubbed transport — mongod still replies with `n` for + // writeConcern: { w: 0 } in normal use, so the check is defense-in-depth + // against future protocol changes rather than something callers can hit. + public function testUpsertWithCountsAllExisting() { $collection = 'upsert_counts_existing';