From 6c2def347bb77e5bd135e89b8a962b18bcc96543 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vandon?= Date: Tue, 30 Dec 2025 15:55:54 +0100 Subject: [PATCH] add a limit to the number of elements we process in a mongo document --- .../trace/instrumentation/mongo/Context.java | 39 ++++++++++++++++--- .../instrumentation/mongo/BsonScrubber31.java | 10 ++++- .../src/test/groovy/BsonScrubber31Test.groovy | 24 ++++++++++++ .../instrumentation/mongo/BsonScrubber34.java | 10 ++++- .../src/test/groovy/BsonScrubber34Test.groovy | 24 ++++++++++++ 5 files changed, 100 insertions(+), 7 deletions(-) diff --git a/dd-java-agent/instrumentation/mongo/mongo-common/src/main/java/datadog/trace/instrumentation/mongo/Context.java b/dd-java-agent/instrumentation/mongo/mongo-common/src/main/java/datadog/trace/instrumentation/mongo/Context.java index fed65a9016b..6dd632e0018 100644 --- a/dd-java-agent/instrumentation/mongo/mongo-common/src/main/java/datadog/trace/instrumentation/mongo/Context.java +++ b/dd-java-agent/instrumentation/mongo/mongo-common/src/main/java/datadog/trace/instrumentation/mongo/Context.java @@ -2,14 +2,20 @@ final class Context { + private static final int MAX_DEPTH = 64; + private static final int MAX_SEQUENCE_LENGTH = 256; + private final StringBuilder buffer = new StringBuilder(); // specifies the depth below which everything must be discarded, // e.g. because we're inside an $in clause we want to collapse - private int discardDepth = 64; - private int keepDepth = 64; + private int discardDepth = MAX_DEPTH; + private int keepDepth = MAX_DEPTH; private int depth; + // tracks sequence element counts at each depth level to limit long arrays + private final int[] sequenceCounts = new int[MAX_DEPTH]; + public void discardSubTree() { if (depth < discardDepth) { this.discardDepth = depth; @@ -29,11 +35,32 @@ public void startDocument() { public void endDocument() { --depth; if (discardDepth == depth) { - discardDepth = 64; + discardDepth = MAX_DEPTH; } if (keepDepth == depth) { - keepDepth = 64; + keepDepth = MAX_DEPTH; + } + } + + public void startArray() { + if (depth < MAX_DEPTH) { + sequenceCounts[depth] = 0; + } + // note: nothing to do at the end of the array + } + + /** + * Signals that a new element is being processed in the current array. Returns true if this + * element should be written, false if the sequence limit has been reached. + */ + public boolean nextSequenceElement() { + // checking depth just to make sure we don't access the array out of bounds, but it should never + // be the case since we stop processing the document before reaching this code. + if (depth < MAX_DEPTH) { + sequenceCounts[depth]++; + return sequenceCounts[depth] <= MAX_SEQUENCE_LENGTH; } + return false; // theoretically unreachable } public boolean disableObfuscation() { @@ -48,7 +75,7 @@ private boolean tooDeep() { // this means we are parsing a huge document, which we will truncate anyway // note that MongoDB sets a default max nested depth of 100, and MongoDB users // are generally advised to avoid deep nesting for the sake of database performance - return depth >= 64; + return depth >= MAX_DEPTH; } private boolean shouldWrite() { @@ -82,6 +109,8 @@ public void write(String string) { public void clear() { buffer.setLength(0); depth = 0; + discardDepth = MAX_DEPTH; + keepDepth = MAX_DEPTH; } public String asString() { diff --git a/dd-java-agent/instrumentation/mongo/mongo-driver/mongo-driver-3/mongo-driver-3.1/src/main/java/datadog/trace/instrumentation/mongo/BsonScrubber31.java b/dd-java-agent/instrumentation/mongo/mongo-driver/mongo-driver-3/mongo-driver-3.1/src/main/java/datadog/trace/instrumentation/mongo/BsonScrubber31.java index c6b5d623df0..dd17820da77 100644 --- a/dd-java-agent/instrumentation/mongo/mongo-driver/mongo-driver-3/mongo-driver-3.1/src/main/java/datadog/trace/instrumentation/mongo/BsonScrubber31.java +++ b/dd-java-agent/instrumentation/mongo/mongo-driver/mongo-driver-3/mongo-driver-3.1/src/main/java/datadog/trace/instrumentation/mongo/BsonScrubber31.java @@ -270,6 +270,7 @@ public void writeRegularExpression(String name, BsonRegularExpression regularExp @Override public void writeStartArray() { + context.startArray(); context.write('['); } @@ -455,7 +456,11 @@ private void pipeArray(String attribute, BsonReader reader) { writeStartArray(attribute); BsonType type = reader.readBsonType(); while (type != BsonType.END_OF_DOCUMENT) { - pipeValue(null, reader); + if (context.nextSequenceElement()) { + pipeValue(null, reader); + } else { + reader.skipValue(); + } type = reader.readBsonType(); nextValue(type); } @@ -466,6 +471,9 @@ private void pipeArray(String attribute, BsonReader reader) { private void pipeArray(String attribute, final BsonArray array) { writeStartArray(attribute); for (BsonValue cur : array) { + if (!context.nextSequenceElement()) { + break; + } pipeValue(null, cur); } writeEndArray(); diff --git a/dd-java-agent/instrumentation/mongo/mongo-driver/mongo-driver-3/mongo-driver-3.1/src/test/groovy/BsonScrubber31Test.groovy b/dd-java-agent/instrumentation/mongo/mongo-driver/mongo-driver-3/mongo-driver-3.1/src/test/groovy/BsonScrubber31Test.groovy index b96c19f7034..0ea543f9abd 100644 --- a/dd-java-agent/instrumentation/mongo/mongo-driver/mongo-driver-3/mongo-driver-3.1/src/test/groovy/BsonScrubber31Test.groovy +++ b/dd-java-agent/instrumentation/mongo/mongo-driver/mongo-driver-3/mongo-driver-3.1/src/test/groovy/BsonScrubber31Test.groovy @@ -27,4 +27,28 @@ class BsonScrubber31Test extends InstrumentationSpecification { "{\"update\" : \"orders\", \"ordered\" : false, \"writeConcern\" : { \"w\" : \"majority\" }, \"updates\": [{ \"q\" : { \"_id\" : 1 }, \"u\" : { \"orderId\" : \"Account1\", \"qty\" : 10 } } ]}" | "{\"update\": \"orders\", \"ordered\": false, \"writeConcern\": {\"w\": \"majority\"}, \"updates\": []}" "{\"insert\" : \"stuff\", \"ordered\" : true, \"writeConcern\" : { \"w\" : 10 }, \"documents\": [{ \"_id\" : { \"s\" : 0, \"i\": \"DEADBEEF\" }, \"array\" : [0, \"foo\", {\"foo\": 10}], \"qty\" : 10 } ]}" | "{\"insert\": \"stuff\", \"ordered\": true, \"writeConcern\": {\"w\": 10}, \"documents\": []}" } + + def "test BSON scrubber truncates long sequences"() { + setup: + // Create a document with an array containing 300 elements (more than the limit) + def elements = (1..300).collect { "\"item$it\"" }.join(", ") + def input = "{\"find\": \"collection\", \"filter\": {\"\$or\": [$elements]}}" + BsonDocument doc = BsonDocument.parse(input) + + when: + BsonScrubber31 scrubber = new BsonScrubber31() + scrubber.pipe(new BsonDocumentReader(doc)) + String resourceName = scrubber.getResourceName() + + then: + // The output should contain exactly 256 "?" elements (the first 256 get obfuscated) + // and should still be valid JSON with a closing bracket + resourceName.startsWith("{\"find\": \"collection\", \"filter\": {\"\$or\": [") + resourceName.endsWith("]}}") + // Count the number of obfuscated values - should be exactly 256 + resourceName.count("\"?\"") == 256 + + cleanup: + scrubber.close() + } } diff --git a/dd-java-agent/instrumentation/mongo/mongo-driver/mongo-driver-3/mongo-driver-3.4/src/main/java/datadog/trace/instrumentation/mongo/BsonScrubber34.java b/dd-java-agent/instrumentation/mongo/mongo-driver/mongo-driver-3/mongo-driver-3.4/src/main/java/datadog/trace/instrumentation/mongo/BsonScrubber34.java index 26afed96a47..fc1b9700b1b 100644 --- a/dd-java-agent/instrumentation/mongo/mongo-driver/mongo-driver-3/mongo-driver-3.4/src/main/java/datadog/trace/instrumentation/mongo/BsonScrubber34.java +++ b/dd-java-agent/instrumentation/mongo/mongo-driver/mongo-driver-3/mongo-driver-3.4/src/main/java/datadog/trace/instrumentation/mongo/BsonScrubber34.java @@ -279,6 +279,7 @@ public void writeRegularExpression(String name, BsonRegularExpression regularExp @Override public void writeStartArray() { + context.startArray(); context.write('['); } @@ -467,7 +468,11 @@ private void pipeArray(String attribute, BsonReader reader) { writeStartArray(attribute); BsonType type = reader.readBsonType(); while (type != BsonType.END_OF_DOCUMENT) { - pipeValue(null, reader); + if (context.nextSequenceElement()) { + pipeValue(null, reader); + } else { + reader.skipValue(); + } type = reader.readBsonType(); nextValue(type); } @@ -478,6 +483,9 @@ private void pipeArray(String attribute, BsonReader reader) { private void pipeArray(String attribute, final BsonArray array) { writeStartArray(attribute); for (BsonValue cur : array) { + if (!context.nextSequenceElement()) { + break; + } pipeValue(null, cur); } writeEndArray(); diff --git a/dd-java-agent/instrumentation/mongo/mongo-driver/mongo-driver-3/mongo-driver-3.4/src/test/groovy/BsonScrubber34Test.groovy b/dd-java-agent/instrumentation/mongo/mongo-driver/mongo-driver-3/mongo-driver-3.4/src/test/groovy/BsonScrubber34Test.groovy index c019c2e5c2d..18e10848993 100644 --- a/dd-java-agent/instrumentation/mongo/mongo-driver/mongo-driver-3/mongo-driver-3.4/src/test/groovy/BsonScrubber34Test.groovy +++ b/dd-java-agent/instrumentation/mongo/mongo-driver/mongo-driver-3/mongo-driver-3.4/src/test/groovy/BsonScrubber34Test.groovy @@ -27,4 +27,28 @@ class BsonScrubber34Test extends InstrumentationSpecification { "{\"update\" : \"orders\", \"ordered\" : false, \"writeConcern\" : { \"w\" : \"majority\" }, \"updates\": [{ \"q\" : { \"_id\" : 1 }, \"u\" : { \"orderId\" : \"Account1\", \"qty\" : 10 } } ]}" | "{\"update\": \"orders\", \"ordered\": false, \"writeConcern\": {\"w\": \"majority\"}, \"updates\": []}" "{\"insert\" : \"stuff\", \"ordered\" : true, \"writeConcern\" : { \"w\" : 10 }, \"documents\": [{ \"_id\" : { \"s\" : 0, \"i\": \"DEADBEEF\" }, \"array\" : [0, \"foo\", {\"foo\": 10}], \"qty\" : 10 } ]}" | "{\"insert\": \"stuff\", \"ordered\": true, \"writeConcern\": {\"w\": 10}, \"documents\": []}" } + + def "test BSON scrubber truncates long sequences"() { + setup: + // Create a document with an array containing 300 elements (more than the limit) + def elements = (1..300).collect { "\"item$it\"" }.join(", ") + def input = "{\"find\": \"collection\", \"filter\": {\"\$or\": [$elements]}}" + BsonDocument doc = BsonDocument.parse(input) + + when: + BsonScrubber34 scrubber = new BsonScrubber34() + scrubber.pipe(new BsonDocumentReader(doc)) + String resourceName = scrubber.getResourceName() + + then: + // The output should contain exactly 256 "?" elements (the first 256 get obfuscated) + // and should still be valid JSON with a closing bracket + resourceName.startsWith("{\"find\": \"collection\", \"filter\": {\"\$or\": [") + resourceName.endsWith("]}}") + // Count the number of obfuscated values - should be exactly 256 + resourceName.count("\"?\"") == 256 + + cleanup: + scrubber.close() + } }