@@ -2,14 +2,20 @@

final class Context {

private static final int MAX_DEPTH = 64;
private static final int MAX_SEQUENCE_LENGTH = 256;

private final StringBuilder buffer = new StringBuilder();

// specifies the depth below which everything must be discarded,
// e.g. because we're inside an $in clause we want to collapse
-  private int discardDepth = 64;
-  private int keepDepth = 64;
+  private int discardDepth = MAX_DEPTH;
+  private int keepDepth = MAX_DEPTH;
private int depth;

// tracks sequence element counts at each depth level to limit long arrays
private final int[] sequenceCounts = new int[MAX_DEPTH];

public void discardSubTree() {
if (depth < discardDepth) {
this.discardDepth = depth;
@@ -29,11 +35,32 @@ public void startDocument() {
public void endDocument() {
--depth;
if (discardDepth == depth) {
-      discardDepth = 64;
+      discardDepth = MAX_DEPTH;
}
if (keepDepth == depth) {
-      keepDepth = 64;
+      keepDepth = MAX_DEPTH;
}
}

public void startArray() {
if (depth < MAX_DEPTH) {
sequenceCounts[depth] = 0;
}
// note: nothing to do at the end of the array
}

/**
* Signals that a new element is being processed in the current array. Returns true if this
* element should be written, false if the sequence limit has been reached.
*/
public boolean nextSequenceElement() {
// depth is checked only to guard against indexing past the end of the array; it should never
// reach MAX_DEPTH here, because we stop processing the document before this code runs.
if (depth < MAX_DEPTH) {
sequenceCounts[depth]++;
return sequenceCounts[depth] <= MAX_SEQUENCE_LENGTH;
}
return false; // theoretically unreachable
}

public boolean disableObfuscation() {
@@ -48,7 +75,7 @@ private boolean tooDeep() {
// this means we are parsing a huge document, which we will truncate anyway
// note that MongoDB sets a default max nested depth of 100, and MongoDB users
// are generally advised to avoid deep nesting for the sake of database performance
-    return depth >= 64;
+    return depth >= MAX_DEPTH;
}

private boolean shouldWrite() {
@@ -82,6 +109,8 @@ public void write(String string) {
public void clear() {
buffer.setLength(0);
depth = 0;
discardDepth = MAX_DEPTH;
keepDepth = MAX_DEPTH;
}

public String asString() {
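The new Context fields amount to one element counter per nesting depth: startArray() resets the counter at the current depth, and nextSequenceElement() bumps it and returns false once MAX_SEQUENCE_LENGTH is exceeded. A minimal, self-contained sketch of just that counting scheme follows; the class name SequenceLimiter and its stripped-down API are illustrative only, since the real Context also tracks discard/keep depths and owns the output buffer.

final class SequenceLimiter {
  private static final int MAX_DEPTH = 64;
  private static final int MAX_SEQUENCE_LENGTH = 256;

  // one element counter per document nesting depth, mirroring Context.sequenceCounts
  private final int[] sequenceCounts = new int[MAX_DEPTH];
  private int depth;

  void startDocument() {
    ++depth;
  }

  void endDocument() {
    --depth;
  }

  // reset the counter for an array opened at the current depth
  void startArray() {
    if (depth < MAX_DEPTH) {
      sequenceCounts[depth] = 0;
    }
  }

  // true while the current array is still under the element limit
  boolean nextSequenceElement() {
    if (depth < MAX_DEPTH) {
      return ++sequenceCounts[depth] <= MAX_SEQUENCE_LENGTH;
    }
    return false; // the scrubber bails out on overly deep documents before this can happen
  }

  public static void main(String[] args) {
    SequenceLimiter limiter = new SequenceLimiter();
    limiter.startDocument(); // enter the root document
    limiter.startArray();
    int written = 0;
    for (int i = 0; i < 300; i++) {
      if (limiter.nextSequenceElement()) {
        written++; // only the first MAX_SEQUENCE_LENGTH elements would be piped
      }
    }
    limiter.endDocument();
    System.out.println(written); // prints 256
  }
}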
@@ -270,6 +270,7 @@ public void writeRegularExpression(String name, BsonRegularExpression regularExp

@Override
public void writeStartArray() {
context.startArray();
context.write('[');
}

@@ -455,7 +456,11 @@ private void pipeArray(String attribute, BsonReader reader) {
writeStartArray(attribute);
BsonType type = reader.readBsonType();
while (type != BsonType.END_OF_DOCUMENT) {
-      pipeValue(null, reader);
+      if (context.nextSequenceElement()) {
+        pipeValue(null, reader);
+      } else {
+        reader.skipValue();
+      }
type = reader.readBsonType();
nextValue(type);
}
@@ -466,6 +471,9 @@
private void pipeArray(String attribute, final BsonArray array) {
writeStartArray(attribute);
for (BsonValue cur : array) {
if (!context.nextSequenceElement()) {
break;
}
pipeValue(null, cur);
}
writeEndArray();
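The reader-based pipeArray now consults context.nextSequenceElement() and falls back to reader.skipValue() for elements past the limit, so oversized arrays are advanced past rather than copied into the output; the identical change in the second scrubber file further down follows the same pattern. Below is a small standalone illustration of that skip pattern using the driver's org.bson reader API. The document literal, the limit of 3, and the class name SkipValueDemo are made up for the example; only the readBsonType()/skipValue() usage mirrors the change.

import org.bson.BsonDocument;
import org.bson.BsonDocumentReader;
import org.bson.BsonReader;
import org.bson.BsonType;

public class SkipValueDemo {
  public static void main(String[] args) {
    // sample document and limit chosen for the demo; the PR uses MAX_SEQUENCE_LENGTH = 256
    BsonDocument doc = BsonDocument.parse("{\"values\": [10, 20, 30, 40, 50]}");
    int limit = 3;

    BsonReader reader = new BsonDocumentReader(doc);
    reader.readStartDocument();
    reader.readBsonType(); // position on the "values" field
    reader.readName();
    reader.readStartArray();

    int seen = 0;
    while (reader.readBsonType() != BsonType.END_OF_DOCUMENT) {
      if (++seen <= limit) {
        System.out.println(reader.readInt32()); // within the limit: consume the value (10, 20, 30)
      } else {
        reader.skipValue(); // over the limit: advance the reader without materializing the value
      }
    }
    reader.readEndArray();
    reader.readEndDocument();
  }
}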
@@ -27,4 +27,28 @@ class BsonScrubber31Test extends InstrumentationSpecification {
"{\"update\" : \"orders\", \"ordered\" : false, \"writeConcern\" : { \"w\" : \"majority\" }, \"updates\": [{ \"q\" : { \"_id\" : 1 }, \"u\" : { \"orderId\" : \"Account1\", \"qty\" : 10 } } ]}" | "{\"update\": \"orders\", \"ordered\": false, \"writeConcern\": {\"w\": \"majority\"}, \"updates\": []}"
"{\"insert\" : \"stuff\", \"ordered\" : true, \"writeConcern\" : { \"w\" : 10 }, \"documents\": [{ \"_id\" : { \"s\" : 0, \"i\": \"DEADBEEF\" }, \"array\" : [0, \"foo\", {\"foo\": 10}], \"qty\" : 10 } ]}" | "{\"insert\": \"stuff\", \"ordered\": true, \"writeConcern\": {\"w\": 10}, \"documents\": []}"
}

def "test BSON scrubber truncates long sequences"() {
setup:
// Create a document with an array containing 300 elements (more than the limit)
def elements = (1..300).collect { "\"item$it\"" }.join(", ")
def input = "{\"find\": \"collection\", \"filter\": {\"\$or\": [$elements]}}"
BsonDocument doc = BsonDocument.parse(input)

when:
BsonScrubber31 scrubber = new BsonScrubber31()
scrubber.pipe(new BsonDocumentReader(doc))
String resourceName = scrubber.getResourceName()

then:
// The output should contain exactly 256 "?" elements (the first 256 get obfuscated)
// and should still be valid JSON with a closing bracket
resourceName.startsWith("{\"find\": \"collection\", \"filter\": {\"\$or\": [")
resourceName.endsWith("]}}")
// Count the number of obfuscated values - should be exactly 256
resourceName.count("\"?\"") == 256

cleanup:
scrubber.close()
}
}
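For reference, the shape the new test expects: the 300 string elements under "$or" collapse to at most 256 obfuscated "?" entries while the JSON stays well formed. The throwaway sketch below just builds a string of that shape and checks the same three conditions the test asserts; the ", " separator and overall layout are assumptions about the scrubber's formatting, while the assertions themselves come from the test above.

import java.util.Collections;

public class ExpectedShapeSketch {
  public static void main(String[] args) {
    // 256 obfuscated elements, joined with the ", " separator the scrubber appears to use
    String obfuscated = String.join(", ", Collections.nCopies(256, "\"?\""));
    String expected = "{\"find\": \"collection\", \"filter\": {\"$or\": [" + obfuscated + "]}}";

    System.out.println(expected.startsWith("{\"find\": \"collection\", \"filter\": {\"$or\": [")); // true
    System.out.println(expected.endsWith("]}}")); // true

    // count non-overlapping occurrences of "?" (with quotes), as the test does with Groovy's String.count
    int count = 0;
    for (int i = expected.indexOf("\"?\""); i >= 0; i = expected.indexOf("\"?\"", i + 3)) {
      count++;
    }
    System.out.println(count); // 256
  }
}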
@@ -279,6 +279,7 @@ public void writeRegularExpression(String name, BsonRegularExpression regularExp

@Override
public void writeStartArray() {
context.startArray();
context.write('[');
}

@@ -467,7 +468,11 @@ private void pipeArray(String attribute, BsonReader reader) {
writeStartArray(attribute);
BsonType type = reader.readBsonType();
while (type != BsonType.END_OF_DOCUMENT) {
-      pipeValue(null, reader);
+      if (context.nextSequenceElement()) {
+        pipeValue(null, reader);
+      } else {
+        reader.skipValue();
+      }
type = reader.readBsonType();
nextValue(type);
}
@@ -478,6 +483,9 @@
private void pipeArray(String attribute, final BsonArray array) {
writeStartArray(attribute);
for (BsonValue cur : array) {
if (!context.nextSequenceElement()) {
break;
}
pipeValue(null, cur);
}
writeEndArray();
@@ -27,4 +27,28 @@ class BsonScrubber34Test extends InstrumentationSpecification {
"{\"update\" : \"orders\", \"ordered\" : false, \"writeConcern\" : { \"w\" : \"majority\" }, \"updates\": [{ \"q\" : { \"_id\" : 1 }, \"u\" : { \"orderId\" : \"Account1\", \"qty\" : 10 } } ]}" | "{\"update\": \"orders\", \"ordered\": false, \"writeConcern\": {\"w\": \"majority\"}, \"updates\": []}"
"{\"insert\" : \"stuff\", \"ordered\" : true, \"writeConcern\" : { \"w\" : 10 }, \"documents\": [{ \"_id\" : { \"s\" : 0, \"i\": \"DEADBEEF\" }, \"array\" : [0, \"foo\", {\"foo\": 10}], \"qty\" : 10 } ]}" | "{\"insert\": \"stuff\", \"ordered\": true, \"writeConcern\": {\"w\": 10}, \"documents\": []}"
}

def "test BSON scrubber truncates long sequences"() {
setup:
// Create a document with an array containing 300 elements (more than the limit)
def elements = (1..300).collect { "\"item$it\"" }.join(", ")
def input = "{\"find\": \"collection\", \"filter\": {\"\$or\": [$elements]}}"
BsonDocument doc = BsonDocument.parse(input)

when:
BsonScrubber34 scrubber = new BsonScrubber34()
scrubber.pipe(new BsonDocumentReader(doc))
String resourceName = scrubber.getResourceName()

then:
// The output should contain exactly 256 "?" elements (the first 256 get obfuscated)
// and should still be valid JSON with a closing bracket
resourceName.startsWith("{\"find\": \"collection\", \"filter\": {\"\$or\": [")
resourceName.endsWith("]}}")
// Count the number of obfuscated values - should be exactly 256
resourceName.count("\"?\"") == 256

cleanup:
scrubber.close()
}
}