From c3720acc475642677098ce6b4bf28be2ba568c80 Mon Sep 17 00:00:00 2001
From: Samuel Williams
Date: Tue, 2 Jun 2026 08:44:47 +0900
Subject: [PATCH] Add depth and size limits to Console::Format::Safe

Fold maximum-size enforcement into Safe rather than a separate wrapper:

- Rename `limit` to `depth_limit` (with a deprecated `limit:` alias) to
  clarify its purpose alongside the new `size_limit`.
- Add `size_limit` (default 16 KiB, `nil` disables). When the fast
  serialization exceeds it, the record is rebuilt field-by-field, keeping
  as many top-level fields as fit.
- Unify the failure and size diagnostics into a single `truncated` object
  mapping each degraded field to why: `true` (dropped for size) or its
  error (value recovered, could not serialize directly). Falls back to
  `truncated: true` when detail does not fit.
- Serialize hash-like records via `to_hash`.

Removes the separate Console::Format::Truncated class.
---
 lib/console/format.rb       |   1 +
 lib/console/format/safe.rb  | 167 +++++++++++++++++++++++++----
 releases.md                 |   6 ++
 test/console/format/safe.rb | 217 +++++++++++++++++++++++++++++++++---
 test/console/output.rb      |   4 +-
 5 files changed, 361 insertions(+), 34 deletions(-)

diff --git a/lib/console/format.rb b/lib/console/format.rb
index 9015eb7..7c9ed07 100644
--- a/lib/console/format.rb
+++ b/lib/console/format.rb
@@ -6,6 +6,7 @@
 require_relative "format/safe"
 
 module Console
+	# @namespace
 	module Format
 		# A safe format for converting objects to strings.
 		#
diff --git a/lib/console/format/safe.rb b/lib/console/format/safe.rb
index e9bf3ab..2d58465 100644
--- a/lib/console/format/safe.rb
+++ b/lib/console/format/safe.rb
@@ -10,31 +10,166 @@ module Console
 	module Format
 		# A safe format for converting objects to strings.
 		#
-		# Handles issues like circular references and encoding errors.
+		# Handles issues like circular references, encoding errors, excessive nesting depth, and excessive output size.
 		class Safe
+			# The JSON fragment used as the truncation marker when dropped fields cannot be named.
+			TRUNCATED = "\"truncated\":true"
+			
 			# Create a new safe format.
 			#
 			# @parameter format [JSON] The format to use for serialization.
-			# @parameter limit [Integer] The maximum depth to recurse into objects.
+			# @parameter depth_limit [Integer] The maximum depth to recurse into objects (the JSON `max_nesting`).
+			# @parameter size_limit [Integer | Nil] The maximum byte size of the serialized output, or `nil` to disable size limiting. Limits below {TRUNCATED} (the minimal marker) cannot be honoured.
 			# @parameter encoding [Encoding] The encoding to use for strings.
-			def initialize(format: ::JSON, limit: 12, encoding: ::Encoding::UTF_8)
+			# @parameter limit [Integer | Nil] Deprecated alias for `depth_limit`.
+			def initialize(format: ::JSON, depth_limit: 12, size_limit: 16 * 1024, encoding: ::Encoding::UTF_8, limit: nil)
+				if limit
+					warn "Console::Format::Safe `limit:` is deprecated, use `depth_limit:` instead.", uplevel: 1, category: :deprecated
+					depth_limit = limit
+				end
+				
 				@format = format
-				@limit = limit
+				@depth_limit = depth_limit
+				@size_limit = size_limit
 				@encoding = encoding
 			end
 			
+			# @attribute [Integer] The maximum depth to recurse into objects.
+			attr :depth_limit
+			
+			# @attribute [Integer | Nil] The maximum byte size of the serialized output.
+			attr :size_limit
+			
 			# Dump the given object to a string.
 			#
+			# The common case is a single fast serialization. If that fails (e.g. circular
+			# references, excessive nesting, or encoding errors) or its output exceeds
+			# {size_limit}, it falls back to {safe_dump}, which rebuilds the record
+			# field-by-field within the limit.
+			#
 			# @parameter object [Object] The object to dump.
 			# @returns [String] The dumped object.
 			def dump(object)
-				@format.dump(object, @limit)
-			rescue SystemStackError, StandardError => error
-				@format.dump(safe_dump(object, error))
+				buffer = @format.dump(object, @depth_limit)
+				
+				if @size_limit and buffer.bytesize > @size_limit
+					return safe_dump(object)
+				end
+				
+				return buffer
+			rescue SystemStackError, StandardError
+				return safe_dump(object)
 			end
 			
 			private
 			
+			# Produce a safe, size-limited serialization of the given object. This is the
+			# fallback path, used both when direct serialization fails (an exception) and
+			# when its output exceeds {size_limit}.
+			#
+			# Each top-level value is serialized independently and defensively, so a single
+			# un-serializable or oversized value cannot break or bloat the whole record.
+			# Whenever a field is degraded, the reason is recorded in a trailing `"truncated"`
+			# object that maps the field name to why it was truncated:
+			#
+			# - `"key": true` — the value was dropped because it did not fit the size limit.
+			# - `"key": {error}` — the value could not be serialized directly; a safe
+			#   representation was kept in its place and the triggering error is recorded.
+			#
+			# Fields are kept while they fit, always reserving room for at least a minimal
+			# `"truncated":true` marker. The detailed reason map is then emitted only if it
+			# fits in the remaining space; otherwise it degrades to `"truncated":true`. This
+			# is best-effort — in the worst case the per-field detail is lost — but it keeps
+			# the bookkeeping simple and the size guarantee hard.
+			#
+			# @parameter object [Object] The object to serialize.
+			# @returns [String] The safe, size-limited serialized record.
+			def safe_dump(object)
+				# Serialize hash-like objects field-by-field; anything else falls through to the
+				# error handler below, which emits a minimal truncated marker.
+				object = object.to_hash
+				
+				# Serialize each field once, capturing the error for any value that could not be
+				# serialized directly. Our own "truncated" key is skipped so it is never duplicated.
+				errors = {}
+				fragments = []
+				object.each do |key, value|
+					name = key.to_s
+					next if name == "truncated"
+					
+					fragment, error = dump_pair(key, value)
+					errors[name] = error_info(error) if error
+					fragments << [name, fragment]
+				end
+				
+				# Assemble the body, keeping each field while it fits (every field fits when the size
+				# limit is `nil`) — always reserving room for at least a minimal `"truncated":true` marker.
+				# Each truncated field's reason is collected: its error (value recovered) or `true` (dropped for size).
+				buffer = +"{"
+				first = true
+				reasons = {}
+				
+				fragments.each do |name, fragment|
+					if @size_limit.nil? or buffer.bytesize + (first ? 0 : 1) + fragment.bytesize + TRUNCATED.bytesize + 2 <= @size_limit
+						buffer << "," unless first
+						buffer << fragment
+						first = false
+						
+						# The value was kept; if it had to be recovered, note why.
+						reasons[name] = errors[name] if errors[name]
+					else
+						# The value did not fit and was dropped entirely.
+						reasons[name] = true
+					end
+				end
+				
+				unless reasons.empty?
+					# Include the detailed reasons if they fit, otherwise fall back to the minimal
+					# marker so the truncation is still signalled.
+					detailed = "\"truncated\":#{@format.dump(reasons)}"
+					fits = @size_limit.nil? || buffer.bytesize + (first ? 0 : 1) + detailed.bytesize + 1 <= @size_limit
+					
+					buffer << "," unless first
+					buffer << (fits ? detailed : TRUNCATED)
+				end
+				
+				buffer << "}"
+				
+				return buffer
+			rescue SystemStackError, StandardError
+				return "{#{TRUNCATED}}"
+			end
+			
+			# Serialize a single top-level `"key":value` pair, safely handling values that
+			# cannot be serialized directly.
+			#
+			# @parameter key [Object] The field key.
+			# @parameter value [Object] The field value.
+			# @returns [Array(String, Exception | Nil)] The `"key":value` fragment and the error, if recovery was needed.
+			def dump_pair(key, value)
+				value_json, error = dump_value(value)
+				
+				return ["#{dump_string(String(key))}:#{value_json}", error]
+			end
+			
+			# Serialize a single value, falling back to a safe representation on failure.
+			#
+			# @parameter value [Object] The value to serialize.
+			# @returns [Array(String, Exception | Nil)] The serialized value and the error, if recovery was needed.
+			def dump_value(value)
+				[@format.dump(value, @depth_limit), nil]
+			rescue SystemStackError, StandardError => error
+				[@format.dump(safe_dump_recurse(value)), error]
+			end
+			
+			# Serialize a string as a JSON string, encoding it safely first.
+			#
+			# @parameter value [String] The string to serialize.
+			# @returns [String] The serialized (quoted) string.
+			def dump_string(value)
+				@format.dump(value.encode(@encoding, invalid: :replace, undef: :replace))
+			end
+			
 			# Filter the backtrace to remove duplicate frames and reduce verbosity.
 			#
 			# @parameter error [Exception] The exception to filter.
@@ -76,24 +211,16 @@ def filter_backtrace(error)
 				return frames
 			end
 			
-			# Dump the given object to a string, replacing it with a safe representation if there is an error.
-			#
-			# This is a slow path so we try to avoid it.
+			# Build a safe, primitive representation of an error for inclusion as an `"error"` field.
 			#
-			# @parameter object [Object] The object to dump.
 			# @parameter error [Exception] The error that occurred while dumping the object.
-			# @returns [Hash] The dumped (truncated) object including error details.
-			def safe_dump(object, error)
-				object = safe_dump_recurse(object)
-				
-				object[:truncated] = true
-				object[:error] = {
+			# @returns [Hash] The error details (class, message, filtered backtrace).
+			def error_info(error)
+				{
 					class: safe_dump_recurse(error.class.name),
 					message: safe_dump_recurse(error.message),
 					backtrace: safe_dump_recurse(filter_backtrace(error)),
 				}
-				
-				return object
 			end
 			
 			# Create a new hash with identity comparison.
@@ -107,7 +234,7 @@ def default_objects
 			# @parameter limit [Integer] The maximum depth to recurse into objects.
 			# @parameter objects [Hash] The objects that have already been visited.
 			# @returns [Object] The dumped object as a primitive representation.
-			def safe_dump_recurse(object, limit = @limit, objects = default_objects)
+			def safe_dump_recurse(object, limit = @depth_limit, objects = default_objects)
 				case object
 				when Hash
 					if limit <= 0 || objects[object]
diff --git a/releases.md b/releases.md
index 463d502..437dfe4 100644
--- a/releases.md
+++ b/releases.md
@@ -1,5 +1,11 @@
 # Releases
 
+## Unreleased
+
+  - Add a `size_limit` to `Console::Format::Safe` (default 16KiB) which rebuilds oversized records field-by-field, keeping as many top-level fields as fit within the limit.
+  - Degraded fields are recorded in a `truncated` object that maps each field name to why it was truncated: `true` (dropped for size) or the error (the value could not be serialized directly and a safe representation was kept in its place).
+  - Rename `Console::Format::Safe`'s `limit:` to `depth_limit:` (with a deprecated `limit:` alias) to clarify its purpose alongside the new `size_limit:`.
+
 ## v1.35.0
 
   - Fix handling of `Errno::ENODEV` errors when calculating the width of a terminal that was been re-opened to `File::NULL`.
diff --git a/test/console/format/safe.rb b/test/console/format/safe.rb
index 4e0d5cb..1f3f6e7 100644
--- a/test/console/format/safe.rb
+++ b/test/console/format/safe.rb
@@ -37,18 +37,20 @@ def to_json(options = nil)
 			end
 			
 			message = JSON.parse(
-				format.dump(object)
+				format.dump({broken: object})
 			)
 			
+			# The error is attributed to the specific field whose value could not serialize.
 			expect(message).to have_keys(
-				"truncated" => be == true,
-				"error" => have_keys(
-					"class" => be == "SystemStackError",
-					"message" => be =~ /stack level too deep/,
+				"truncated" => have_keys(
+					"broken" => have_keys(
+						"class" => be == "SystemStackError",
+						"message" => be =~ /stack level too deep/,
+					)
 				)
 			)
 			
-			backtrace = message["error"]["backtrace"]
+			backtrace = message["truncated"]["broken"]["backtrace"]
 			expect(backtrace).to be_a(Array)
 			expect(backtrace).to be == [
 				"A",
@@ -70,19 +72,210 @@ def to_json(options = nil)
 			end
 			
 			message = JSON.parse(
-				format.dump(object)
+				format.dump({broken: object})
 			)
 			
 			expect(message).to have_keys(
-				"truncated" => be == true,
-				"error" => have_keys(
-					"class" => be == "StandardError",
-					"message" => be =~ /something went wrong/,
+				"truncated" => have_keys(
+					"broken" => have_keys(
+						"class" => be == "StandardError",
+						"message" => be =~ /something went wrong/,
+					)
 				)
 			)
 			
-			backtrace = message["error"]["backtrace"]
+			backtrace = message["truncated"]["broken"]["backtrace"]
 			expect(backtrace).to be_a(Array)
 		end
 	end
+	
+	with "size limiting" do
+		let(:format) {subject.new(size_limit: 128)}
+		
+		it "passes through records within the size limit" do
+			line = format.dump({severity: "info", message: "hi"})
+			parsed = JSON.parse(line)
+			expect(parsed["message"]).to be == "hi"
+			expect(parsed["truncated"]).to be_nil
+		end
+		
+		it "drops oversized fields and names them in the marker" do
+			line = format.dump({severity: "info", message: "x" * 1024, subject: "short"})
+			parsed = JSON.parse(line)
+			expect(line.bytesize).to be <= 128
+			expect(parsed["severity"]).to be == "info"
+			expect(parsed["subject"]).to be == "short"
+			expect(parsed["message"]).to be_nil
+			# The marker maps the dropped field to its reason (`true` = dropped for size).
+			expect(parsed["truncated"]).to be == {"message" => true}
+		end
+		
+		it "keeps as many leading fields as fit when there are many" do
+			record = {}
+			40.times{|i| record[:"field_#{i}"] = "ab"}
+			line = format.dump(record)
+			parsed = JSON.parse(line)
+			expect(line.bytesize).to be <= 128
+			expect(parsed["field_0"]).to be == "ab"
+			expect(parsed["field_39"]).to be_nil
+			# Truncation is always reported (named fields where they fit, otherwise `true`).
+			expect(parsed["truncated"]).not.to be_nil
+		end
+		
+		it "falls back to a boolean marker when dropped names cannot be listed" do
+			# A tiny limit leaves no room to name dropped fields, so rather than an empty
+			# or misleading list, the marker stays as `true`.
+			tiny = subject.new(size_limit: 20)
+			line = tiny.dump({aaa: "x" * 50, bbb: "y" * 50})
+			expect(line.bytesize).to be <= 20
+			expect(JSON.parse(line)["truncated"]).to be == true
+		end
+		
+		it "skips a huge leading value so later small fields survive" do
+			line = format.dump({message: "x" * 5000, severity: "info", tag: "a"})
+			parsed = JSON.parse(line)
+			expect(line.bytesize).to be <= 128
+			expect(parsed["severity"]).to be == "info"
+			expect(parsed["tag"]).to be == "a"
+			expect(parsed["message"]).to be_nil
+			expect(parsed["truncated"]).to be == {"message" => true}
+		end
+		
+		it "stays valid and within the limit when the error path also overflows" do
+			# A circular reference forces the error path while an oversized field forces size
+			# truncation. Under a hard limit, the error details and data compete for space
+			# (either may be dropped), but the result is always valid and within the limit,
+			# with a single truncation marker.
+			record = {severity: "info", payload: "x" * 2000}
+			record[:self] = record
+			line = format.dump(record)
+			expect(line.bytesize).to be <= 128
+			expect(JSON.parse(line)["truncated"]).not.to be_nil
+			expect(line.scan(/"truncated"/).size).to be == 1
+		end
+		
+		it "returns a minimal marker for oversized non-hash records" do
+			# Non-hash records cannot be serialized field-by-field, so they degrade to the
+			# minimal truncated marker.
+			line = format.dump(["x" * 200] * 5)
+			expect(JSON.parse(line)).to be == {"truncated" => true}
+		end
+		
+		it "passes through output that is exactly at the limit" do
+			record = {keep: "v", note: "exactly"}
+			exact = JSON.dump(record).bytesize
+			expect(subject.new(size_limit: exact).dump(record)).to be == JSON.dump(record)
+		end
+		
+		it "truncates when the output is one byte over the limit" do
+			record = {keep: "v", drop: "x" * 100}
+			over = JSON.dump(record).bytesize - 1
+			line = subject.new(size_limit: over).dump(record)
+			parsed = JSON.parse(line)
+			expect(line.bytesize).to be <= over
+			expect(parsed["keep"]).to be == "v"
+			expect(parsed["truncated"]).to be == {"drop" => true}
+		end
+		
+		it "keeps every result within the limit across a range of sizes" do
+			record = {severity: "info", message: "x" * 200, subject: "short", extra: "y" * 80}
+			(18..400).each do |limit|
+				line = subject.new(size_limit: limit).dump(record)
+				expect(line.bytesize).to be <= limit
+				expect(JSON.parse(line)).to be_a(Hash)
+			end
+		end
+		
+		it "respects the byte limit with multi-byte characters" do
+			# Each "é" is two bytes, so the byte limit must not be confused with length.
+			record = {message: "é" * 200, tag: "x"}
+			line = subject.new(size_limit: 64).dump(record)
+			expect(line.bytesize).to be <= 64
+			expect(JSON.parse(line)).to be_a(Hash)
+		end
+		
+		it "emits valid JSON even when the limit is below the minimal marker size" do
+			# {"truncated":true} (18 bytes) cannot be made smaller, so limits below it
+			# cannot be honoured — but the output is still valid JSON.
+			line = subject.new(size_limit: 5).dump({a: "x" * 100})
+			expect(JSON.parse(line)).to be == {"truncated" => true}
+		end
+	end
+	
+	with "failed fields" do
+		it "attributes the error to the field whose value could not be serialized" do
+			recursive = {}
+			recursive[:loop] = recursive
+			line = format.dump({severity: "info", payload: recursive})
+			parsed = JSON.parse(line)
+			# The offending field is named in `truncated` with its error as the reason.
+			expect(parsed["truncated"]["payload"]).to have_keys("class", "message", "backtrace")
+			# Its value is still present as a recovered, safe representation.
+			expect(parsed["payload"]).not.to be_nil
+			# Fields that serialized fine are untouched, and carry no reason.
+			expect(parsed["severity"]).to be == "info"
+			expect(parsed["truncated"]["severity"]).to be_nil
+		end
+		
+		it "recovers nested primitives and objects within a failed value" do
+			# The recovered value exercises the safe recursion over a number, a custom
+			# object (converted via to_s), the same object seen twice, and a cycle.
+			shared = Object.new
+			inner = {count: 1, first: shared, second: shared}
+			inner[:loop] = inner
+			
+			line = format.dump({payload: inner})
+			parsed = JSON.parse(line)
+			expect(parsed["payload"]["count"]).to be == 1
+			expect(parsed["payload"]["loop"]).to be == "{...}"
+			expect(parsed["truncated"]["payload"]).not.to be_nil
+		end
+	end
+	
+	with "deprecated limit:" do
+		it "sets depth_limit and emits a deprecation warning" do
+			# `warn` is routed through Console itself, so intercept it at the source.
+			expect(Warning).to receive(:warn)
+			
+			format = subject.new(limit: 5)
+			expect(format.depth_limit).to be == 5
+		end
+	end
+	
+	with "hash-like records" do
+		# A record that is not a Hash but implements the implicit to_hash protocol.
+		let(:record) do
+			klass = Class.new do
+				def initialize(hash) = (@hash = hash)
+				def to_hash = @hash
+				# Force the fast path to fail so the safe path handles it.
+				def to_json(*) = raise("no json")
+			end
+			klass.new({severity: "info", message: "hello"})
+		end
+		
+		it "serializes hash-like objects field-by-field via to_hash" do
+			parsed = JSON.parse(format.dump(record))
+			expect(parsed["severity"]).to be == "info"
+			expect(parsed["message"]).to be == "hello"
+		end
+	end
+	
+	with "size_limit: nil" do
+		let(:format) {subject.new(size_limit: nil)}
+		
+		it "does not truncate regardless of size" do
+			line = format.dump({message: "x" * 5000})
+			expect(JSON.parse(line)["message"]).to be == "x" * 5000
+		end
+		
+		it "still recovers un-serializable fields" do
+			recursive = {}
+			recursive[:loop] = recursive
+			parsed = JSON.parse(format.dump({severity: "info", payload: recursive}))
+			expect(parsed["severity"]).to be == "info"
+			expect(parsed["payload"]).not.to be_nil
+			expect(parsed["truncated"]["payload"]).to have_keys("class", "message", "backtrace")
+		end
+	end
 end
diff --git a/test/console/output.rb b/test/console/output.rb
index 19a2042..db17e91 100644
--- a/test/console/output.rb
+++ b/test/console/output.rb
@@ -86,7 +86,7 @@
 			output.call("Hello", arguments)
 			
 			message = JSON.parse(capture.string)
-			expect(message["truncated"]).to be == true
+			expect(message["truncated"]["message"]).not.to be_nil
 			expect(message["message"]).to be == ["[...]"]
 		end
 		
@@ -97,7 +97,7 @@
 			output.call("Hello", arguments)
 			
 			message = JSON.parse(capture.string)
-			expect(message["truncated"]).to be == true
+			expect(message["truncated"]["message"]).not.to be_nil
 			expect(message["message"]).to be == {"arguments"=>"{...}"}
 		end
 	end