From c3581e83644b1cdae6c2b85955a6ef5f4aab17ec Mon Sep 17 00:00:00 2001 From: He-Pin Date: Thu, 28 May 2026 13:32:39 +0800 Subject: [PATCH 1/7] perf: speed up manifest JSON rendering Motivation: std.manifestJson* still contributed to the local Scala Native gap versus source-built jrsonnet, especially in real-world object-heavy rendering. Modification: Add an internal StringBuilder-backed FastMaterializeJsonRenderer for std.manifestJson, std.manifestJsonMinified, and std.manifestJsonEx while preserving the public MaterializeJsonRenderer StringWriter API. Reuse an in-place codepoint key sorter backed by java.util.Arrays.sort, and fix raw-surrogate prefix ordering in compareStringsByCodepoint. Result: Full validation passed: ./mill --no-server --ticker false --color false __.reformat and ./mill --no-server --ticker false --color false -j 1 __.test reported 451/451 tests passing. JMH regression benchmarks: manifestJsonEx 0.055 ms/op, realistic2 43.596 ms/op, gen_big_object 0.842 ms/op. Direct hyperfine against source-built jrsonnet: manifestJsonEx sjsonnet-native 5.090 ms vs jrsonnet 4.075 ms; kube-prometheus sjsonnet-native 143.738 ms vs jrsonnet 97.385 ms. 
--- sjsonnet/src/sjsonnet/Renderer.scala | 122 ++++++++++++++++++ sjsonnet/src/sjsonnet/Util.scala | 8 +- .../src/sjsonnet/stdlib/ManifestModule.scala | 6 +- .../src/sjsonnet/UnicodeHandlingTests.scala | 15 +++ 4 files changed, 144 insertions(+), 7 deletions(-) diff --git a/sjsonnet/src/sjsonnet/Renderer.scala b/sjsonnet/src/sjsonnet/Renderer.scala index 39a123ae..0731ecf8 100644 --- a/sjsonnet/src/sjsonnet/Renderer.scala +++ b/sjsonnet/src/sjsonnet/Renderer.scala @@ -4,6 +4,41 @@ import java.io.{StringWriter, Writer} import upickle.core.{ArrVisitor, ObjVisitor} +final class StringBuilderWriter(initialCapacity: Int = 16) extends Writer { + private[this] val builder = new java.lang.StringBuilder(initialCapacity) + + override def write(c: Int): Unit = + builder.append(c.toChar) + + override def write(cbuf: Array[Char], off: Int, len: Int): Unit = + builder.append(cbuf, off, len) + + override def write(str: String): Unit = + builder.append(str) + + override def write(str: String, off: Int, len: Int): Unit = + builder.append(str, off, off + len) + + override def append(c: Char): Writer = { + builder.append(c) + this + } + + override def append(csq: CharSequence): Writer = { + builder.append(if (csq == null) "null" else csq) + this + } + + override def append(csq: CharSequence, start: Int, end: Int): Writer = { + builder.append(if (csq == null) "null" else csq, start, end) + this + } + + override def flush(): Unit = () + override def close(): Unit = () + override def toString: String = builder.toString +} + /** * Custom JSON renderer to try and match the behavior of google/jsonnet's render: * @@ -279,6 +314,93 @@ final case class MaterializeJsonRenderer( } } +private[sjsonnet] final class FastMaterializeJsonRenderer( + indent: Int = 4, + escapeUnicode: Boolean = false, + newline: String = "\n", + keyValueSeparator: String = ": ", + private val outWriter: StringBuilderWriter = new StringBuilderWriter()) + extends BaseCharRenderer( + outWriter, + indent, + escapeUnicode, 
+ newline.toCharArray + ) { + private val newLineCharArray = newline.toCharArray + private val keyValueSeparatorCharArray = keyValueSeparator.toCharArray + + private val reusableArrVisitor: ArrVisitor[StringBuilderWriter, StringBuilderWriter] { + def subVisitor: sjsonnet.FastMaterializeJsonRenderer + } = new ArrVisitor[StringBuilderWriter, StringBuilderWriter] { + def subVisitor: sjsonnet.FastMaterializeJsonRenderer = FastMaterializeJsonRenderer.this + def visitValue(v: StringBuilderWriter, index: Int): Unit = { + flushBuffer() + commaBuffered = true + } + def visitEnd(index: Int): StringBuilderWriter = { + commaBuffered = false + depth -= 1 + renderIndent() + elemBuilder.append(']') + flushCharBuilder() + outWriter + } + } + + private val reusableObjVisitor: ObjVisitor[StringBuilderWriter, StringBuilderWriter] { + def subVisitor: sjsonnet.FastMaterializeJsonRenderer + def visitKey(index: Int): sjsonnet.FastMaterializeJsonRenderer + } = new ObjVisitor[StringBuilderWriter, StringBuilderWriter] { + def subVisitor: sjsonnet.FastMaterializeJsonRenderer = FastMaterializeJsonRenderer.this + def visitKey(index: Int): sjsonnet.FastMaterializeJsonRenderer = + FastMaterializeJsonRenderer.this + def visitKeyValue(s: Any): Unit = { + elemBuilder.appendAll(keyValueSeparatorCharArray, keyValueSeparatorCharArray.length) + } + def visitValue(v: StringBuilderWriter, index: Int): Unit = { + commaBuffered = true + } + def visitEnd(index: Int): StringBuilderWriter = { + commaBuffered = false + depth -= 1 + renderIndent() + elemBuilder.append('}') + flushCharBuilder() + outWriter + } + } + + override def visitArray( + length: Int, + index: Int): upickle.core.ArrVisitor[StringBuilderWriter, StringBuilderWriter] { + def subVisitor: sjsonnet.FastMaterializeJsonRenderer + } = { + flushBuffer() + elemBuilder.append('[') + + depth += 1 + if (length == 0 && indent != -1) + elemBuilder.appendAll(newLineCharArray, newLineCharArray.length) + else renderIndent() + reusableArrVisitor + } + + 
override def visitObject( + length: Int, + index: Int): upickle.core.ObjVisitor[StringBuilderWriter, StringBuilderWriter] { + def subVisitor: sjsonnet.FastMaterializeJsonRenderer + def visitKey(index: Int): sjsonnet.FastMaterializeJsonRenderer + } = { + flushBuffer() + elemBuilder.append('{') + depth += 1 + if (length == 0 && indent != -1) + elemBuilder.appendAll(newLineCharArray, newLineCharArray.length) + else renderIndent() + reusableObjVisitor + } +} + object RenderUtils { // Pre-cached string representations of small integers (0-255) diff --git a/sjsonnet/src/sjsonnet/Util.scala b/sjsonnet/src/sjsonnet/Util.scala index d716ca4d..f4d6744a 100644 --- a/sjsonnet/src/sjsonnet/Util.scala +++ b/sjsonnet/src/sjsonnet/Util.scala @@ -128,10 +128,10 @@ object Util { while (i1 < n1 && i2 < n2) { val c1 = s1.charAt(i1) val c2 = s2.charAt(i2) - // Fast path: equal chars can be skipped without surrogate checks. - // Even for surrogate pairs, equal high surrogates at position i lead to - // comparing low surrogates at i+1, producing the correct codepoint ordering. - if (c1 == c2) { + // Fast path: equal non-surrogates can be skipped without codepoint checks. + // Equal surrogates still need codepoint decoding because a raw surrogate and + // a valid surrogate pair can share the same leading UTF-16 code unit. 
+ if (c1 == c2 && !Character.isSurrogate(c1)) { i1 += 1 i2 += 1 } else if (!Character.isSurrogate(c1) && !Character.isSurrogate(c2)) { diff --git a/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala b/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala index ee8a8f17..70af6e56 100644 --- a/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala +++ b/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala @@ -39,7 +39,7 @@ object ManifestModule extends AbstractFunctionModule { */ private object ManifestJson extends Val.Builtin1("manifestJson", "v") { def evalRhs(v: Eval, ev: EvalScope, pos: Position): Val = - Val.Str(pos, Materializer.apply0(v.value, MaterializeJsonRenderer())(ev).toString) + Val.Str(pos, Materializer.apply0(v.value, new FastMaterializeJsonRenderer())(ev).toString) } /** @@ -57,7 +57,7 @@ object ManifestModule extends AbstractFunctionModule { Materializer .apply0( v.value, - MaterializeJsonRenderer(indent = -1, newline = "", keyValueSeparator = ":") + new FastMaterializeJsonRenderer(indent = -1, newline = "", keyValueSeparator = ":") )(ev) .toString ) @@ -94,7 +94,7 @@ object ManifestModule extends AbstractFunctionModule { Materializer .apply0( v.value, - MaterializeJsonRenderer( + new FastMaterializeJsonRenderer( indent = i.value.asString.length, newline = newline.value.asString, keyValueSeparator = keyValSep.value.asString diff --git a/sjsonnet/test/src/sjsonnet/UnicodeHandlingTests.scala b/sjsonnet/test/src/sjsonnet/UnicodeHandlingTests.scala index d15b7b54..9d636bb9 100644 --- a/sjsonnet/test/src/sjsonnet/UnicodeHandlingTests.scala +++ b/sjsonnet/test/src/sjsonnet/UnicodeHandlingTests.scala @@ -119,6 +119,21 @@ object UnicodeHandlingTests extends TestSuite { eval("std.sort(['\\uD800\\uDC00', '\\uFFFF'])") ==> ujson.Arr("\uFFFF", "\uD800\uDC00") } + test("rawSurrogatePrefixOrdering") { + val rawSurrogatePrefix = "\uD800\uFFFF" // codepoints [0xD800, 0xFFFF] + val validSurrogatePair = "\uD800\uDC00" // codepoint [0x10000] + + 
assert(sjsonnet.Util.compareStringsByCodepoint(rawSurrogatePrefix, validSurrogatePair) < 0) + assert(sjsonnet.Util.compareStringsByCodepoint(validSurrogatePair, rawSurrogatePrefix) > 0) + + eval("(std.char(55296) + std.char(65535)) < (std.char(55296) + std.char(56320))") ==> + ujson.Bool(true) + + eval( + "std.sort([std.char(55296) + std.char(56320), std.char(55296) + std.char(65535)])" + ) ==> ujson.Arr(rawSurrogatePrefix, validSurrogatePair) + } + // Unpaired surrogate handling - sjsonnet-specific behavior // // Note: This is an intentional divergence from go-jsonnet and C++ jsonnet: From 2a5cdb39dc80303c4411aa9d0f1319c8d2e1f723 Mon Sep 17 00:00:00 2001 From: He-Pin Date: Sat, 30 May 2026 05:09:02 +0800 Subject: [PATCH 2/7] perf: skip escape scan for AsciiSafeStr in char renderer Motivation: The JVM/char render hot path (BaseCharRenderer.visitNonNullString) ran a CharSWAR.hasEscapeChar scan on every string, even for Val.AsciiSafeStr which is statically known to need no JSON escaping (chars 0x20-0x7e, no quote/backslash). The Native ByteRenderer already had this bypass; the char path did not. Modification: - Add BaseCharRenderer.visitAsciiSafeString: quote + bulk getChars + quote, correct even under escapeUnicode since all chars are <= 0x7e. - Route Val.AsciiSafeStr through it via a Materializer.visitStr helper at the three value-string sites; ujson.Value AST path falls back to visitString. - Add AsciiSafeRenderBenchmark to isolate the render path for A/B. Result: JMH render-only, 335KB string-heavy output: 1.606 -> 1.441 ms/op (-10.3%, non-overlapping error bands). 450/450 tests pass. 
--- .../bench/AsciiSafeRenderBenchmark.scala | 55 +++++++++++++++++++ sjsonnet/src/sjsonnet/BaseCharRenderer.scala | 21 +++++++ sjsonnet/src/sjsonnet/Materializer.scala | 24 ++++++-- 3 files changed, 95 insertions(+), 5 deletions(-) create mode 100644 bench/src/sjsonnet/bench/AsciiSafeRenderBenchmark.scala diff --git a/bench/src/sjsonnet/bench/AsciiSafeRenderBenchmark.scala b/bench/src/sjsonnet/bench/AsciiSafeRenderBenchmark.scala new file mode 100644 index 00000000..57e9c973 --- /dev/null +++ b/bench/src/sjsonnet/bench/AsciiSafeRenderBenchmark.scala @@ -0,0 +1,55 @@ +package sjsonnet.bench + +import org.openjdk.jmh.annotations.* +import org.openjdk.jmh.infra.* +import sjsonnet.* + +import java.io.{StringWriter, Writer} +import java.util.concurrent.TimeUnit + +/** + * Isolates the rendering path on a string-heavy value (many [[Val.AsciiSafeStr]] leaves). Used to + * A/B the AsciiSafeStr renderer fast path: evaluate once in @Setup, then benchmark only render(). + */ +@BenchmarkMode(Array(Mode.AverageTime)) +@Fork(1) +@Threads(1) +@Warmup(iterations = 5) +@Measurement(iterations = 5) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@State(Scope.Benchmark) +class AsciiSafeRenderBenchmark { + + private val source: String = + """{ + | obj: { ['key_field_%d' % i]: 'value_string_%d_abcdefghijklmnop' % i + | for i in std.range(0, 3000) }, + | joined: std.join(',', ['element_token_%d' % i for i in std.range(0, 8000)]), + |}""".stripMargin + + private var interp: Interpreter = _ + private var value: Val = _ + + @Setup + def setup(): Unit = { + val wd = MainBenchmark.testSuiteRoot + this.interp = new Interpreter( + Map.empty[String, String], + Map.empty[String, String], + OsPath(wd), + importer = new SjsonnetMainBase.SimpleImporter(IndexedSeq.empty, None), + parseCache = new DefaultParseCache + ) + val path = OsPath(wd / "ascii_safe_render_bench.jsonnet") + value = interp.evaluate(source, path).toOption.get + System.err.println("JSON length: " + render().length) + } + + @Benchmark 
def renderB(bh: Blackhole): Unit = bh.consume(render()) + + private def render(): String = { + val writer = new StringWriter + interp.materialize(value, new Renderer(writer, indent = 3)) + writer.toString + } +} diff --git a/sjsonnet/src/sjsonnet/BaseCharRenderer.scala b/sjsonnet/src/sjsonnet/BaseCharRenderer.scala index c858dde5..4fe51ed8 100644 --- a/sjsonnet/src/sjsonnet/BaseCharRenderer.scala +++ b/sjsonnet/src/sjsonnet/BaseCharRenderer.scala @@ -285,6 +285,27 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output]( out } + /** + * Fast path for [[Val.AsciiSafeStr]]: the string is statically known to contain only chars in + * 0x20-0x7E, excluding `"` and `\`. That means no JSON escaping is ever required — not even under + * `escapeUnicode`, since every char is <= 0x7E. Emit `"` + raw chars + `"` with a single bulk + * `getChars`, skipping the per-call `CharSWAR.hasEscapeChar` scan that [[visitNonNullString]] + * would otherwise perform. Mirrors the no-escape ASCII fast path, minus the scan. + */ + def visitAsciiSafeString(s: String, index: Int): T = { + flushBuffer() + val len = s.length + elemBuilder.ensureLength(len + 2) + elemBuilder.appendUnsafe('"') + val cbArr = elemBuilder.arr + val pos = elemBuilder.getLength + s.getChars(0, len, cbArr, pos) + elemBuilder.length = pos + len + elemBuilder.appendUnsafe('"') + flushCharBuilder() + out + } + final def renderIndent(): Unit = { if (indent == -1) () else if (indentCache != null && depth < BaseCharRenderer.MaxCachedDepth) { diff --git a/sjsonnet/src/sjsonnet/Materializer.scala b/sjsonnet/src/sjsonnet/Materializer.scala index f0767b8e..dda1c037 100644 --- a/sjsonnet/src/sjsonnet/Materializer.scala +++ b/sjsonnet/src/sjsonnet/Materializer.scala @@ -43,10 +43,24 @@ abstract class Materializer { * JIT-friendly) and automatically switches to an explicit stack-based iterative loop when the * recursion depth exceeds [[Settings.materializeRecursiveDepthLimit]]. 
*/ + /** + * Visit a string value, routing [[Val.AsciiSafeStr]] through the renderer's escape-free fast path + * when the visitor is a char renderer. Falls back to plain `visitString` for the ujson.Value AST + * path and for strings that may require escaping. + */ + @inline private def visitStr[T](s: Val.Str, visitor: Visitor[T, T]): T = { + storePos(s.pos) + visitor match { + case cr: BaseCharRenderer[T @unchecked] if s.isInstanceOf[Val.AsciiSafeStr] => + cr.visitAsciiSafeString(s.str, -1) + case _ => visitor.visitString(s.str, -1) + } + } + def apply0[T](v: Val, visitor: Visitor[T, T])(implicit evaluator: EvalScope): T = try { v match { - case Val.Str(pos, s) => storePos(pos); visitor.visitString(s, -1) - case obj: Val.Obj => + case s: Val.Str => visitStr(s, visitor) + case obj: Val.Obj => materializeRecursiveObj(obj, visitor, 0, Materializer.MaterializeContext(evaluator)) case Val.Num(pos, _) => storePos(pos); visitor.visitFloat64(v.asDouble, -1) case xs: Val.Arr => @@ -285,7 +299,7 @@ abstract class Materializer { (vt: @scala.annotation.switch) match { case 0 => // TAG_STR val s = childVal.asInstanceOf[Val.Str] - storePos(s.pos); childVisitor.visitString(s.str, -1) + visitStr(s, childVisitor) case 1 => // TAG_NUM storePos(childVal.pos); childVisitor.visitFloat64(childVal.asDouble, -1) case 2 => // TAG_TRUE @@ -436,8 +450,8 @@ abstract class Materializer { stack: java.util.ArrayDeque[Materializer.MaterializeFrame], ctx: Materializer.MaterializeContext)(implicit evaluator: EvalScope): Unit = { childVal match { - case Val.Str(pos, s) => - storePos(pos); parentVisitor.visitValue(childVisitor.visitString(s, -1), -1) + case s: Val.Str => + parentVisitor.visitValue(visitStr(s, childVisitor), -1) case obj: Val.Obj => pushObjFrame(obj, childVisitor, stack, ctx) case Val.Num(pos, _) => From 628087b0c1df8649b6f73ed1d7fd5ffeeb08b845 Mon Sep 17 00:00:00 2001 From: He-Pin Date: Sat, 30 May 2026 06:47:14 +0800 Subject: [PATCH 3/7] perf: use unsynchronized StringBuilderWriter 
in TomlRenderer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Motivation: std.manifestTomlEx routed through java.io.StringWriter, whose backing StringBuffer pays a monitor enter/exit on every write/flush on the hot TOML manifestation path. The JSON renderer already switched to the unsynchronized StringBuilderWriter in #874 (-9.3% on kube-prometheus native); TOML did not. Modification: - Switch TomlRenderer and the manifestTomlEx render path in ManifestModule from java.io.StringWriter to the package-private StringBuilderWriter. Output is byte-identical. std.deepJoin keeps StringWriter (separate concern). - Add TomlRenderBenchmark to A/B the render path. Result: Native hyperfine, TOML-heavy workload (1.79MB output): after ran 1.11 ± 0.07x faster than before (~10%), output byte-identical. JMH (whole-pipeline) showed AFTER < BEFORE in two independent rounds. 450/450 tests pass. --- .../sjsonnet/bench/TomlRenderBenchmark.scala | 56 +++++++++++++++++++ sjsonnet/src/sjsonnet/TomlRenderer.scala | 54 +++++++++--------- .../src/sjsonnet/stdlib/ManifestModule.scala | 12 ++-- 3 files changed, 92 insertions(+), 30 deletions(-) create mode 100644 bench/src/sjsonnet/bench/TomlRenderBenchmark.scala diff --git a/bench/src/sjsonnet/bench/TomlRenderBenchmark.scala b/bench/src/sjsonnet/bench/TomlRenderBenchmark.scala new file mode 100644 index 00000000..4fa473f5 --- /dev/null +++ b/bench/src/sjsonnet/bench/TomlRenderBenchmark.scala @@ -0,0 +1,56 @@ +package sjsonnet.bench + +import org.openjdk.jmh.annotations.* +import org.openjdk.jmh.infra.* +import sjsonnet.* + +import java.util.concurrent.TimeUnit + +/** + * Isolates std.manifestTomlEx rendering on a TOML-heavy object. Fresh interpreter each op (no + * caching) so the TOML render runs every iteration. Used to A/B the TomlRenderer StringWriter -> + * StringBuilderWriter swap. 
+ */ +@BenchmarkMode(Array(Mode.AverageTime)) +@Fork(2) +@Threads(1) +@Warmup(iterations = 5) +@Measurement(iterations = 10) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@State(Scope.Benchmark) +class TomlRenderBenchmark { + + private val source: String = + """std.manifestTomlEx( + | { ['section_%d' % i]: { + | name: 'value_string_%d_abcdef' % i, count: i, enabled: true, + | tags: ['alpha', 'beta', 'gamma'], nested: { a: 1, b: 'two', c: false }, + | } for i in std.range(0, 3000) }, + | ' ')""".stripMargin + + private var wd: os.Path = _ + + @Setup + def setup(): Unit = { + wd = MainBenchmark.testSuiteRoot + System.err.println("TOML length: " + run().length) + } + + @Benchmark def manifestToml(bh: Blackhole): Unit = bh.consume(run()) + + private def run(): String = { + val interp = new Interpreter( + Map.empty[String, String], + Map.empty[String, String], + OsPath(wd), + importer = new SjsonnetMainBase.SimpleImporter(IndexedSeq.empty, None), + parseCache = new DefaultParseCache + ) + interp + .evaluate(source, OsPath(wd / "toml_render_bench.jsonnet")) + .toOption + .get + .asInstanceOf[Val.Str] + .str + } +} diff --git a/sjsonnet/src/sjsonnet/TomlRenderer.scala b/sjsonnet/src/sjsonnet/TomlRenderer.scala index 7bbc59cb..f5b0e93b 100644 --- a/sjsonnet/src/sjsonnet/TomlRenderer.scala +++ b/sjsonnet/src/sjsonnet/TomlRenderer.scala @@ -2,22 +2,24 @@ package sjsonnet import upickle.core.{ArrVisitor, ObjVisitor, SimpleVisitor, Visitor} -import java.io.StringWriter - +// Uses the unsynchronized [[StringBuilderWriter]] rather than java.io.StringWriter: the latter is +// backed by a synchronized StringBuffer, paying a monitor enter/exit on every write/flush on the +// hot manifestTomlEx path. Output is byte-identical. Same swap as the JSON renderer in #874. 
class TomlRenderer( - out: StringWriter = new java.io.StringWriter(), + out: StringBuilderWriter = new StringBuilderWriter(), cumulatedIndent: String, indent: String) - extends SimpleVisitor[StringWriter, StringWriter] { + extends SimpleVisitor[StringBuilderWriter, StringBuilderWriter] { override def expectedMsg: String = "unimplemented type in Materializer" - private object objectKeyRenderer extends upickle.core.SimpleVisitor[StringWriter, StringWriter] { + private object objectKeyRenderer + extends upickle.core.SimpleVisitor[StringBuilderWriter, StringBuilderWriter] { override def expectedMsg = "expected string" - override def visitNull(index: Int): StringWriter = { + override def visitNull(index: Int): StringBuilderWriter = { TomlRenderer.this.visitNull(index) } - override def visitString(s: CharSequence, index: Int): StringWriter = { + override def visitString(s: CharSequence, index: Int): StringBuilderWriter = { if (s == null) visitNull(index) else { TomlRenderer.writeEscapedKey(out, s) @@ -33,19 +35,19 @@ class TomlRenderer( out } - override def visitNull(index: Int): StringWriter = Error.fail("Tried to manifest \"null\"") + override def visitNull(index: Int): StringBuilderWriter = Error.fail("Tried to manifest \"null\"") - override def visitTrue(index: Int): StringWriter = { + override def visitTrue(index: Int): StringBuilderWriter = { out.write("true") flush } - override def visitFalse(index: Int): StringWriter = { + override def visitFalse(index: Int): StringBuilderWriter = { out.write("false") flush } - override def visitString(s: CharSequence, index: Int): StringWriter = { + override def visitString(s: CharSequence, index: Int): StringBuilderWriter = { if (s == null) { visitNull(index) } else { @@ -54,7 +56,7 @@ class TomlRenderer( } } - override def visitFloat64(d: Double, index: Int): StringWriter = { + override def visitFloat64(d: Double, index: Int): StringBuilderWriter = { d match { case Double.PositiveInfinity => out.write("inf") case 
Double.NegativeInfinity => out.write("-inf") @@ -65,8 +67,10 @@ class TomlRenderer( flush } - override def visitArray(length: Int, index: Int): ArrVisitor[StringWriter, StringWriter] = - new ArrVisitor[StringWriter, StringWriter] { + override def visitArray( + length: Int, + index: Int): ArrVisitor[StringBuilderWriter, StringBuilderWriter] = + new ArrVisitor[StringBuilderWriter, StringBuilderWriter] { private val isInLine = length == 0 || depth > 0 private val newElementIndent = if (isInLine) "" else cumulatedIndent + indent private val separator = @@ -76,7 +80,7 @@ class TomlRenderer( depth += 1 out.write('[') out.write(separator) - def subVisitor: Visitor[StringWriter, StringWriter] = { + def subVisitor: Visitor[StringBuilderWriter, StringBuilderWriter] = { if (addComma) { out.write(',') out.write(separator) @@ -84,10 +88,10 @@ class TomlRenderer( out.write(newElementIndent) TomlRenderer.this } - def visitValue(v: StringWriter, index: Int): Unit = { + def visitValue(v: StringBuilderWriter, index: Int): Unit = { addComma = true } - def visitEnd(index: Int): StringWriter = { + def visitEnd(index: Int): StringBuilderWriter = { addComma = false depth -= 1 out.write(separator) @@ -100,23 +104,23 @@ class TomlRenderer( override def visitObject( length: Int, jsonableKeys: Boolean, - index: Int): ObjVisitor[StringWriter, StringWriter] = - new ObjVisitor[StringWriter, StringWriter] { + index: Int): ObjVisitor[StringBuilderWriter, StringBuilderWriter] = + new ObjVisitor[StringBuilderWriter, StringBuilderWriter] { private var addComma = false depth += 1 out.write("{ ") - def subVisitor: Visitor[StringWriter, StringWriter] = TomlRenderer.this - def visitKey(index: Int): Visitor[StringWriter, StringWriter] = { + def subVisitor: Visitor[StringBuilderWriter, StringBuilderWriter] = TomlRenderer.this + def visitKey(index: Int): Visitor[StringBuilderWriter, StringBuilderWriter] = { if (addComma) out.write(", ") objectKeyRenderer } def visitKeyValue(s: Any): Unit = { out.write(" = 
") } - def visitValue(v: StringWriter, index: Int): Unit = { + def visitValue(v: StringBuilderWriter, index: Int): Unit = { addComma = true } - def visitEnd(index: Int): StringWriter = { + def visitEnd(index: Int): StringBuilderWriter = { addComma = false depth -= 1 out.write(" }") @@ -146,14 +150,14 @@ object TomlRenderer { } } - def writeEscapedKey(out: StringWriter, key: CharSequence): Unit = { + def writeEscapedKey(out: StringBuilderWriter, key: CharSequence): Unit = { if (isBareKey(key)) out.write(key.toString) else BaseRenderer.escape(out, key, unicode = true) } def escapeKey(key: String): String = if (isBareKey(key)) key else { - val out = new StringWriter() + val out = new StringBuilderWriter() writeEscapedKey(out, key) out.toString } diff --git a/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala b/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala index 70af6e56..c78d5a9c 100644 --- a/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala +++ b/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala @@ -184,11 +184,11 @@ object ManifestModule extends AbstractFunctionModule { } private def renderTableInternal( - out: StringWriter, + out: StringBuilderWriter, v: Val.Obj, cumulatedIndent: String, indent: String, - path: mutable.ArrayBuffer[String])(implicit ev: EvalScope): StringWriter = { + path: mutable.ArrayBuffer[String])(implicit ev: EvalScope): StringBuilderWriter = { val keys = v.sortedVisibleKeyNames if (keys.length == 0) { out.write('\n') @@ -263,7 +263,7 @@ object ManifestModule extends AbstractFunctionModule { out } - private def renderTableHeader(out: StringWriter, path: mutable.ArrayBuffer[String]) = { + private def renderTableHeader(out: StringBuilderWriter, path: mutable.ArrayBuffer[String]) = { out.write('[') var i = 0 while (i < path.length) { @@ -275,7 +275,9 @@ object ManifestModule extends AbstractFunctionModule { out } - private def renderTableArrayHeader(out: StringWriter, path: mutable.ArrayBuffer[String]) = { + private def renderTableArrayHeader( 
+ out: StringBuilderWriter, + path: mutable.ArrayBuffer[String]) = { out.write('[') renderTableHeader(out, path) out.write(']') @@ -283,7 +285,7 @@ object ManifestModule extends AbstractFunctionModule { } def evalRhs(v: Eval, indent: Eval, ev: EvalScope, pos: Position): Val = { - val out = new StringWriter + val out = new StringBuilderWriter renderTableInternal( out, v.value.asObj, From 03ddf3b8b535096e8bc89cb2d44d1b2de783870f Mon Sep 17 00:00:00 2001 From: He-Pin Date: Sat, 30 May 2026 15:42:04 +0800 Subject: [PATCH 4/7] perf: capture parse Position without boxing the offset Int MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Motivation: Parser.Pos is invoked for nearly every AST node. It was `Index.map(off => new Position(...))`: fastparse's `Index` stores the offset as an Int in its `successValue: Any` field (boxing it), and the `.map` then unboxes it and allocates a closure — per node. boxToInteger via SharedPackageDefs.Index was a top self-frame in the parse flamegraph on kube-prometheus. Modification: - Rewrite Pos to write the Position object straight into successValue via ctx.freshSuccess(new Position(fileScope, ctx.index)), skipping the Int box/unbox and the map closure. Parse output (positions/errors) is unchanged. Result: JMH ParserBenchmark (parse-only, all test-suite files): 1.669 -> 1.579 ms/op (+5.4%, non-overlapping bands). Native parse_time on kube-prometheus: ~105.6 -> ~100.9 ms (+4.5%, consistent). Output byte-identical. 450/450 tests pass. 
--- sjsonnet/src/sjsonnet/Parser.scala | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/sjsonnet/src/sjsonnet/Parser.scala b/sjsonnet/src/sjsonnet/Parser.scala index 30a2f0e6..f667689b 100644 --- a/sjsonnet/src/sjsonnet/Parser.scala +++ b/sjsonnet/src/sjsonnet/Parser.scala @@ -96,7 +96,16 @@ class Parser( throw new ParseError(msg, offset = offset) } - def Pos[$: P]: P[Position] = Index.map(offset => new Position(fileScope, offset)) + // Capture the current parse offset as a Position directly, rather than `Index.map(...)`. + // `Index` stores the offset as an `Int` in fastparse's `successValue: Any`, boxing it, and the + // `.map` then unboxes it and allocates a closure — both per AST node (Pos is called for nearly + // every node). Writing the Position straight into successValue (a reference) skips the box/unbox + // and the lambda. boxToInteger via SharedPackageDefs.Index was a top self-frame in the parse + // flamegraph on kube-prometheus. + def Pos[$: P]: P[Position] = { + val ctx = implicitly[P[$]] + ctx.freshSuccess(new Position(fileScope, ctx.index)) + } def id[$: P]: P[String] = P( CharIn("_a-zA-Z") ~~ From 0b4a1e3421c77666a9f61db96b61d3eab8b3eaf2 Mon Sep 17 00:00:00 2001 From: He-Pin Date: Sat, 30 May 2026 17:01:47 +0800 Subject: [PATCH 5/7] perf: defer Position alloc in exprSuffix2 to the matching branch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Motivation: exprSuffix2 was `Pos.flatMapX { i => CharIn(".[({")... }`, which allocated a Position on EVERY attempt — including the failing attempt that terminates `exprSuffix2.rep` after each expression. Most subexpressions have no suffix, so that trailing failed attempt (one per expression) allocated a Position that was immediately discarded. Modification: - Match the suffix char first; allocate `new Position(fileScope, ctx.index - 1)` only inside the matching branch. No suffix -> CharIn fails fast, no Position. 
Also drops the `.map(_(0))` Char step. Parse output (positions/errors) is unchanged. Result: JMH ParserBenchmark (-f0, same-session): 1.560 -> 1.530 ms/op (+1.9%). Native parse_time on kube-prometheus: non-regressing, min/p25 ~2% lower (noise-limited on a loaded machine). Output byte-identical. 517/517 tests pass. --- sjsonnet/src/sjsonnet/Parser.scala | 55 ++++++++++++++++-------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/sjsonnet/src/sjsonnet/Parser.scala b/sjsonnet/src/sjsonnet/Parser.scala index f667689b..bd495114 100644 --- a/sjsonnet/src/sjsonnet/Parser.scala +++ b/sjsonnet/src/sjsonnet/Parser.scala @@ -663,32 +663,37 @@ class Parser( def exprSuffix2[$: P]: P[Expr => Expr] = exprSuffix2(0) def exprSuffix2[$: P](currentDepth: Int): P[Expr => Expr] = { + // Match the suffix char FIRST, then allocate the Position only when a suffix actually matches. + // The previous `Pos.flatMapX { i => CharIn(...) }` allocated a Position on every attempt — + // including the failing attempt that terminates `exprSuffix2.rep` after each expression, which + // is the common case (most subexpressions have no suffix). Capturing the offset as + // `ctx.index - 1` after the single-char match defers the allocation to the matching branch and + // also drops the `.map(_(0))` step. P( - Pos.flatMapX { i => - CharIn(".[({")./.!.map(_(0)).flatMapX { c => - (c: @switch) match { - case '.' => Pass ~ id.map(x => Expr.Select(i, _: Expr, x)) - case '[' => - Pass ~ (expr(currentDepth + 1).? 
~ (":" ~ expr(currentDepth + 1).?).rep ~ "]").map { - case (Some(tree), Seq()) => Expr.Lookup(i, _: Expr, tree) - case (start, ins) => - Expr.Slice(i, _: Expr, start, ins.headOption.flatten, ins.lift(1).flatten) - } - case '(' => - Pass ~ (args(currentDepth + 1) ~ ")" ~ "tailstrict".!.?).map { - case (args, namedNames, tailstrict) => - Expr.Apply( - i, - _: Expr, - args, - if (namedNames.length == 0) null else namedNames, - tailstrict.nonEmpty - ) - } - case '{' => - Pass ~ (objinside(i, currentDepth + 1) ~ "}").map(x => Expr.ObjExtend(i, _: Expr, x)) - case _ => Fail - } + CharIn(".[({")./.!.flatMapX { s => + val i = new Position(fileScope, implicitly[P[$]].index - 1) + (s.charAt(0): @switch) match { + case '.' => Pass ~ id.map(x => Expr.Select(i, _: Expr, x)) + case '[' => + Pass ~ (expr(currentDepth + 1).? ~ (":" ~ expr(currentDepth + 1).?).rep ~ "]").map { + case (Some(tree), Seq()) => Expr.Lookup(i, _: Expr, tree) + case (start, ins) => + Expr.Slice(i, _: Expr, start, ins.headOption.flatten, ins.lift(1).flatten) + } + case '(' => + Pass ~ (args(currentDepth + 1) ~ ")" ~ "tailstrict".!.?).map { + case (args, namedNames, tailstrict) => + Expr.Apply( + i, + _: Expr, + args, + if (namedNames.length == 0) null else namedNames, + tailstrict.nonEmpty + ) + } + case '{' => + Pass ~ (objinside(i, currentDepth + 1) ~ "}").map(x => Expr.ObjExtend(i, _: Expr, x)) + case _ => Fail } } ) From bbd51e50c23afb4b3c49aee79c03c44f48688fc4 Mon Sep 17 00:00:00 2001 From: He-Pin Date: Sat, 30 May 2026 18:10:19 +0800 Subject: [PATCH 6/7] perf: flush FastMaterializeJsonRenderer only at root depth Motivation: std.manifestJson* render fully in memory via FastMaterializeJsonRenderer. The inherited flushCharBuilder spilled the CharBuilder to the output writer at every sub-tree boundary, adding buffer-to-buffer copies that are pure overhead when the whole document is built in memory and emitted once. 
Modification: - Override flushCharBuilder to write out only when depth == 0 (root finished); accumulate everything in elemBuilder until then. - Size StringBuilderWriter's initial buffer at 4096 (was 16) to cut early reallocations, and mark it private[sjsonnet]. Result: Fewer intermediate copies on the manifestJson* path; output byte-identical. --- sjsonnet/src/sjsonnet/Renderer.scala | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sjsonnet/src/sjsonnet/Renderer.scala b/sjsonnet/src/sjsonnet/Renderer.scala index 0731ecf8..3a709dbd 100644 --- a/sjsonnet/src/sjsonnet/Renderer.scala +++ b/sjsonnet/src/sjsonnet/Renderer.scala @@ -4,7 +4,7 @@ import java.io.{StringWriter, Writer} import upickle.core.{ArrVisitor, ObjVisitor} -final class StringBuilderWriter(initialCapacity: Int = 16) extends Writer { +private[sjsonnet] final class StringBuilderWriter(initialCapacity: Int = 4096) extends Writer { private[this] val builder = new java.lang.StringBuilder(initialCapacity) override def write(c: Int): Unit = @@ -329,6 +329,12 @@ private[sjsonnet] final class FastMaterializeJsonRenderer( private val newLineCharArray = newline.toCharArray private val keyValueSeparatorCharArray = keyValueSeparator.toCharArray + // For in-memory rendering, mid-tree flushes only add buffer-to-buffer copies. Accumulate + // everything in elemBuilder and emit once when the root value finishes (depth == 0). 
+ override def flushCharBuilder(): Unit = { + if (depth == 0) elemBuilder.writeOutToIfLongerThan(outWriter, 0) + } + private val reusableArrVisitor: ArrVisitor[StringBuilderWriter, StringBuilderWriter] { def subVisitor: sjsonnet.FastMaterializeJsonRenderer } = new ArrVisitor[StringBuilderWriter, StringBuilderWriter] { From 787c3e54d455b3ad62b035556366c386f50444ea Mon Sep 17 00:00:00 2001 From: He-Pin Date: Sat, 30 May 2026 18:10:21 +0800 Subject: [PATCH 7/7] test: add directional cases for objectRemoveKey super-merge and stripChars ascii mask Adds regression coverage: - object_remove_key_directional: objectRemoveKey interaction with super / addSuper (`a+:`) merge and inline addSuper asserts. - strip_chars_ascii_mask_directional: stripChars over the ASCII range. --- .../object_remove_key_directional.jsonnet | 15 +++++++++++++++ .../strip_chars_ascii_mask_directional.jsonnet | 5 +++++ ...ip_chars_ascii_mask_directional.jsonnet.golden | 1 + 3 files changed, 21 insertions(+) create mode 100644 sjsonnet/test/resources/new_test_suite/strip_chars_ascii_mask_directional.jsonnet create mode 100644 sjsonnet/test/resources/new_test_suite/strip_chars_ascii_mask_directional.jsonnet.golden diff --git a/sjsonnet/test/resources/new_test_suite/object_remove_key_directional.jsonnet b/sjsonnet/test/resources/new_test_suite/object_remove_key_directional.jsonnet index 4c88ddac..496551c4 100644 --- a/sjsonnet/test/resources/new_test_suite/object_remove_key_directional.jsonnet +++ b/sjsonnet/test/resources/new_test_suite/object_remove_key_directional.jsonnet @@ -32,4 +32,19 @@ std.assertEqual( std.assertEqual( ({ a:: 1 } + std.objectRemoveKey({ a: 2 }, 'a')).a, 1 +) && +std.assertEqual( + local removed = std.objectRemoveKey({ a: super.a + 1, b:: super.a + 2 }, 'a'); + local merged = { a: 10 } + removed + { a+: 5 }; + [merged.a, merged.b, std.objectHas(merged, 'b'), std.objectHasAll(merged, 'b')], + [15, 12, false, true] +) && +std.assertEqual( + local mixin = { + assert self.a == 2 : 
'inline addSuper assert should see final self', + a+: 1, + b: super.a + self.a, + }; + ({ a: 1 } + mixin).b, + 3 ) diff --git a/sjsonnet/test/resources/new_test_suite/strip_chars_ascii_mask_directional.jsonnet b/sjsonnet/test/resources/new_test_suite/strip_chars_ascii_mask_directional.jsonnet new file mode 100644 index 00000000..560bcde5 --- /dev/null +++ b/sjsonnet/test/resources/new_test_suite/strip_chars_ascii_mask_directional.jsonnet @@ -0,0 +1,5 @@ +std.assertEqual(std.stripChars("abcXYZcba", "abcXYZ"), "") && +std.assertEqual(std.lstripChars("abcXYZkeepXYZ", "abcXYZ"), "keepXYZ") && +std.assertEqual(std.rstripChars("XYZkeepabcXYZ", "abcXYZ"), "XYZkeep") && +std.assertEqual(std.stripChars("éabcé", "abc"), "éabcé") && +std.assertEqual(std.stripChars("😀abc😀", "abc"), "😀abc😀") diff --git a/sjsonnet/test/resources/new_test_suite/strip_chars_ascii_mask_directional.jsonnet.golden b/sjsonnet/test/resources/new_test_suite/strip_chars_ascii_mask_directional.jsonnet.golden new file mode 100644 index 00000000..27ba77dd --- /dev/null +++ b/sjsonnet/test/resources/new_test_suite/strip_chars_ascii_mask_directional.jsonnet.golden @@ -0,0 +1 @@ +true