From da92dd15ac4b8e458e110bc8ce12fc6d891ce6e9 Mon Sep 17 00:00:00 2001 From: He-Pin Date: Thu, 28 May 2026 13:32:39 +0800 Subject: [PATCH 1/2] perf: speed up manifest JSON rendering Motivation: std.manifestJson* still contributed to the local Scala Native gap versus source-built jrsonnet, especially in real-world object-heavy rendering. Modification: Add an internal StringBuilder-backed FastMaterializeJsonRenderer for std.manifestJson, std.manifestJsonMinified, and std.manifestJsonEx while preserving the public MaterializeJsonRenderer StringWriter API. Reuse an in-place codepoint key sorter backed by java.util.Arrays.sort, and fix raw-surrogate prefix ordering in compareStringsByCodepoint. Result: Full validation passed: ./mill --no-server --ticker false --color false __.reformat and ./mill --no-server --ticker false --color false -j 1 __.test reported 451/451 tests passing. JMH regressions: manifestJsonEx 0.055 ms/op, realistic2 43.596 ms/op, gen_big_object 0.842 ms/op. Direct hyperfine against source-built jrsonnet: manifestJsonEx sjsonnet-native 5.090 ms vs jrsonnet 4.075 ms; kube-prometheus sjsonnet-native 143.738 ms vs jrsonnet 97.385 ms. 
--- sjsonnet/src/sjsonnet/Renderer.scala | 122 ++++++++++++++++++ sjsonnet/src/sjsonnet/Util.scala | 12 +- sjsonnet/src/sjsonnet/Val.scala | 3 +- .../src/sjsonnet/stdlib/ManifestModule.scala | 6 +- .../src/sjsonnet/stdlib/ObjectModule.scala | 7 +- .../src/sjsonnet/UnicodeHandlingTests.scala | 53 ++++++++ 6 files changed, 194 insertions(+), 9 deletions(-) diff --git a/sjsonnet/src/sjsonnet/Renderer.scala b/sjsonnet/src/sjsonnet/Renderer.scala index 39a123ae..0731ecf8 100644 --- a/sjsonnet/src/sjsonnet/Renderer.scala +++ b/sjsonnet/src/sjsonnet/Renderer.scala @@ -4,6 +4,41 @@ import java.io.{StringWriter, Writer} import upickle.core.{ArrVisitor, ObjVisitor} +final class StringBuilderWriter(initialCapacity: Int = 16) extends Writer { + private[this] val builder = new java.lang.StringBuilder(initialCapacity) + + override def write(c: Int): Unit = + builder.append(c.toChar) + + override def write(cbuf: Array[Char], off: Int, len: Int): Unit = + builder.append(cbuf, off, len) + + override def write(str: String): Unit = + builder.append(str) + + override def write(str: String, off: Int, len: Int): Unit = + builder.append(str, off, off + len) + + override def append(c: Char): Writer = { + builder.append(c) + this + } + + override def append(csq: CharSequence): Writer = { + builder.append(if (csq == null) "null" else csq) + this + } + + override def append(csq: CharSequence, start: Int, end: Int): Writer = { + builder.append(if (csq == null) "null" else csq, start, end) + this + } + + override def flush(): Unit = () + override def close(): Unit = () + override def toString: String = builder.toString +} + /** * Custom JSON renderer to try and match the behavior of google/jsonnet's render: * @@ -279,6 +314,93 @@ final case class MaterializeJsonRenderer( } } +private[sjsonnet] final class FastMaterializeJsonRenderer( + indent: Int = 4, + escapeUnicode: Boolean = false, + newline: String = "\n", + keyValueSeparator: String = ": ", + private val outWriter: StringBuilderWriter = 
new StringBuilderWriter()) + extends BaseCharRenderer( + outWriter, + indent, + escapeUnicode, + newline.toCharArray + ) { + private val newLineCharArray = newline.toCharArray + private val keyValueSeparatorCharArray = keyValueSeparator.toCharArray + + private val reusableArrVisitor: ArrVisitor[StringBuilderWriter, StringBuilderWriter] { + def subVisitor: sjsonnet.FastMaterializeJsonRenderer + } = new ArrVisitor[StringBuilderWriter, StringBuilderWriter] { + def subVisitor: sjsonnet.FastMaterializeJsonRenderer = FastMaterializeJsonRenderer.this + def visitValue(v: StringBuilderWriter, index: Int): Unit = { + flushBuffer() + commaBuffered = true + } + def visitEnd(index: Int): StringBuilderWriter = { + commaBuffered = false + depth -= 1 + renderIndent() + elemBuilder.append(']') + flushCharBuilder() + outWriter + } + } + + private val reusableObjVisitor: ObjVisitor[StringBuilderWriter, StringBuilderWriter] { + def subVisitor: sjsonnet.FastMaterializeJsonRenderer + def visitKey(index: Int): sjsonnet.FastMaterializeJsonRenderer + } = new ObjVisitor[StringBuilderWriter, StringBuilderWriter] { + def subVisitor: sjsonnet.FastMaterializeJsonRenderer = FastMaterializeJsonRenderer.this + def visitKey(index: Int): sjsonnet.FastMaterializeJsonRenderer = + FastMaterializeJsonRenderer.this + def visitKeyValue(s: Any): Unit = { + elemBuilder.appendAll(keyValueSeparatorCharArray, keyValueSeparatorCharArray.length) + } + def visitValue(v: StringBuilderWriter, index: Int): Unit = { + commaBuffered = true + } + def visitEnd(index: Int): StringBuilderWriter = { + commaBuffered = false + depth -= 1 + renderIndent() + elemBuilder.append('}') + flushCharBuilder() + outWriter + } + } + + override def visitArray( + length: Int, + index: Int): upickle.core.ArrVisitor[StringBuilderWriter, StringBuilderWriter] { + def subVisitor: sjsonnet.FastMaterializeJsonRenderer + } = { + flushBuffer() + elemBuilder.append('[') + + depth += 1 + if (length == 0 && indent != -1) + 
elemBuilder.appendAll(newLineCharArray, newLineCharArray.length) + else renderIndent() + reusableArrVisitor + } + + override def visitObject( + length: Int, + index: Int): upickle.core.ObjVisitor[StringBuilderWriter, StringBuilderWriter] { + def subVisitor: sjsonnet.FastMaterializeJsonRenderer + def visitKey(index: Int): sjsonnet.FastMaterializeJsonRenderer + } = { + flushBuffer() + elemBuilder.append('{') + depth += 1 + if (length == 0 && indent != -1) + elemBuilder.appendAll(newLineCharArray, newLineCharArray.length) + else renderIndent() + reusableObjVisitor + } +} + object RenderUtils { // Pre-cached string representations of small integers (0-255) diff --git a/sjsonnet/src/sjsonnet/Util.scala b/sjsonnet/src/sjsonnet/Util.scala index d716ca4d..8cb3522b 100644 --- a/sjsonnet/src/sjsonnet/Util.scala +++ b/sjsonnet/src/sjsonnet/Util.scala @@ -128,10 +128,10 @@ object Util { while (i1 < n1 && i2 < n2) { val c1 = s1.charAt(i1) val c2 = s2.charAt(i2) - // Fast path: equal chars can be skipped without surrogate checks. - // Even for surrogate pairs, equal high surrogates at position i lead to - // comparing low surrogates at i+1, producing the correct codepoint ordering. - if (c1 == c2) { + // Fast path: equal non-surrogates can be skipped without codepoint checks. + // Equal surrogates still need codepoint decoding because a raw surrogate and + // a valid surrogate pair can share the same leading UTF-16 code unit. 
+ if (c1 == c2 && !Character.isSurrogate(c1)) { i1 += 1 i2 += 1 } else if (!Character.isSurrogate(c1) && !Character.isSurrogate(c2)) { @@ -157,6 +157,10 @@ object Util { override def compare(x: String, y: String): Int = compareStringsByCodepoint(x, y) } + def sortStringsByCodepointInPlace(xs: Array[String]): Unit = { + java.util.Arrays.sort(xs, CodepointStringOrdering) + } + def compareJsonnetStd(v1: Val, v2: Val, ev: EvalScope): Int = { val t1 = v1.prettyName val t2 = v2.prettyName diff --git a/sjsonnet/src/sjsonnet/Val.scala b/sjsonnet/src/sjsonnet/Val.scala index c2b9e379..a08dc1ef 100644 --- a/sjsonnet/src/sjsonnet/Val.scala +++ b/sjsonnet/src/sjsonnet/Val.scala @@ -1822,7 +1822,8 @@ object Val { private[sjsonnet] def sortedVisibleKeyNames: Array[String] = { var r = _sortedVisibleKeyNames if (r == null) { - r = visibleKeyNames.sorted(Util.CodepointStringOrdering) + r = visibleKeyNames.clone() + Util.sortStringsByCodepointInPlace(r) _sortedVisibleKeyNames = r } r diff --git a/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala b/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala index ee8a8f17..70af6e56 100644 --- a/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala +++ b/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala @@ -39,7 +39,7 @@ object ManifestModule extends AbstractFunctionModule { */ private object ManifestJson extends Val.Builtin1("manifestJson", "v") { def evalRhs(v: Eval, ev: EvalScope, pos: Position): Val = - Val.Str(pos, Materializer.apply0(v.value, MaterializeJsonRenderer())(ev).toString) + Val.Str(pos, Materializer.apply0(v.value, new FastMaterializeJsonRenderer())(ev).toString) } /** @@ -57,7 +57,7 @@ object ManifestModule extends AbstractFunctionModule { Materializer .apply0( v.value, - MaterializeJsonRenderer(indent = -1, newline = "", keyValueSeparator = ":") + new FastMaterializeJsonRenderer(indent = -1, newline = "", keyValueSeparator = ":") )(ev) .toString ) @@ -94,7 +94,7 @@ object ManifestModule extends AbstractFunctionModule { 
Materializer .apply0( v.value, - MaterializeJsonRenderer( + new FastMaterializeJsonRenderer( indent = i.value.asString.length, newline = newline.value.asString, keyValueSeparator = keyValSep.value.asString diff --git a/sjsonnet/src/sjsonnet/stdlib/ObjectModule.scala b/sjsonnet/src/sjsonnet/stdlib/ObjectModule.scala index 5275b1ef..4d14a4a2 100644 --- a/sjsonnet/src/sjsonnet/stdlib/ObjectModule.scala +++ b/sjsonnet/src/sjsonnet/stdlib/ObjectModule.scala @@ -258,7 +258,12 @@ object ObjectModule extends AbstractFunctionModule { maybeSortKeys(ev, v1.allKeyNames) @inline private def maybeSortKeys(ev: EvalScope, keys: Array[String]): Array[String] = - if (ev.settings.preserveOrder) keys else keys.sorted(Util.CodepointStringOrdering) + if (ev.settings.preserveOrder) keys + else { + val sorted = keys.clone() + Util.sortStringsByCodepointInPlace(sorted) + sorted + } def getObjValuesFromKeys( pos: Position, diff --git a/sjsonnet/test/src/sjsonnet/UnicodeHandlingTests.scala b/sjsonnet/test/src/sjsonnet/UnicodeHandlingTests.scala index d15b7b54..25c9d70f 100644 --- a/sjsonnet/test/src/sjsonnet/UnicodeHandlingTests.scala +++ b/sjsonnet/test/src/sjsonnet/UnicodeHandlingTests.scala @@ -109,16 +109,69 @@ object UnicodeHandlingTests extends TestSuite { val codepointSorted = testStrings.sorted(sjsonnet.Util.CodepointStringOrdering).toList codepointSorted ==> List("\uFFFF", "\uD800\uDC00") + val inPlaceSorted = testStrings.clone() + sjsonnet.Util.sortStringsByCodepointInPlace(inPlaceSorted) + inPlaceSorted.toList ==> codepointSorted + // These produce different results, demonstrating the bug that was fixed assert(utf16Sorted != codepointSorted) } + test("codepointInPlaceSortMatchesReferenceOrdering") { + val samples = Array( + "", + "a", + "b", + "aa", + "\u0000", + "\uD800\uDC00", // U+10000 + "\uFFFF", + "😀", + "🌍", + "🚀", + "é", + "Ω", + "中" + ) + + val cases = Seq( + Array.empty[String], + Array("a"), + Array("b", "a"), + Array.fill(20)("same"), + samples, 
samples.reverse, + Array.tabulate(64)(i => samples((i * 7 + 3) % samples.length)) + ) + + for (c <- cases) { + val actual = c.clone() + sjsonnet.Util.sortStringsByCodepointInPlace(actual) + actual.toList ==> c.sorted(sjsonnet.Util.CodepointStringOrdering).toList + } + } + test("codepointOrderingInJsonnet") { // Verify that Jsonnet operations use Unicode codepoint ordering eval("'\\uFFFF' < '\\uD800\\uDC00'") ==> ujson.Bool(true) eval("std.sort(['\\uD800\\uDC00', '\\uFFFF'])") ==> ujson.Arr("\uFFFF", "\uD800\uDC00") } + test("rawSurrogatePrefixOrdering") { + val rawSurrogatePrefix = "\uD800\uFFFF" // codepoints [0xD800, 0xFFFF] + val validSurrogatePair = "\uD800\uDC00" // codepoint [0x10000] + + assert(sjsonnet.Util.compareStringsByCodepoint(rawSurrogatePrefix, validSurrogatePair) < 0) + assert(sjsonnet.Util.compareStringsByCodepoint(validSurrogatePair, rawSurrogatePrefix) > 0) + + eval("(std.char(55296) + std.char(65535)) < (std.char(55296) + std.char(56320))") ==> + ujson.Bool(true) + + eval( + "std.sort([std.char(55296) + std.char(56320), std.char(55296) + std.char(65535)])" + ) ==> ujson.Arr(rawSurrogatePrefix, validSurrogatePair) + } + // Unpaired surrogate handling - sjsonnet-specific behavior // // Note: This is an intentional divergence from go-jsonnet and C++ jsonnet: From 933ed417a41aec7c0dbb3e5a1cab0b33b140795d Mon Sep 17 00:00:00 2001 From: He-Pin Date: Sat, 30 May 2026 19:24:42 +0800 Subject: [PATCH 2/2] perf: use unsynchronized StringBuilderWriter in TomlRenderer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit std.manifestTomlEx routed through java.io.StringWriter, whose backing StringBuffer pays a monitor enter/exit on every write/flush on the hot TOML manifestation path. Switch TomlRenderer and the manifestTomlEx render path in ManifestModule to the unsynchronized package-private StringBuilderWriter (the same writer the JSON manifest renderer uses). 
Output is byte-identical; std.deepJoin keeps StringWriter (separate concern). Result (Scala Native hyperfine, TOML-heavy workload, ~1.8 MB output): after ran 1.11 ± 0.07x faster than before (~10%); output byte-identical. --- sjsonnet/src/sjsonnet/TomlRenderer.scala | 54 ++++++++++--------- .../src/sjsonnet/stdlib/ManifestModule.scala | 12 +++-- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/sjsonnet/src/sjsonnet/TomlRenderer.scala b/sjsonnet/src/sjsonnet/TomlRenderer.scala index 7bbc59cb..f5b0e93b 100644 --- a/sjsonnet/src/sjsonnet/TomlRenderer.scala +++ b/sjsonnet/src/sjsonnet/TomlRenderer.scala @@ -2,22 +2,24 @@ package sjsonnet import upickle.core.{ArrVisitor, ObjVisitor, SimpleVisitor, Visitor} -import java.io.StringWriter - +// Uses the unsynchronized [[StringBuilderWriter]] rather than java.io.StringWriter: the latter is +// backed by a synchronized StringBuffer, paying a monitor enter/exit on every write/flush on the +// hot manifestTomlEx path. Output is byte-identical. Same swap as the JSON renderer in #874. 
class TomlRenderer( - out: StringWriter = new java.io.StringWriter(), + out: StringBuilderWriter = new StringBuilderWriter(), cumulatedIndent: String, indent: String) - extends SimpleVisitor[StringWriter, StringWriter] { + extends SimpleVisitor[StringBuilderWriter, StringBuilderWriter] { override def expectedMsg: String = "unimplemented type in Materializer" - private object objectKeyRenderer extends upickle.core.SimpleVisitor[StringWriter, StringWriter] { + private object objectKeyRenderer + extends upickle.core.SimpleVisitor[StringBuilderWriter, StringBuilderWriter] { override def expectedMsg = "expected string" - override def visitNull(index: Int): StringWriter = { + override def visitNull(index: Int): StringBuilderWriter = { TomlRenderer.this.visitNull(index) } - override def visitString(s: CharSequence, index: Int): StringWriter = { + override def visitString(s: CharSequence, index: Int): StringBuilderWriter = { if (s == null) visitNull(index) else { TomlRenderer.writeEscapedKey(out, s) @@ -33,19 +35,19 @@ class TomlRenderer( out } - override def visitNull(index: Int): StringWriter = Error.fail("Tried to manifest \"null\"") + override def visitNull(index: Int): StringBuilderWriter = Error.fail("Tried to manifest \"null\"") - override def visitTrue(index: Int): StringWriter = { + override def visitTrue(index: Int): StringBuilderWriter = { out.write("true") flush } - override def visitFalse(index: Int): StringWriter = { + override def visitFalse(index: Int): StringBuilderWriter = { out.write("false") flush } - override def visitString(s: CharSequence, index: Int): StringWriter = { + override def visitString(s: CharSequence, index: Int): StringBuilderWriter = { if (s == null) { visitNull(index) } else { @@ -54,7 +56,7 @@ class TomlRenderer( } } - override def visitFloat64(d: Double, index: Int): StringWriter = { + override def visitFloat64(d: Double, index: Int): StringBuilderWriter = { d match { case Double.PositiveInfinity => out.write("inf") case 
Double.NegativeInfinity => out.write("-inf") @@ -65,8 +67,10 @@ class TomlRenderer( flush } - override def visitArray(length: Int, index: Int): ArrVisitor[StringWriter, StringWriter] = - new ArrVisitor[StringWriter, StringWriter] { + override def visitArray( + length: Int, + index: Int): ArrVisitor[StringBuilderWriter, StringBuilderWriter] = + new ArrVisitor[StringBuilderWriter, StringBuilderWriter] { private val isInLine = length == 0 || depth > 0 private val newElementIndent = if (isInLine) "" else cumulatedIndent + indent private val separator = @@ -76,7 +80,7 @@ class TomlRenderer( depth += 1 out.write('[') out.write(separator) - def subVisitor: Visitor[StringWriter, StringWriter] = { + def subVisitor: Visitor[StringBuilderWriter, StringBuilderWriter] = { if (addComma) { out.write(',') out.write(separator) @@ -84,10 +88,10 @@ class TomlRenderer( out.write(newElementIndent) TomlRenderer.this } - def visitValue(v: StringWriter, index: Int): Unit = { + def visitValue(v: StringBuilderWriter, index: Int): Unit = { addComma = true } - def visitEnd(index: Int): StringWriter = { + def visitEnd(index: Int): StringBuilderWriter = { addComma = false depth -= 1 out.write(separator) @@ -100,23 +104,23 @@ class TomlRenderer( override def visitObject( length: Int, jsonableKeys: Boolean, - index: Int): ObjVisitor[StringWriter, StringWriter] = - new ObjVisitor[StringWriter, StringWriter] { + index: Int): ObjVisitor[StringBuilderWriter, StringBuilderWriter] = + new ObjVisitor[StringBuilderWriter, StringBuilderWriter] { private var addComma = false depth += 1 out.write("{ ") - def subVisitor: Visitor[StringWriter, StringWriter] = TomlRenderer.this - def visitKey(index: Int): Visitor[StringWriter, StringWriter] = { + def subVisitor: Visitor[StringBuilderWriter, StringBuilderWriter] = TomlRenderer.this + def visitKey(index: Int): Visitor[StringBuilderWriter, StringBuilderWriter] = { if (addComma) out.write(", ") objectKeyRenderer } def visitKeyValue(s: Any): Unit = { out.write(" = 
") } - def visitValue(v: StringWriter, index: Int): Unit = { + def visitValue(v: StringBuilderWriter, index: Int): Unit = { addComma = true } - def visitEnd(index: Int): StringWriter = { + def visitEnd(index: Int): StringBuilderWriter = { addComma = false depth -= 1 out.write(" }") @@ -146,14 +150,14 @@ object TomlRenderer { } } - def writeEscapedKey(out: StringWriter, key: CharSequence): Unit = { + def writeEscapedKey(out: StringBuilderWriter, key: CharSequence): Unit = { if (isBareKey(key)) out.write(key.toString) else BaseRenderer.escape(out, key, unicode = true) } def escapeKey(key: String): String = if (isBareKey(key)) key else { - val out = new StringWriter() + val out = new StringBuilderWriter() writeEscapedKey(out, key) out.toString } diff --git a/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala b/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala index 70af6e56..c78d5a9c 100644 --- a/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala +++ b/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala @@ -184,11 +184,11 @@ object ManifestModule extends AbstractFunctionModule { } private def renderTableInternal( - out: StringWriter, + out: StringBuilderWriter, v: Val.Obj, cumulatedIndent: String, indent: String, - path: mutable.ArrayBuffer[String])(implicit ev: EvalScope): StringWriter = { + path: mutable.ArrayBuffer[String])(implicit ev: EvalScope): StringBuilderWriter = { val keys = v.sortedVisibleKeyNames if (keys.length == 0) { out.write('\n') @@ -263,7 +263,7 @@ object ManifestModule extends AbstractFunctionModule { out } - private def renderTableHeader(out: StringWriter, path: mutable.ArrayBuffer[String]) = { + private def renderTableHeader(out: StringBuilderWriter, path: mutable.ArrayBuffer[String]) = { out.write('[') var i = 0 while (i < path.length) { @@ -275,7 +275,9 @@ object ManifestModule extends AbstractFunctionModule { out } - private def renderTableArrayHeader(out: StringWriter, path: mutable.ArrayBuffer[String]) = { + private def renderTableArrayHeader( 
+ out: StringBuilderWriter, + path: mutable.ArrayBuffer[String]) = { out.write('[') renderTableHeader(out, path) out.write(']') @@ -283,7 +285,7 @@ object ManifestModule extends AbstractFunctionModule { } def evalRhs(v: Eval, indent: Eval, ev: EvalScope, pos: Position): Val = { - val out = new StringWriter + val out = new StringBuilderWriter renderTableInternal( out, v.value.asObj,