diff --git a/sjsonnet/src/sjsonnet/Renderer.scala b/sjsonnet/src/sjsonnet/Renderer.scala index 39a123ae..0731ecf8 100644 --- a/sjsonnet/src/sjsonnet/Renderer.scala +++ b/sjsonnet/src/sjsonnet/Renderer.scala @@ -4,6 +4,41 @@ import java.io.{StringWriter, Writer} import upickle.core.{ArrVisitor, ObjVisitor} +final class StringBuilderWriter(initialCapacity: Int = 16) extends Writer { + private[this] val builder = new java.lang.StringBuilder(initialCapacity) + + override def write(c: Int): Unit = + builder.append(c.toChar) + + override def write(cbuf: Array[Char], off: Int, len: Int): Unit = + builder.append(cbuf, off, len) + + override def write(str: String): Unit = + builder.append(str) + + override def write(str: String, off: Int, len: Int): Unit = + builder.append(str, off, off + len) + + override def append(c: Char): Writer = { + builder.append(c) + this + } + + override def append(csq: CharSequence): Writer = { + builder.append(if (csq == null) "null" else csq) + this + } + + override def append(csq: CharSequence, start: Int, end: Int): Writer = { + builder.append(if (csq == null) "null" else csq, start, end) + this + } + + override def flush(): Unit = () + override def close(): Unit = () + override def toString: String = builder.toString +} + /** * Custom JSON renderer to try and match the behavior of google/jsonnet's render: * @@ -279,6 +314,93 @@ final case class MaterializeJsonRenderer( } } +private[sjsonnet] final class FastMaterializeJsonRenderer( + indent: Int = 4, + escapeUnicode: Boolean = false, + newline: String = "\n", + keyValueSeparator: String = ": ", + private val outWriter: StringBuilderWriter = new StringBuilderWriter()) + extends BaseCharRenderer( + outWriter, + indent, + escapeUnicode, + newline.toCharArray + ) { + private val newLineCharArray = newline.toCharArray + private val keyValueSeparatorCharArray = keyValueSeparator.toCharArray + + private val reusableArrVisitor: ArrVisitor[StringBuilderWriter, StringBuilderWriter] { + def subVisitor: sjsonnet.FastMaterializeJsonRenderer + } = new ArrVisitor[StringBuilderWriter, StringBuilderWriter] { + def subVisitor: sjsonnet.FastMaterializeJsonRenderer = FastMaterializeJsonRenderer.this + def visitValue(v: StringBuilderWriter, index: Int): Unit = { + flushBuffer() + commaBuffered = true + } + def visitEnd(index: Int): StringBuilderWriter = { + commaBuffered = false + depth -= 1 + renderIndent() + elemBuilder.append(']') + flushCharBuilder() + outWriter + } + } + + private val reusableObjVisitor: ObjVisitor[StringBuilderWriter, StringBuilderWriter] { + def subVisitor: sjsonnet.FastMaterializeJsonRenderer + def visitKey(index: Int): sjsonnet.FastMaterializeJsonRenderer + } = new ObjVisitor[StringBuilderWriter, StringBuilderWriter] { + def subVisitor: sjsonnet.FastMaterializeJsonRenderer = FastMaterializeJsonRenderer.this + def visitKey(index: Int): sjsonnet.FastMaterializeJsonRenderer = + FastMaterializeJsonRenderer.this + def visitKeyValue(s: Any): Unit = { + elemBuilder.appendAll(keyValueSeparatorCharArray, keyValueSeparatorCharArray.length) + } + def visitValue(v: StringBuilderWriter, index: Int): Unit = { + commaBuffered = true + } + def visitEnd(index: Int): StringBuilderWriter = { + commaBuffered = false + depth -= 1 + renderIndent() + elemBuilder.append('}') + flushCharBuilder() + outWriter + } + } + + override def visitArray( + length: Int, + index: Int): upickle.core.ArrVisitor[StringBuilderWriter, StringBuilderWriter] { + def subVisitor: sjsonnet.FastMaterializeJsonRenderer + } = { + flushBuffer() + elemBuilder.append('[') + + depth += 1 + if (length == 0 && indent != -1) + elemBuilder.appendAll(newLineCharArray, newLineCharArray.length) + else renderIndent() + reusableArrVisitor + } + + override def visitObject( + length: Int, + index: Int): upickle.core.ObjVisitor[StringBuilderWriter, StringBuilderWriter] { + def subVisitor: sjsonnet.FastMaterializeJsonRenderer + def visitKey(index: Int): sjsonnet.FastMaterializeJsonRenderer + } = { + flushBuffer() + elemBuilder.append('{') + depth += 1 + if (length == 0 && indent != -1) + elemBuilder.appendAll(newLineCharArray, newLineCharArray.length) + else renderIndent() + reusableObjVisitor + } +} + object RenderUtils { // Pre-cached string representations of small integers (0-255) diff --git a/sjsonnet/src/sjsonnet/Util.scala b/sjsonnet/src/sjsonnet/Util.scala index d716ca4d..8cb3522b 100644 --- a/sjsonnet/src/sjsonnet/Util.scala +++ b/sjsonnet/src/sjsonnet/Util.scala @@ -128,10 +128,10 @@ object Util { while (i1 < n1 && i2 < n2) { val c1 = s1.charAt(i1) val c2 = s2.charAt(i2) - // Fast path: equal chars can be skipped without surrogate checks. - // Even for surrogate pairs, equal high surrogates at position i lead to - // comparing low surrogates at i+1, producing the correct codepoint ordering. - if (c1 == c2) { + // Fast path: equal non-surrogates can be skipped without codepoint checks. + // Equal surrogates still need codepoint decoding because a raw surrogate and + // a valid surrogate pair can share the same leading UTF-16 code unit. + if (c1 == c2 && !Character.isSurrogate(c1)) { i1 += 1 i2 += 1 } else if (!Character.isSurrogate(c1) && !Character.isSurrogate(c2)) { @@ -157,6 +157,10 @@ object Util { override def compare(x: String, y: String): Int = compareStringsByCodepoint(x, y) } + def sortStringsByCodepointInPlace(xs: Array[String]): Unit = { + java.util.Arrays.sort(xs, CodepointStringOrdering) + } + def compareJsonnetStd(v1: Val, v2: Val, ev: EvalScope): Int = { val t1 = v1.prettyName val t2 = v2.prettyName diff --git a/sjsonnet/src/sjsonnet/Val.scala b/sjsonnet/src/sjsonnet/Val.scala index c2b9e379..a08dc1ef 100644 --- a/sjsonnet/src/sjsonnet/Val.scala +++ b/sjsonnet/src/sjsonnet/Val.scala @@ -1822,7 +1822,8 @@ object Val { private[sjsonnet] def sortedVisibleKeyNames: Array[String] = { var r = _sortedVisibleKeyNames if (r == null) { - r = visibleKeyNames.sorted(Util.CodepointStringOrdering) + r = visibleKeyNames.clone() + Util.sortStringsByCodepointInPlace(r) _sortedVisibleKeyNames = r } r diff --git a/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala b/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala index ee8a8f17..70af6e56 100644 --- a/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala +++ b/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala @@ -39,7 +39,7 @@ object ManifestModule extends AbstractFunctionModule { */ private object ManifestJson extends Val.Builtin1("manifestJson", "v") { def evalRhs(v: Eval, ev: EvalScope, pos: Position): Val = - Val.Str(pos, Materializer.apply0(v.value, MaterializeJsonRenderer())(ev).toString) + Val.Str(pos, Materializer.apply0(v.value, new FastMaterializeJsonRenderer())(ev).toString) } /** @@ -57,7 +57,7 @@ object ManifestModule extends AbstractFunctionModule { Materializer .apply0( v.value, - MaterializeJsonRenderer(indent = -1, newline = "", keyValueSeparator = ":") + new FastMaterializeJsonRenderer(indent = -1, newline = "", keyValueSeparator = ":") )(ev) .toString ) @@ -94,7 +94,7 @@ object ManifestModule extends AbstractFunctionModule { Materializer .apply0( v.value, - MaterializeJsonRenderer( + new FastMaterializeJsonRenderer( indent = i.value.asString.length, newline = newline.value.asString, keyValueSeparator = keyValSep.value.asString diff --git a/sjsonnet/src/sjsonnet/stdlib/ObjectModule.scala b/sjsonnet/src/sjsonnet/stdlib/ObjectModule.scala index 5275b1ef..4d14a4a2 100644 --- a/sjsonnet/src/sjsonnet/stdlib/ObjectModule.scala +++ b/sjsonnet/src/sjsonnet/stdlib/ObjectModule.scala @@ -258,7 +258,12 @@ object ObjectModule extends AbstractFunctionModule { maybeSortKeys(ev, v1.allKeyNames) @inline private def maybeSortKeys(ev: EvalScope, keys: Array[String]): Array[String] = - if (ev.settings.preserveOrder) keys else keys.sorted(Util.CodepointStringOrdering) + if (ev.settings.preserveOrder) keys + else { + val sorted = keys.clone() + Util.sortStringsByCodepointInPlace(sorted) + sorted + } def getObjValuesFromKeys( pos: Position, diff --git a/sjsonnet/test/src/sjsonnet/UnicodeHandlingTests.scala b/sjsonnet/test/src/sjsonnet/UnicodeHandlingTests.scala index d15b7b54..25c9d70f 100644 --- a/sjsonnet/test/src/sjsonnet/UnicodeHandlingTests.scala +++ b/sjsonnet/test/src/sjsonnet/UnicodeHandlingTests.scala @@ -109,16 +109,69 @@ object UnicodeHandlingTests extends TestSuite { val codepointSorted = testStrings.sorted(sjsonnet.Util.CodepointStringOrdering).toList codepointSorted ==> List("\uFFFF", "\uD800\uDC00") + val inPlaceSorted = testStrings.clone() + sjsonnet.Util.sortStringsByCodepointInPlace(inPlaceSorted) + inPlaceSorted.toList ==> codepointSorted + // These produce different results, demonstrating the bug that was fixed assert(utf16Sorted != codepointSorted) } + test("codepointInPlaceSortMatchesReferenceOrdering") { + val samples = Array( + "", + "a", + "b", + "aa", + "\u0000", + "\uD800\uDC00", // U+10000 + "\uFFFF", + "πŸ˜€", + "🌍", + "πŸš€", + "Γ©", + "Ξ©", + "δΈ­" + ) + + val cases = Seq( + Array.empty[String], + Array("a"), + Array("b", "a"), + Array.fill(20)("same"), + samples, + samples.reverse, + Array.tabulate(64)(i => samples((i * 7 + 3) % samples.length)) + ) + + for (c <- cases) { + val actual = c.clone() + sjsonnet.Util.sortStringsByCodepointInPlace(actual) + actual.toList ==> c.sorted(sjsonnet.Util.CodepointStringOrdering).toList + } + } + test("codepointOrderingInJsonnet") { // Verify that Jsonnet operations use Unicode codepoint ordering eval("'\\uFFFF' < '\\uD800\\uDC00'") ==> ujson.Bool(true) eval("std.sort(['\\uD800\\uDC00', '\\uFFFF'])") ==> ujson.Arr("\uFFFF", "\uD800\uDC00") } + test("rawSurrogatePrefixOrdering") { + val rawSurrogatePrefix = "\uD800\uFFFF" // codepoints [0xD800, 0xFFFF] + val validSurrogatePair = "\uD800\uDC00" // codepoint [0x10000] + + assert(sjsonnet.Util.compareStringsByCodepoint(rawSurrogatePrefix, validSurrogatePair) < 0) + assert(sjsonnet.Util.compareStringsByCodepoint(validSurrogatePair, rawSurrogatePrefix) > 0) + + eval("(std.char(55296) + std.char(65535)) < (std.char(55296) + std.char(56320))") ==> + ujson.Bool(true) + + eval( + "std.sort([std.char(55296) + std.char(56320), std.char(55296) + std.char(65535)])" + ) ==> ujson.Arr(rawSurrogatePrefix, validSurrogatePair) + } + // Unpaired surrogate handling - sjsonnet-specific behavior // // Note: This is an intentional divergence from go-jsonnet and C++ jsonnet: