Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions bench/src/sjsonnet/bench/AsciiSafeRenderBenchmark.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package sjsonnet.bench

import org.openjdk.jmh.annotations.*
import org.openjdk.jmh.infra.*
import sjsonnet.*

import java.io.{StringWriter, Writer}
import java.util.concurrent.TimeUnit

/**
* Isolates the rendering path on a string-heavy value (many [[Val.AsciiSafeStr]] leaves). Used to
* A/B the AsciiSafeStr renderer fast path: evaluate once in @Setup, then benchmark only render().
*/
@BenchmarkMode(Array(Mode.AverageTime))
@Fork(1)
@Threads(1)
@Warmup(iterations = 5)
@Measurement(iterations = 5)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
class AsciiSafeRenderBenchmark {

  // Jsonnet program evaluated once in setup(): an object comprehension with formatted string
  // keys/values plus one std.join result. Only its rendering is measured afterwards.
  private val source: String =
    """{
      | obj: { ['key_field_%d' % i]: 'value_string_%d_abcdefghijklmnop' % i
      | for i in std.range(0, 3000) },
      | joined: std.join(',', ['element_token_%d' % i for i in std.range(0, 8000)]),
      |}""".stripMargin

  // Populated by setup() before any benchmark method runs.
  private var interp: Interpreter = _
  private var value: Val = _

  /**
   * Builds the interpreter, evaluates [[source]] once and keeps the resulting value so that the
   * benchmark method measures rendering only. Prints the rendered length to stderr as a sanity
   * check.
   *
   * Fails with an `IllegalStateException` carrying the evaluator's error if evaluation does not
   * succeed, instead of the uninformative `None.get` failure of `.toOption.get`.
   */
  @Setup
  def setup(): Unit = {
    val wd = MainBenchmark.testSuiteRoot
    interp = new Interpreter(
      Map.empty[String, String],
      Map.empty[String, String],
      OsPath(wd),
      importer = new SjsonnetMainBase.SimpleImporter(IndexedSeq.empty, None),
      parseCache = new DefaultParseCache
    )
    val path = OsPath(wd / "ascii_safe_render_bench.jsonnet")
    value = interp.evaluate(source, path) match {
      case Right(evaluated) => evaluated
      case Left(err) =>
        throw new IllegalStateException(
          s"AsciiSafeRenderBenchmark: evaluation of the benchmark source failed: $err"
        )
    }
    System.err.println("JSON length: " + render().length)
  }

  /** Measured operation: materialize the pre-evaluated value to a JSON string. */
  @Benchmark def renderB(bh: Blackhole): Unit = bh.consume(render())

  /** Renders [[value]] through a fresh `Renderer` (indent = 3) into a new `StringWriter`. */
  private def render(): String = {
    val writer = new StringWriter
    interp.materialize(value, new Renderer(writer, indent = 3))
    writer.toString
  }
}
56 changes: 56 additions & 0 deletions bench/src/sjsonnet/bench/TomlRenderBenchmark.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package sjsonnet.bench

import org.openjdk.jmh.annotations.*
import org.openjdk.jmh.infra.*
import sjsonnet.*

import java.util.concurrent.TimeUnit

/**
* Isolates std.manifestTomlEx rendering on a TOML-heavy object. Fresh interpreter each op (no
* caching) so the TOML render runs every iteration. Used to A/B the TomlRenderer StringWriter ->
* StringBuilderWriter swap.
*/
@BenchmarkMode(Array(Mode.AverageTime))
@Fork(2)
@Threads(1)
@Warmup(iterations = 5)
@Measurement(iterations = 10)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
class TomlRenderBenchmark {

  // Jsonnet program whose top-level expression is a std.manifestTomlEx call over an object
  // comprehension; evaluating it therefore performs the TOML rendering being measured.
  private val source: String =
    """std.manifestTomlEx(
      | { ['section_%d' % i]: {
      | name: 'value_string_%d_abcdef' % i, count: i, enabled: true,
      | tags: ['alpha', 'beta', 'gamma'], nested: { a: 1, b: 'two', c: false },
      | } for i in std.range(0, 3000) },
      | ' ')""".stripMargin

  // Working directory used for the interpreter and the synthetic source path; set in setup().
  private var wd: os.Path = _

  /** Resolves the working directory and runs once, printing the output length to stderr. */
  @Setup
  def setup(): Unit = {
    wd = MainBenchmark.testSuiteRoot
    System.err.println("TOML length: " + run().length)
  }

  /** Measured operation: a full evaluation of [[source]] with a fresh interpreter. */
  @Benchmark def manifestToml(bh: Blackhole): Unit = bh.consume(run())

  /**
   * Evaluates [[source]] with a newly constructed interpreter and parse cache and returns the
   * resulting string.
   *
   * Fails with an `IllegalStateException` that names the cause when evaluation fails or the result
   * is not a string, rather than an opaque `None.get` / `ClassCastException`.
   */
  private def run(): String = {
    val interp = new Interpreter(
      Map.empty[String, String],
      Map.empty[String, String],
      OsPath(wd),
      importer = new SjsonnetMainBase.SimpleImporter(IndexedSeq.empty, None),
      parseCache = new DefaultParseCache
    )
    interp.evaluate(source, OsPath(wd / "toml_render_bench.jsonnet")) match {
      case Right(rendered: Val.Str) => rendered.str
      case Right(other) =>
        throw new IllegalStateException(
          s"TomlRenderBenchmark: expected a string result but got ${other.getClass.getName}"
        )
      case Left(err) =>
        throw new IllegalStateException(
          s"TomlRenderBenchmark: evaluation of the benchmark source failed: $err"
        )
    }
  }
}
21 changes: 21 additions & 0 deletions sjsonnet/src/sjsonnet/BaseCharRenderer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,27 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output](
out
}

/**
 * Escape-free string emission for [[Val.AsciiSafeStr]] payloads.
 *
 * Contract (not checked here): `s` contains only chars in 0x20-0x7E and neither `"` nor `\`, so
 * nothing in it needs JSON escaping. The method writes an opening quote, copies the characters of
 * `s` into the element builder with one bulk `getChars`, writes the closing quote, and flushes.
 * No scan of the string is performed.
 *
 * @param s
 *   the string to emit verbatim between double quotes
 * @param index
 *   unused by this method
 * @return
 *   the renderer's output sink
 */
def visitAsciiSafeString(s: String, index: Int): T = {
  flushBuffer()
  val payloadLen = s.length
  // Reserve room for the payload plus both quotes up front; the appends below are unchecked.
  elemBuilder.ensureLength(payloadLen + 2)
  elemBuilder.appendUnsafe('"')
  val start = elemBuilder.getLength
  val target = elemBuilder.arr
  s.getChars(0, payloadLen, target, start)
  // getChars wrote straight into the backing array, so advance the builder's length manually.
  elemBuilder.length = start + payloadLen
  elemBuilder.appendUnsafe('"')
  flushCharBuilder()
  out
}

final def renderIndent(): Unit = {
if (indent == -1) ()
else if (indentCache != null && depth < BaseCharRenderer.MaxCachedDepth) {
Expand Down
24 changes: 19 additions & 5 deletions sjsonnet/src/sjsonnet/Materializer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,24 @@ abstract class Materializer {
* JIT-friendly) and automatically switches to an explicit stack-based iterative loop when the
* recursion depth exceeds [[Settings.materializeRecursiveDepthLimit]].
*/
/**
 * Emits a string value through `visitor`, recording its position first.
 *
 * When the value is a [[Val.AsciiSafeStr]] and the visitor is a [[BaseCharRenderer]], the string
 * is handed to `visitAsciiSafeString`; every other combination of string and visitor goes
 * through the visitor's regular `visitString`.
 */
@inline private def visitStr[T](s: Val.Str, visitor: Visitor[T, T]): T = {
  storePos(s.pos)
  s match {
    case _: Val.AsciiSafeStr =>
      visitor match {
        case renderer: BaseCharRenderer[T @unchecked] =>
          renderer.visitAsciiSafeString(s.str, -1)
        case _ =>
          visitor.visitString(s.str, -1)
      }
    case _ =>
      visitor.visitString(s.str, -1)
  }
}

def apply0[T](v: Val, visitor: Visitor[T, T])(implicit evaluator: EvalScope): T = try {
v match {
case Val.Str(pos, s) => storePos(pos); visitor.visitString(s, -1)
case obj: Val.Obj =>
case s: Val.Str => visitStr(s, visitor)
case obj: Val.Obj =>
materializeRecursiveObj(obj, visitor, 0, Materializer.MaterializeContext(evaluator))
case Val.Num(pos, _) => storePos(pos); visitor.visitFloat64(v.asDouble, -1)
case xs: Val.Arr =>
Expand Down Expand Up @@ -285,7 +299,7 @@ abstract class Materializer {
(vt: @scala.annotation.switch) match {
case 0 => // TAG_STR
val s = childVal.asInstanceOf[Val.Str]
storePos(s.pos); childVisitor.visitString(s.str, -1)
visitStr(s, childVisitor)
case 1 => // TAG_NUM
storePos(childVal.pos); childVisitor.visitFloat64(childVal.asDouble, -1)
case 2 => // TAG_TRUE
Expand Down Expand Up @@ -436,8 +450,8 @@ abstract class Materializer {
stack: java.util.ArrayDeque[Materializer.MaterializeFrame],
ctx: Materializer.MaterializeContext)(implicit evaluator: EvalScope): Unit = {
childVal match {
case Val.Str(pos, s) =>
storePos(pos); parentVisitor.visitValue(childVisitor.visitString(s, -1), -1)
case s: Val.Str =>
parentVisitor.visitValue(visitStr(s, childVisitor), -1)
case obj: Val.Obj =>
pushObjFrame(obj, childVisitor, stack, ctx)
case Val.Num(pos, _) =>
Expand Down
66 changes: 40 additions & 26 deletions sjsonnet/src/sjsonnet/Parser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,16 @@ class Parser(
throw new ParseError(msg, offset = offset)
}

def Pos[$: P]: P[Position] = Index.map(offset => new Position(fileScope, offset))
// Capture the current parse offset as a Position directly, rather than `Index.map(...)`.
// `Index` stores the offset as an `Int` in fastparse's `successValue: Any`, boxing it, and the
// `.map` then unboxes it and allocates a closure — both per AST node (Pos is called for nearly
// every node). Writing the Position straight into successValue (a reference) skips the box/unbox
// and the lambda. boxToInteger via SharedPackageDefs.Index was a top self-frame in the parse
// flamegraph on kube-prometheus.
def Pos[$: P]: P[Position] = {
  val run = implicitly[P[$]]
  // Build the Position from the run's current index and publish it as the success value.
  val here = new Position(fileScope, run.index)
  run.freshSuccess(here)
}

def id[$: P]: P[String] = P(
CharIn("_a-zA-Z") ~~
Expand Down Expand Up @@ -654,32 +663,37 @@ class Parser(
def exprSuffix2[$: P]: P[Expr => Expr] = exprSuffix2(0)

def exprSuffix2[$: P](currentDepth: Int): P[Expr => Expr] = {
// Match the suffix char FIRST, then allocate the Position only when a suffix actually matches.
// The previous `Pos.flatMapX { i => CharIn(...) }` allocated a Position on every attempt —
// including the failing attempt that terminates `exprSuffix2.rep` after each expression, which
// is the common case (most subexpressions have no suffix). Capturing the offset as
// `ctx.index - 1` after the single-char match defers the allocation to the matching branch and
// also drops the `.map(_(0))` step.
P(
Pos.flatMapX { i =>
CharIn(".[({")./.!.map(_(0)).flatMapX { c =>
(c: @switch) match {
case '.' => Pass ~ id.map(x => Expr.Select(i, _: Expr, x))
case '[' =>
Pass ~ (expr(currentDepth + 1).? ~ (":" ~ expr(currentDepth + 1).?).rep ~ "]").map {
case (Some(tree), Seq()) => Expr.Lookup(i, _: Expr, tree)
case (start, ins) =>
Expr.Slice(i, _: Expr, start, ins.headOption.flatten, ins.lift(1).flatten)
}
case '(' =>
Pass ~ (args(currentDepth + 1) ~ ")" ~ "tailstrict".!.?).map {
case (args, namedNames, tailstrict) =>
Expr.Apply(
i,
_: Expr,
args,
if (namedNames.length == 0) null else namedNames,
tailstrict.nonEmpty
)
}
case '{' =>
Pass ~ (objinside(i, currentDepth + 1) ~ "}").map(x => Expr.ObjExtend(i, _: Expr, x))
case _ => Fail
}
CharIn(".[({")./.!.flatMapX { s =>
val i = new Position(fileScope, implicitly[P[$]].index - 1)
(s.charAt(0): @switch) match {
case '.' => Pass ~ id.map(x => Expr.Select(i, _: Expr, x))
case '[' =>
Pass ~ (expr(currentDepth + 1).? ~ (":" ~ expr(currentDepth + 1).?).rep ~ "]").map {
case (Some(tree), Seq()) => Expr.Lookup(i, _: Expr, tree)
case (start, ins) =>
Expr.Slice(i, _: Expr, start, ins.headOption.flatten, ins.lift(1).flatten)
}
case '(' =>
Pass ~ (args(currentDepth + 1) ~ ")" ~ "tailstrict".!.?).map {
case (args, namedNames, tailstrict) =>
Expr.Apply(
i,
_: Expr,
args,
if (namedNames.length == 0) null else namedNames,
tailstrict.nonEmpty
)
}
case '{' =>
Pass ~ (objinside(i, currentDepth + 1) ~ "}").map(x => Expr.ObjExtend(i, _: Expr, x))
case _ => Fail
}
}
)
Expand Down
Loading
Loading