From 0242409b7a2e40ea86bb775705e9bade7b833d47 Mon Sep 17 00:00:00 2001 From: Phillip Cunliffe Date: Thu, 4 Jun 2026 15:14:30 -0700 Subject: [PATCH 1/5] fix(cli): flush stdio before process.exit to stop >64KiB pipe truncation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `process.exit()` terminates synchronously and drops whatever is still buffered in stdout/stderr. For a pipe that means output past the ~64KiB pipe buffer was silently truncated (large `query sql` results, JSON dumps) — while the same command redirected to a file completed fine, because file writes are synchronous. Drain both streams (resolving on `error`/EPIPE so exit never blocks) after observability shutdown, then exit. Removes the need for callers to redirect to a file just to receive complete output. Co-Authored-By: Claude Opus 4.8 (1M context) --- bin/hypaware.js | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/bin/hypaware.js b/bin/hypaware.js index bd36e3a..57ad9cb 100755 --- a/bin/hypaware.js +++ b/bin/hypaware.js @@ -50,4 +50,38 @@ try { await obs.shutdown() } +// `process.exit()` terminates synchronously and drops whatever is still +// buffered in stdout/stderr — for a pipe that means output past the +// ~64KiB pipe buffer is silently truncated (large `query sql` results, +// JSON dumps). Flush both streams before exiting so every byte reaches +// the OS first. (Writing to a file never hit this because file writes +// complete synchronously.) +await Promise.all([flushStream(process.stdout), flushStream(process.stderr)]) process.exit(exitCode) + +/** + * Resolve once a writable stream has drained its buffered output. Resolves + * immediately when nothing is pending, and on `error` (e.g. EPIPE when the + * reader has gone away) so exit is never blocked. 
+ * + * @param {import('node:stream').Writable} stream + * @returns {Promise} + */ +function flushStream(stream) { + return new Promise((resolve) => { + if (stream.writableLength === 0) { + resolve() + return + } + let done = false + const finish = () => { + if (done) return + done = true + resolve() + } + stream.once('error', finish) + // The write callback fires after this (empty) chunk and all preceding + // buffered writes have been handed to the OS. + stream.write('', finish) + }) +} From 751e5602562f029617aa6c0576434edff29bec6d Mon Sep 17 00:00:00 2001 From: Phillip Cunliffe Date: Thu, 4 Jun 2026 15:14:30 -0700 Subject: [PATCH 2/5] feat(query): context-budget controls for result sets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add applyContextControls(result, {maxCell, maxBytes}): a pure pre-render pass that bounds a result's context footprint on two independent axes: - per-cell truncation: recursively clip every string leaf to maxCell code points (never splitting a multibyte char), appending a greppable `…(+N)` marker. Only strings shrink — numbers/booleans/null and JSON shape are preserved, so `--format json` output stays valid. - row budget: drop trailing rows once cumulative serialized size exceeds maxBytes (always keeping >=1 row), returning a one-line `notice` naming what was withheld — meant for stderr so stdout output is never corrupted. renderResult is left byte-for-byte unchanged so existing smoke/json assertions hold. Replaces the notion of a blunt fixed row cap with a control that actually bounds context (100 rows of raw_frame JSON was already a context bomb a row cap did nothing to stop). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/core/query/format.js | 119 ++++++++++++++++++++++- src/core/query/types.d.ts | 13 +++ test/core/query-context-controls.test.js | 75 ++++++++++++++ 3 files changed, 206 insertions(+), 1 deletion(-) create mode 100644 test/core/query-context-controls.test.js diff --git a/src/core/query/format.js b/src/core/query/format.js index f7c297e..f9af8fe 100644 --- a/src/core/query/format.js +++ b/src/core/query/format.js @@ -1,9 +1,126 @@ // @ts-check /** - * @import { QueryFormat, QueryResultSet } from './types.d.ts' + * @import { ContextControls, ContextControlsResult, QueryFormat, QueryResultSet } from './types.d.ts' */ +/** + * Bound a result set's context footprint before it is rendered to a + * model or terminal. Two independent axes: + * + * 1. per-cell truncation — recursively clip every string value to + * `maxCell` code points, appending a greppable `…(+N)` marker so + * the elision is visible and its size known. Only strings shrink; + * numbers/booleans/null pass through unchanged, so `--format json` + * output stays valid and same-typed. + * 2. row budget — drop trailing rows once the cumulative serialized + * size exceeds `maxBytes`, so a wide or long result cannot flood + * the caller's context. At least one row is always kept. + * + * Returns the capped result plus an optional one-line `notice` (meant + * for stderr, so it never corrupts stdout output) naming exactly what + * was withheld and how to retrieve the full set. A control of `0` + * disables that axis. Pure: the input result is not mutated. + * + * @param {QueryResultSet} result + * @param {ContextControls} controls + * @returns {ContextControlsResult} + */ +export function applyContextControls(result, controls) { + const maxCell = controls.maxCell > 0 ? controls.maxCell : 0 + const maxBytes = controls.maxBytes > 0 ? controls.maxBytes : 0 + + const truncated = maxCell ? 
result.rows.map((row) => truncateRow(row, maxCell)) : result.rows + + if (!maxBytes) { + return { result: { columns: result.columns, rows: truncated }, notice: undefined } + } + + /** @type {Record[]} */ + const kept = [] + let bytes = 0 + for (const row of truncated) { + bytes += rowBytes(row) + // Always emit at least one row; stop once the budget is exceeded. + if (bytes > maxBytes && kept.length > 0) break + kept.push(row) + } + + const dropped = truncated.length - kept.length + const notice = + dropped > 0 + ? `notice: showing ${kept.length} of ${truncated.length} rows (${maxBytes}B context budget); ` + + `use --output for the full result, --max-bytes 0 to disable, or aggregate/LIMIT` + : undefined + return { result: { columns: result.columns, rows: kept }, notice } +} + +/** + * Truncate every value of one row, returning a new row object. + * + * @param {Record} row + * @param {number} maxCell + * @returns {Record} + */ +function truncateRow(row, maxCell) { + /** @type {Record} */ + const out = {} + for (const key of Object.keys(row)) out[key] = truncateValue(row[key], maxCell) + return out +} + +/** + * Recursively clip string leaves to `maxCell` code points. Objects and + * arrays are rebuilt (not mutated); `Date` is treated as a scalar leaf + * so it round-trips through `jsonReplacer` unchanged. 
+ * + * @param {unknown} value + * @param {number} maxCell + * @returns {unknown} + */ +function truncateValue(value, maxCell) { + if (typeof value === 'string') return clipString(value, maxCell) + if (Array.isArray(value)) return value.map((v) => truncateValue(v, maxCell)) + if (value && typeof value === 'object' && !(value instanceof Date)) { + /** @type {Record} */ + const out = {} + for (const key of Object.keys(value)) out[key] = truncateValue(/** @type {Record} */ (value)[key], maxCell) + return out + } + return value +} + +/** + * Clip a string to `maxCell` code points (never splitting a multibyte + * character) and append `…(+N)` where N is the number of code points + * removed. Returns the input unchanged when it already fits. + * + * @param {string} value + * @param {number} maxCell + * @returns {string} + */ +function clipString(value, maxCell) { + const points = Array.from(value) + if (points.length <= maxCell) return value + return points.slice(0, maxCell).join('') + `…(+${points.length - maxCell})` +} + +/** + * Serialized byte size of one row, used for the context budget. Mirrors + * the `jsonl` encoding so the budget tracks what a JSON-consuming caller + * actually receives. + * + * @param {Record} row + * @returns {number} + */ +function rowBytes(row) { + try { + return Buffer.byteLength(JSON.stringify(row, jsonReplacer)) + } catch { + return Buffer.byteLength(String(row)) + } +} + /** * Render a query result set into the requested output format. * diff --git a/src/core/query/types.d.ts b/src/core/query/types.d.ts index aa4ad13..f340f1c 100644 --- a/src/core/query/types.d.ts +++ b/src/core/query/types.d.ts @@ -15,6 +15,19 @@ export interface QueryResultSet { rows: Record[] } +export interface ContextControls { + /** Per-string-cell code-point cap; 0 disables cell truncation. */ + maxCell: number + /** Cumulative serialized-row byte budget for stdout; 0 disables the row budget. 
*/ + maxBytes: number +} + +export interface ContextControlsResult { + result: QueryResultSet + /** One-line message for stderr when rows were dropped; undefined otherwise. */ + notice: string | undefined +} + export interface ExecuteSqlOptions { query: string registry: QueryRegistry diff --git a/test/core/query-context-controls.test.js b/test/core/query-context-controls.test.js new file mode 100644 index 0000000..fa53525 --- /dev/null +++ b/test/core/query-context-controls.test.js @@ -0,0 +1,75 @@ +// @ts-check + +import test from 'node:test' +import assert from 'node:assert/strict' + +import { applyContextControls, renderResult } from '../../src/core/query/format.js' + +/** @param {Record[]} rows */ +const set = (rows) => ({ columns: rows[0] ? Object.keys(rows[0]) : [], rows }) + +test('cell truncation clips long strings with a sized marker, leaves short ones', () => { + const long = 'x'.repeat(250) + const { result, notice } = applyContextControls( + set([{ id: 1, content: long, role: 'user' }]), + { maxCell: 200, maxBytes: 0 } + ) + const cell = /** @type {string} */ (result.rows[0].content) + assert.equal(cell, 'x'.repeat(200) + '…(+50)') + assert.equal(result.rows[0].role, 'user') // short string untouched + assert.equal(result.rows[0].id, 1) // number type preserved + assert.equal(notice, undefined) +}) + +test('cell truncation recurses into nested objects/arrays and preserves JSON validity', () => { + const { result } = applyContextControls( + set([{ args: { command: 'y'.repeat(300), nested: ['z'.repeat(10)] } }]), + { maxCell: 50, maxBytes: 0 } + ) + const args = /** @type {{ command: string, nested: string[] }} */ (result.rows[0].args) + assert.equal(args.command, 'y'.repeat(50) + '…(+250)') + assert.equal(args.nested[0], 'z'.repeat(10)) // under cap, untouched + // Still serializes as valid JSON (truncation only shrinks string leaves). 
+ assert.doesNotThrow(() => JSON.parse(renderResult(result, 'jsonl').trim())) +}) + +test('cell truncation counts code points, never splitting a multibyte char', () => { + const { result } = applyContextControls(set([{ s: '😀'.repeat(10) }]), { maxCell: 4, maxBytes: 0 }) + assert.equal(result.rows[0].s, '😀'.repeat(4) + '…(+6)') +}) + +test('maxCell = 0 disables truncation', () => { + const long = 'x'.repeat(500) + const { result } = applyContextControls(set([{ c: long }]), { maxCell: 0, maxBytes: 0 }) + assert.equal(result.rows[0].c, long) +}) + +test('byte budget drops trailing rows and emits a notice naming the counts', () => { + const rows = Array.from({ length: 100 }, (_, i) => ({ i, blob: 'b'.repeat(100) })) + const { result, notice } = applyContextControls(set(rows), { maxCell: 0, maxBytes: 1000 }) + assert.ok(result.rows.length < 100) + assert.ok(result.rows.length >= 1) + assert.match(notice ?? '', new RegExp(`showing ${result.rows.length} of 100 rows`)) + assert.match(notice ?? '', /--output/) +}) + +test('byte budget always keeps at least one row even if it alone exceeds the budget', () => { + const { result, notice } = applyContextControls( + set([{ huge: 'h'.repeat(5000) }, { huge: 'h'.repeat(5000) }]), + { maxCell: 0, maxBytes: 10 } + ) + assert.equal(result.rows.length, 1) + assert.match(notice ?? 
'', /showing 1 of 2 rows/) +}) + +test('no notice when nothing is dropped', () => { + const { notice } = applyContextControls(set([{ a: 1 }]), { maxCell: 200, maxBytes: 32768 }) + assert.equal(notice, undefined) +}) + +test('input result is not mutated', () => { + const input = set([{ c: 'x'.repeat(300) }]) + applyContextControls(input, { maxCell: 50, maxBytes: 100 }) + assert.equal(input.rows[0].c, 'x'.repeat(300)) + assert.equal(input.rows.length, 1) +}) From 552835d58b49abfdef9950cb7540f1e1280939bf Mon Sep 17 00:00:00 2001 From: Phillip Cunliffe Date: Thu, 4 Jun 2026 15:14:31 -0700 Subject: [PATCH 3/5] feat(query sql): --output spill and per-query cap flags Wire the context controls into `hyp query sql` and add an escape hatch: - --output <path> (-o): write the full, un-capped result to a file and print only a compact receipt (rows/cols/bytes/schema + clipped 3-row preview) to stdout, so a large result never lands in the caller's context. The file is always lossless. - --max-cell <n> / --max-bytes <n>: override the inline caps (0 disables either). Defaults: 200 code points / 32KiB. Inline (non-spill) queries now apply the caps and route the "rows withheld" notice to stderr alongside freshness messages.
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/core/cli/core_commands.d.ts | 18 ++++++ src/core/cli/core_commands.js | 104 ++++++++++++++++++++++++++++--- test/core/query-sql-argv.test.js | 59 ++++++++++++++++++ 3 files changed, 173 insertions(+), 8 deletions(-) create mode 100644 test/core/query-sql-argv.test.js diff --git a/src/core/cli/core_commands.d.ts b/src/core/cli/core_commands.d.ts index 698a6e8..d984990 100644 --- a/src/core/cli/core_commands.d.ts +++ b/src/core/cli/core_commands.d.ts @@ -1,4 +1,5 @@ import type { createCommandRegistry } from '../registry/commands.js' +import type { QueryFormat, RefreshMode } from '../query/types.d.ts' import type { InitFlags, PickerExport, PickerExportOrigin } from './types.d.ts' export declare function registerCoreCommands( @@ -8,3 +9,20 @@ export declare function registerCoreCommands( export declare function resolveInitExportChoice( flags: InitFlags ): { exportChoice: PickerExport; origin: PickerExportOrigin } + +export declare const DEFAULT_QUERY_MAX_CELL: number +export declare const DEFAULT_QUERY_MAX_BYTES: number + +export declare function parseQuerySqlArgv( + argv: string[] +): + | { + ok: true + sql: string + refresh: RefreshMode + format: QueryFormat + output: string | undefined + maxCell: number + maxBytes: number + } + | { ok: false; error: string } diff --git a/src/core/cli/core_commands.js b/src/core/cli/core_commands.js index 89cda5a..1cc1cc9 100644 --- a/src/core/cli/core_commands.js +++ b/src/core/cli/core_commands.js @@ -15,7 +15,7 @@ import { discoverInstalledPlugins } from '../runtime/installed.js' import { discoverBundledPlugins } from '../runtime/bundled.js' import { buildPluginCatalog } from '../plugin_catalog.js' import { collectHypAwareStatus } from '../daemon/status.js' -import { renderResult } from '../query/format.js' +import { applyContextControls, renderResult } from '../query/format.js' import { renderSchema, schemaForDataset } from '../query/schema.js' import { executeQuerySql } from 
'../query/sql.js' import { runBackfill, runBackfillList, runBackfillPlan, runBackfillProvider } from '../commands/backfill.js' @@ -96,7 +96,7 @@ function buildCoreCommands() { { name: 'query sql', summary: 'Run a SQL query against registered datasets', - usage: 'hyp query sql [--refresh ] [--format ]', + usage: 'hyp query sql [--refresh ] [--format ] [--output ] [--max-cell ] [--max-bytes ]', run: runQuerySql, }, { @@ -759,7 +759,27 @@ async function runQuerySql(argv, ctx) { for (const message of result.freshnessMessages ?? []) { ctx.stderr.write(`${message}\n`) } - ctx.stdout.write(renderResult({ columns: result.columns, rows: result.rows }, parsed.format)) + const full = { columns: result.columns, rows: result.rows } + + // Spill mode: write the full, un-capped result to a file and print + // only a compact receipt to stdout, so a large result set never + // lands in the caller's context. Context controls do not apply to + // the file — it is the lossless escape hatch. + if (parsed.output) { + await fs.writeFile(parsed.output, renderResult(full, parsed.format)) + ctx.stdout.write(renderSpillReceipt(parsed.output, full, parsed.format)) + return 0 + } + + // Inline mode: bound the context footprint (cell truncation + row + // budget), render the capped result to stdout, and route the "rows + // withheld" notice to stderr so stdout stays valid in every format. + const { result: capped, notice } = applyContextControls(full, { + maxCell: parsed.maxCell, + maxBytes: parsed.maxBytes, + }) + if (notice) ctx.stderr.write(`${notice}\n`) + ctx.stdout.write(renderResult(capped, parsed.format)) return 0 } catch (err) { const message = err instanceof Error ? err.message : String(err) @@ -812,20 +832,43 @@ async function runQueryRefresh(argv, ctx) { } +/** + * Default per-cell truncation cap (code points) for inline output. Keeps + * fat JSON/text columns to a peek while leaving scalar columns whole. 
+ */ +export const DEFAULT_QUERY_MAX_CELL = 200 + +/** + * Default context byte budget for inline output. Bounds the total result + * a query can push into a caller's context; `--output` or `--max-bytes 0` + * lift it. + */ +export const DEFAULT_QUERY_MAX_BYTES = 32_768 + /** * Parse the `hyp query sql` argv tail. Accepts the positional SQL string and - * `--refresh` / `--format` flags in any order. + * `--refresh` / `--format` / `--output` / `--max-cell` / `--max-bytes` + * flags in any order. + * + * `--output ` spills the full result to a file and prints a receipt; + * `--max-cell ` caps each string cell (0 = off); `--max-bytes ` caps + * the inline byte budget (0 = off). The cap flags are ignored under + * `--output`, which is always lossless. * * @param {string[]} argv - * @returns {{ ok: true, sql: string, refresh: RefreshMode, format: QueryFormat } | { ok: false, error: string }} + * @returns {{ ok: true, sql: string, refresh: RefreshMode, format: QueryFormat, output: string | undefined, maxCell: number, maxBytes: number } | { ok: false, error: string }} */ -function parseQuerySqlArgv(argv) { +export function parseQuerySqlArgv(argv) { /** @type {string[]} */ const positional = [] /** @type {RefreshMode} */ let refresh = 'auto' /** @type {QueryFormat} */ let format = 'table' + /** @type {string | undefined} */ + let output + let maxCell = DEFAULT_QUERY_MAX_CELL + let maxBytes = DEFAULT_QUERY_MAX_BYTES for (let i = 0; i < argv.length; i += 1) { const token = argv[i] @@ -843,16 +886,61 @@ function parseQuerySqlArgv(argv) { } format = value i += 1 + } else if (token === '--output' || token === '-o') { + const value = argv[i + 1] + if (value === undefined || value.startsWith('--')) { + return { ok: false, error: 'hyp query sql: --output expects a file path' } + } + output = value + i += 1 + } else if (token === '--max-cell' || token === '--max-bytes') { + const value = argv[i + 1] + const n = Number(value) + if (value === undefined || !Number.isInteger(n) || n < 
0) { + return { ok: false, error: `hyp query sql: ${token} expects a non-negative integer (got ${value ?? ''})` } + } + if (token === '--max-cell') maxCell = n + else maxBytes = n + i += 1 } else { positional.push(token) } } if (positional.length === 0) { - return { ok: false, error: 'usage: hyp query sql [--refresh ] [--format ]' } + return { ok: false, error: 'usage: hyp query sql [--refresh ] [--format ] [--output ] [--max-cell ] [--max-bytes ]' } } const sql = positional.join(' ') - return { ok: true, sql, refresh, format } + return { ok: true, sql, refresh, format, output, maxCell, maxBytes } +} + +/** + * Render the stdout receipt for `--output` spill mode: where the full + * result went, its shape, and a small truncated preview so the caller + * can sanity-check without ingesting the file. + * + * @param {string} outputPath + * @param {{ columns: string[], rows: Record[] }} full + * @param {QueryFormat} format + * @returns {string} + */ +function renderSpillReceipt(outputPath, full, format) { + const bytes = Buffer.byteLength(renderResult(full, format)) + const cols = full.columns.length > 0 ? full.columns : Object.keys(full.rows[0] ?? 
{}) + const lines = [ + `wrote ${full.rows.length} rows · ${cols.length} cols · ${bytes}B → ${outputPath}`, + ] + if (cols.length > 0) lines.push(`schema: ${cols.join(', ')}`) + const previewRows = full.rows.slice(0, 3) + if (previewRows.length > 0) { + const { result: preview } = applyContextControls( + { columns: full.columns, rows: previewRows }, + { maxCell: 80, maxBytes: 0 } + ) + lines.push(`preview (first ${previewRows.length}, cells clipped):`) + lines.push(renderResult(preview, 'jsonl').trimEnd()) + } + return lines.join('\n') + '\n' } /** diff --git a/test/core/query-sql-argv.test.js b/test/core/query-sql-argv.test.js new file mode 100644 index 0000000..666bee8 --- /dev/null +++ b/test/core/query-sql-argv.test.js @@ -0,0 +1,59 @@ +// @ts-check + +import test from 'node:test' +import assert from 'node:assert/strict' + +import { + DEFAULT_QUERY_MAX_BYTES, + DEFAULT_QUERY_MAX_CELL, + parseQuerySqlArgv, +} from '../../src/core/cli/core_commands.js' + +/** @param {ReturnType} p */ +const ok = (p) => { + assert.equal(p.ok, true) + return /** @type {Extract} */ (p) +} + +test('defaults: cell + byte caps on, no output, table format', () => { + const p = ok(parseQuerySqlArgv(['SELECT 1'])) + assert.equal(p.sql, 'SELECT 1') + assert.equal(p.format, 'table') + assert.equal(p.output, undefined) + assert.equal(p.maxCell, DEFAULT_QUERY_MAX_CELL) + assert.equal(p.maxBytes, DEFAULT_QUERY_MAX_BYTES) +}) + +test('--output / -o capture a path', () => { + assert.equal(ok(parseQuerySqlArgv(['SELECT 1', '--output', '/tmp/x.jsonl'])).output, '/tmp/x.jsonl') + assert.equal(ok(parseQuerySqlArgv(['SELECT 1', '-o', '/tmp/y.jsonl'])).output, '/tmp/y.jsonl') +}) + +test('--max-cell / --max-bytes override, including 0 to disable', () => { + const p = ok(parseQuerySqlArgv(['SELECT 1', '--max-cell', '0', '--max-bytes', '500'])) + assert.equal(p.maxCell, 0) + assert.equal(p.maxBytes, 500) +}) + +test('flags compose in any order with multi-token SQL', () => { + const p = 
ok(parseQuerySqlArgv(['--format', 'json', 'SELECT', 'a,', 'b', '--max-cell', '120'])) + assert.equal(p.sql, 'SELECT a, b') + assert.equal(p.format, 'json') + assert.equal(p.maxCell, 120) +}) + +test('--output without a value is rejected', () => { + const p = parseQuerySqlArgv(['SELECT 1', '--output']) + assert.equal(p.ok, false) +}) + +test('--max-cell rejects negative and non-integer values', () => { + assert.equal(parseQuerySqlArgv(['SELECT 1', '--max-cell', '-5']).ok, false) + assert.equal(parseQuerySqlArgv(['SELECT 1', '--max-bytes', 'abc']).ok, false) +}) + +test('missing SQL is a usage error mentioning the new flags', () => { + const p = parseQuerySqlArgv(['--format', 'json']) + assert.equal(p.ok, false) + assert.match(/** @type {Extract} */ (p).error, /--output/) +}) From a099212aea4025cd8163ebb81a10dec604050490 Mon Sep 17 00:00:00 2001 From: Phillip Cunliffe Date: Thu, 4 Jun 2026 15:35:07 -0700 Subject: [PATCH 4/5] fix(query): address dual-review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review feedback on the context-controls change: - docs: the repo-tracked packaged skill docs (plugins-workspace/{claude,codex}/skills/hypaware-query/SKILL.md) still claimed `query sql` is "hard-capped at 100 rows". Replace with the real context-budget + `--output` guidance so packaged agents stop following a false contract. - testability: extract the post-query spill-vs-inline decision into a pure, exported `buildQuerySqlOutput(full, opts)` (runQuerySql now just performs the IO) and cover it directly — lossless spill file + receipt, inline cap application, and notice routed to stderr with stdout still valid JSON. Closes the untested-spill / untested-wiring gaps. - exit drain: move `flushStream` out of bin/hypaware.js into src/core/cli/flush-streams.js (fixes the inline `import('...')` type the style guide bans, and makes it unit-testable) with tests for the drain, EPIPE, and no-double-resolve paths. 
- honesty: the row budget measures serialized row-data bytes (the jsonl payload), not final rendered size; reword the notice + docstring + skill docs to say so rather than implying an exact rendered-byte ceiling. Co-Authored-By: Claude Opus 4.8 (1M context) --- bin/hypaware.js | 36 +--------- .../claude/skills/hypaware-query/SKILL.md | 4 +- .../codex/skills/hypaware-query/SKILL.md | 4 +- src/core/cli/core_commands.d.ts | 5 ++ src/core/cli/core_commands.js | 57 ++++++++++------ src/core/cli/flush-streams.js | 39 +++++++++++ src/core/query/format.js | 13 ++-- test/core/flush-streams.test.js | 59 ++++++++++++++++ test/core/query-sql-output.test.js | 67 +++++++++++++++++++ 9 files changed, 223 insertions(+), 61 deletions(-) create mode 100644 src/core/cli/flush-streams.js create mode 100644 test/core/flush-streams.test.js create mode 100644 test/core/query-sql-output.test.js diff --git a/bin/hypaware.js b/bin/hypaware.js index 57ad9cb..7c85f07 100755 --- a/bin/hypaware.js +++ b/bin/hypaware.js @@ -37,6 +37,7 @@ if (argv[0] === '__smoke_internal') { const { dispatch } = await import('../src/core/cli/dispatch.js') const { installObservability } = await import('../src/core/observability/index.js') +const { flushStream } = await import('../src/core/cli/flush-streams.js') const obs = installObservability() let exitCode = 1 @@ -50,38 +51,7 @@ try { await obs.shutdown() } -// `process.exit()` terminates synchronously and drops whatever is still -// buffered in stdout/stderr — for a pipe that means output past the -// ~64KiB pipe buffer is silently truncated (large `query sql` results, -// JSON dumps). Flush both streams before exiting so every byte reaches -// the OS first. (Writing to a file never hit this because file writes -// complete synchronously.) +// Flush stdout/stderr before exiting: `process.exit()` is synchronous and +// would drop output still buffered in a pipe (the >64KiB truncation). 
await Promise.all([flushStream(process.stdout), flushStream(process.stderr)]) process.exit(exitCode) - -/** - * Resolve once a writable stream has drained its buffered output. Resolves - * immediately when nothing is pending, and on `error` (e.g. EPIPE when the - * reader has gone away) so exit is never blocked. - * - * @param {import('node:stream').Writable} stream - * @returns {Promise} - */ -function flushStream(stream) { - return new Promise((resolve) => { - if (stream.writableLength === 0) { - resolve() - return - } - let done = false - const finish = () => { - if (done) return - done = true - resolve() - } - stream.once('error', finish) - // The write callback fires after this (empty) chunk and all preceding - // buffered writes have been handed to the OS. - stream.write('', finish) - }) -} diff --git a/hypaware-core/plugins-workspace/claude/skills/hypaware-query/SKILL.md b/hypaware-core/plugins-workspace/claude/skills/hypaware-query/SKILL.md index a4780df..87ccdb0 100644 --- a/hypaware-core/plugins-workspace/claude/skills/hypaware-query/SKILL.md +++ b/hypaware-core/plugins-workspace/claude/skills/hypaware-query/SKILL.md @@ -15,7 +15,7 @@ Use `hyp query` to inspect local HypAware recordings. It reads local JSONL recor - **Stale partitions are queried by default** and the CLI prints a `warning: query cache last refreshed at …` line to stderr. Read stderr alongside stdout, and surface the refresh timestamp to the user so they know the cache may not include newer source rows. Prefer the file-targeted `hyp query refresh ` command the CLI prints when updating cache data; use `--refresh always` only when the query should refresh before it runs. - **Missing partitions still error.** Run the exact `hyp query refresh …` command the CLI prints, or rerun the target query with `--refresh always`. - Broad manual refreshes are explicit: `hyp query refresh --all [dataset]`. Do not run a broad refresh when the printed file-targeted command is enough. -4. 
Prefer structured output for analysis: use `--format json` for follow-up reasoning and `--format markdown` when showing a table to the user. Query output is hard-capped at 100 rows. +4. Prefer structured output for analysis: use `--format json` for follow-up reasoning and `--format markdown` when showing a table to the user. Inline output is context-budgeted, not row-capped: each string cell is truncated to ~200 code points (a `…(+N)` marker shows how much was elided) and rows are dropped once a row-data byte budget (~32KB) is hit, with a `notice: showing X of Y rows …` line on stderr. To get a full, untruncated result, spill it to a file with `--output ` (prints only a receipt to stdout — the data never floods context) and post-process the file. Override the caps with `--max-cell ` / `--max-bytes ` (`0` disables either). 5. Use high-level query commands before custom SQL. Switch to `hyp query sql` only when the built-in commands cannot answer the question. 6. For unfamiliar SQL tables, run `hyp query schema --format json` before querying. @@ -56,4 +56,4 @@ Run `hyp query schema ai_gateway_messages --format markdown` for the authoritati - Do not assume the cache auto-refreshes. Query commands default to `--refresh never`. - Always read stderr. A successful exit code does not mean the cache is current. - Keep SQL read-only and use only query tables from `hyp query catalog`. -- `hyp query sql` never returns more than 100 rows. Use aggregates or filters for completeness. +- `hyp query sql` inline output is context-budgeted (cells truncated to ~200 chars, rows dropped past a ~32KB row-data budget) and emits a `notice:` on stderr when it withholds rows — it is not a fixed row cap. Prefer aggregates/filters for analysis; use `--output ` for a complete, untruncated result and read it back from the file rather than from stdout. 
diff --git a/hypaware-core/plugins-workspace/codex/skills/hypaware-query/SKILL.md b/hypaware-core/plugins-workspace/codex/skills/hypaware-query/SKILL.md index a4780df..87ccdb0 100644 --- a/hypaware-core/plugins-workspace/codex/skills/hypaware-query/SKILL.md +++ b/hypaware-core/plugins-workspace/codex/skills/hypaware-query/SKILL.md @@ -15,7 +15,7 @@ Use `hyp query` to inspect local HypAware recordings. It reads local JSONL recor - **Stale partitions are queried by default** and the CLI prints a `warning: query cache last refreshed at …` line to stderr. Read stderr alongside stdout, and surface the refresh timestamp to the user so they know the cache may not include newer source rows. Prefer the file-targeted `hyp query refresh ` command the CLI prints when updating cache data; use `--refresh always` only when the query should refresh before it runs. - **Missing partitions still error.** Run the exact `hyp query refresh …` command the CLI prints, or rerun the target query with `--refresh always`. - Broad manual refreshes are explicit: `hyp query refresh --all [dataset]`. Do not run a broad refresh when the printed file-targeted command is enough. -4. Prefer structured output for analysis: use `--format json` for follow-up reasoning and `--format markdown` when showing a table to the user. Query output is hard-capped at 100 rows. +4. Prefer structured output for analysis: use `--format json` for follow-up reasoning and `--format markdown` when showing a table to the user. Inline output is context-budgeted, not row-capped: each string cell is truncated to ~200 code points (a `…(+N)` marker shows how much was elided) and rows are dropped once a row-data byte budget (~32KB) is hit, with a `notice: showing X of Y rows …` line on stderr. To get a full, untruncated result, spill it to a file with `--output ` (prints only a receipt to stdout — the data never floods context) and post-process the file. 
Override the caps with `--max-cell ` / `--max-bytes ` (`0` disables either). 5. Use high-level query commands before custom SQL. Switch to `hyp query sql` only when the built-in commands cannot answer the question. 6. For unfamiliar SQL tables, run `hyp query schema
--format json` before querying. @@ -56,4 +56,4 @@ Run `hyp query schema ai_gateway_messages --format markdown` for the authoritati - Do not assume the cache auto-refreshes. Query commands default to `--refresh never`. - Always read stderr. A successful exit code does not mean the cache is current. - Keep SQL read-only and use only query tables from `hyp query catalog`. -- `hyp query sql` never returns more than 100 rows. Use aggregates or filters for completeness. +- `hyp query sql` inline output is context-budgeted (cells truncated to ~200 chars, rows dropped past a ~32KB row-data budget) and emits a `notice:` on stderr when it withholds rows — it is not a fixed row cap. Prefer aggregates/filters for analysis; use `--output ` for a complete, untruncated result and read it back from the file rather than from stdout. diff --git a/src/core/cli/core_commands.d.ts b/src/core/cli/core_commands.d.ts index d984990..c8f3be8 100644 --- a/src/core/cli/core_commands.d.ts +++ b/src/core/cli/core_commands.d.ts @@ -26,3 +26,8 @@ export declare function parseQuerySqlArgv( maxBytes: number } | { ok: false; error: string } + +export declare function buildQuerySqlOutput( + full: { columns: string[]; rows: Record[] }, + opts: { format: QueryFormat; output: string | undefined; maxCell: number; maxBytes: number } +): { stdout: string; stderr: string; file?: { path: string; content: string } } diff --git a/src/core/cli/core_commands.js b/src/core/cli/core_commands.js index 1cc1cc9..eeb1e64 100644 --- a/src/core/cli/core_commands.js +++ b/src/core/cli/core_commands.js @@ -759,27 +759,11 @@ async function runQuerySql(argv, ctx) { for (const message of result.freshnessMessages ?? []) { ctx.stderr.write(`${message}\n`) } - const full = { columns: result.columns, rows: result.rows } - - // Spill mode: write the full, un-capped result to a file and print - // only a compact receipt to stdout, so a large result set never - // lands in the caller's context. 
Context controls do not apply to - // the file — it is the lossless escape hatch. - if (parsed.output) { - await fs.writeFile(parsed.output, renderResult(full, parsed.format)) - ctx.stdout.write(renderSpillReceipt(parsed.output, full, parsed.format)) - return 0 - } - // Inline mode: bound the context footprint (cell truncation + row - // budget), render the capped result to stdout, and route the "rows - // withheld" notice to stderr so stdout stays valid in every format. - const { result: capped, notice } = applyContextControls(full, { - maxCell: parsed.maxCell, - maxBytes: parsed.maxBytes, - }) - if (notice) ctx.stderr.write(`${notice}\n`) - ctx.stdout.write(renderResult(capped, parsed.format)) + const out = buildQuerySqlOutput({ columns: result.columns, rows: result.rows }, parsed) + if (out.file) await fs.writeFile(out.file.path, out.file.content) + if (out.stderr) ctx.stderr.write(out.stderr) + ctx.stdout.write(out.stdout) return 0 } catch (err) { const message = err instanceof Error ? err.message : String(err) @@ -914,6 +898,39 @@ export function parseQuerySqlArgv(argv) { return { ok: true, sql, refresh, format, output, maxCell, maxBytes } } +/** + * Decide what `hyp query sql` emits for a completed result, without doing + * any IO — so the spill-vs-inline behavior is unit-testable. The caller + * (`runQuerySql`) performs the actual file write and stream writes. + * + * - Spill mode (`output` set): the full, un-capped result is rendered for + * the file (lossless), and stdout gets only a compact receipt. + * - Inline mode: context controls cap the result; stdout gets the capped + * render and the "rows withheld" notice (if any) goes to stderr, so + * stdout stays valid in every format. 
+ * + * @param {{ columns: string[], rows: Record[] }} full + * @param {{ format: QueryFormat, output: string | undefined, maxCell: number, maxBytes: number }} opts + * @returns {{ stdout: string, stderr: string, file?: { path: string, content: string } }} + */ +export function buildQuerySqlOutput(full, opts) { + if (opts.output) { + return { + stdout: renderSpillReceipt(opts.output, full, opts.format), + stderr: '', + file: { path: opts.output, content: renderResult(full, opts.format) }, + } + } + const { result: capped, notice } = applyContextControls(full, { + maxCell: opts.maxCell, + maxBytes: opts.maxBytes, + }) + return { + stdout: renderResult(capped, opts.format), + stderr: notice ? `${notice}\n` : '', + } +} + /** * Render the stdout receipt for `--output` spill mode: where the full * result went, its shape, and a small truncated preview so the caller diff --git a/src/core/cli/flush-streams.js b/src/core/cli/flush-streams.js new file mode 100644 index 0000000..8778989 --- /dev/null +++ b/src/core/cli/flush-streams.js @@ -0,0 +1,39 @@ +// @ts-check + +/** + * @import { Writable } from 'node:stream' + */ + +/** + * Resolve once a writable stream has drained its buffered output. Resolves + * immediately when nothing is pending, and on `error` (e.g. EPIPE when the + * reader has gone away) so a caller awaiting it before `process.exit` is + * never blocked. + * + * `process.exit()` terminates synchronously and drops whatever is still + * buffered in stdout/stderr — for a pipe that means output past the ~64KiB + * pipe buffer is silently truncated. Awaiting this on stdout/stderr before + * exiting guarantees every byte reached the OS first. (Writing to a file + * never hit this because file writes complete synchronously.) 
+ * + * @param {Writable} stream + * @returns {Promise} + */ +export function flushStream(stream) { + return new Promise((resolve) => { + if (stream.writableLength === 0) { + resolve() + return + } + let done = false + const finish = () => { + if (done) return + done = true + resolve() + } + stream.once('error', finish) + // The write callback fires after this (empty) chunk and all preceding + // buffered writes have been handed to the OS. + stream.write('', finish) + }) +} diff --git a/src/core/query/format.js b/src/core/query/format.js index f9af8fe..f74fbe8 100644 --- a/src/core/query/format.js +++ b/src/core/query/format.js @@ -14,8 +14,13 @@ * numbers/booleans/null pass through unchanged, so `--format json` * output stays valid and same-typed. * 2. row budget — drop trailing rows once the cumulative serialized - * size exceeds `maxBytes`, so a wide or long result cannot flood - * the caller's context. At least one row is always kept. + * *row-data* size (compact JSON per row, i.e. the jsonl payload) + * exceeds `maxBytes`, so a wide or long result cannot flood the + * caller's context. This measures the underlying data — the + * dominant, format-independent context cost — not the final + * rendered output, which adds modest per-format overhead (JSON + * array syntax and indentation, table padding, markdown escaping). + * At least one row is always kept. * * Returns the capped result plus an optional one-line `notice` (meant * for stderr, so it never corrupts stdout output) naming exactly what @@ -49,8 +54,8 @@ export function applyContextControls(result, controls) { const dropped = truncated.length - kept.length const notice = dropped > 0 - ? `notice: showing ${kept.length} of ${truncated.length} rows (${maxBytes}B context budget); ` + - `use --output for the full result, --max-bytes 0 to disable, or aggregate/LIMIT` + ? 
`notice: showing ${kept.length} of ${truncated.length} rows (${maxBytes}B row-data budget; ` + + `rendered output may be larger); use --output for the full result, --max-bytes 0 to disable, or aggregate/LIMIT` : undefined return { result: { columns: result.columns, rows: kept }, notice } } diff --git a/test/core/flush-streams.test.js b/test/core/flush-streams.test.js new file mode 100644 index 0000000..ae5cbdd --- /dev/null +++ b/test/core/flush-streams.test.js @@ -0,0 +1,59 @@ +// @ts-check + +import test from 'node:test' +import assert from 'node:assert/strict' + +import { flushStream } from '../../src/core/cli/flush-streams.js' + +/** A minimal Writable-shaped stub exposing only what flushStream touches. */ +function fakeStream({ writableLength, mode }) { + /** @type {Record} */ + const handlers = {} + return { + writableLength, + once(event, cb) { + handlers[event] = cb + }, + write(_chunk, cb) { + if (mode === 'drain') queueMicrotask(() => cb()) + else if (mode === 'error') queueMicrotask(() => handlers.error?.()) + // mode 'hang' never invokes either — used with a timeout race + }, + } +} + +test('resolves immediately when nothing is buffered', async () => { + let resolved = false + await flushStream(/** @type {any} */ (fakeStream({ writableLength: 0, mode: 'hang' }))) + .then(() => { resolved = true }) + assert.equal(resolved, true) +}) + +test('resolves via the write callback once buffered output drains', async () => { + await flushStream(/** @type {any} */ (fakeStream({ writableLength: 42, mode: 'drain' }))) + assert.ok(true) // resolving (not hanging) is the assertion +}) + +test('resolves on error (EPIPE) instead of hanging', async () => { + await flushStream(/** @type {any} */ (fakeStream({ writableLength: 42, mode: 'error' }))) + assert.ok(true) +}) + +test('does not double-resolve when both error and write callback fire', async () => { + /** @type {Record} */ + const handlers = {} + let resolveCount = 0 + const stream = { + writableLength: 10, + 
once(event, cb) { handlers[event] = cb }, + write(_chunk, cb) { + // fire both paths; the `done` guard must collapse them to one resolve + handlers.error?.() + cb() + }, + } + await flushStream(/** @type {any} */ (stream)).then(() => { resolveCount += 1 }) + // microtask queue drains; a double-resolve on a Promise is a no-op but we + // assert the awaited value settled exactly once observationally. + assert.equal(resolveCount, 1) +}) diff --git a/test/core/query-sql-output.test.js b/test/core/query-sql-output.test.js new file mode 100644 index 0000000..0a30154 --- /dev/null +++ b/test/core/query-sql-output.test.js @@ -0,0 +1,67 @@ +// @ts-check + +import test from 'node:test' +import assert from 'node:assert/strict' + +import { buildQuerySqlOutput } from '../../src/core/cli/core_commands.js' + +/** @type {Parameters[1]} */ +const baseOpts = { format: 'json', output: undefined, maxCell: 200, maxBytes: 32_768 } +/** @param {Record[]} rows */ +const set = (rows) => ({ columns: rows[0] ? Object.keys(rows[0]) : [], rows }) + +test('spill mode: file content is the full lossless result, stdout is a receipt', () => { + const long = 'x'.repeat(500) + const full = set([{ id: 1, content: long }, { id: 2, content: 'short' }]) + const out = buildQuerySqlOutput(full, { ...baseOpts, format: 'jsonl', output: '/tmp/spill.jsonl' }) + + assert.ok(out.file) + assert.equal(out.file.path, '/tmp/spill.jsonl') + // Lossless: the long cell survives in full, with no truncation marker. + const fileRows = out.file.content.trim().split('\n').map((l) => JSON.parse(l)) + assert.equal(fileRows.length, 2) + assert.equal(fileRows[0].content, long) + assert.doesNotMatch(out.file.content, /…\(\+/) + // Receipt on stdout names the shape; stderr stays empty. 
+ assert.match(out.stdout, /wrote 2 rows · 2 cols · \d+B → \/tmp\/spill\.jsonl/) + assert.match(out.stdout, /schema: id, content/) + assert.match(out.stdout, /preview \(first 2, cells clipped\):/) + assert.equal(out.stderr, '') +}) + +test('spill receipt preview clips cells even though the file does not', () => { + const out = buildQuerySqlOutput(set([{ c: 'y'.repeat(300) }]), { + ...baseOpts, + format: 'jsonl', + output: '/tmp/x.jsonl', + }) + assert.match(out.stdout, /…\(\+/) // preview clipped + assert.doesNotMatch(out.file?.content ?? '', /…\(\+/) // file intact +}) + +test('inline mode: small result renders in full to stdout, no notice', () => { + const full = set([{ a: 1, b: 'hi' }]) + const out = buildQuerySqlOutput(full, baseOpts) + assert.equal(out.stderr, '') + assert.equal(out.file, undefined) + assert.deepEqual(JSON.parse(out.stdout), [{ a: 1, b: 'hi' }]) +}) + +test('inline mode: over-budget result caps rows, stdout stays valid JSON, notice to stderr', () => { + const rows = Array.from({ length: 500 }, (_, i) => ({ i, blob: 'b'.repeat(200) })) + const out = buildQuerySqlOutput(set(rows), { ...baseOpts, maxBytes: 4_000 }) + + const parsed = JSON.parse(out.stdout) // must be valid despite capping + assert.ok(parsed.length < 500) + assert.ok(parsed.length >= 1) + assert.match(out.stderr, /notice: showing \d+ of 500 rows/) + assert.match(out.stderr, /--output/) + assert.equal(out.file, undefined) +}) + +test('inline mode: long cells are truncated in stdout with a marker', () => { + const out = buildQuerySqlOutput(set([{ c: 'z'.repeat(400) }]), baseOpts) + const parsed = JSON.parse(out.stdout) + assert.equal(parsed[0].c, 'z'.repeat(200) + '…(+200)') + assert.equal(out.stderr, '') // one small row, under budget +}) From 197b9e5bb4d19ee378c05af06f561343149b83f6 Mon Sep 17 00:00:00 2001 From: Phillip Cunliffe Date: Thu, 4 Jun 2026 16:12:42 -0700 Subject: [PATCH 5/5] perf(query): fuse truncation into budget loop + single-render spill MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address two efficiency findings from re-review (both hit the large-result paths these features target): - applyContextControls truncated *every* returned row before the byte budget dropped the trailing ones, paying CPU/allocation proportional to the full result even though only ~32KB is emitted. Truncate lazily inside the budget loop instead — one source row at a time, stop at the cutoff. Output is byte-identical; a landmine-row test pins that rows past the budget are never touched. - buildQuerySqlOutput rendered the full result twice in spill mode (once to size the receipt, once for file content), doubling serialization and peak string memory on exactly the dumps --output exists for. Render once and reuse the string; a test asserts the receipt byte count equals the file content length. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/core/cli/core_commands.js | 14 +++++++++----- src/core/query/format.js | 22 +++++++++++++--------- test/core/query-context-controls.test.js | 17 +++++++++++++++++ test/core/query-sql-output.test.js | 2 ++ 4 files changed, 41 insertions(+), 14 deletions(-) diff --git a/src/core/cli/core_commands.js b/src/core/cli/core_commands.js index eeb1e64..3ed32a4 100644 --- a/src/core/cli/core_commands.js +++ b/src/core/cli/core_commands.js @@ -915,10 +915,14 @@ export function parseQuerySqlArgv(argv) { */ export function buildQuerySqlOutput(full, opts) { if (opts.output) { + // Render the file content once and reuse it for both the file and the + // receipt's byte count — large dumps are exactly the `--output` case, + // so a second full serialization is wasted work and peak memory. 
+ const content = renderResult(full, opts.format) return { - stdout: renderSpillReceipt(opts.output, full, opts.format), + stdout: renderSpillReceipt(opts.output, full, content), stderr: '', - file: { path: opts.output, content: renderResult(full, opts.format) }, + file: { path: opts.output, content }, } } const { result: capped, notice } = applyContextControls(full, { @@ -938,11 +942,11 @@ export function buildQuerySqlOutput(full, opts) { * * @param {string} outputPath * @param {{ columns: string[], rows: Record[] }} full - * @param {QueryFormat} format + * @param {string} content the already-rendered file content (sized for the receipt) * @returns {string} */ -function renderSpillReceipt(outputPath, full, format) { - const bytes = Buffer.byteLength(renderResult(full, format)) +function renderSpillReceipt(outputPath, full, content) { + const bytes = Buffer.byteLength(content) const cols = full.columns.length > 0 ? full.columns : Object.keys(full.rows[0] ?? {}) const lines = [ `wrote ${full.rows.length} rows · ${cols.length} cols · ${bytes}B → ${outputPath}`, diff --git a/src/core/query/format.js b/src/core/query/format.js index f74fbe8..f969727 100644 --- a/src/core/query/format.js +++ b/src/core/query/format.js @@ -34,27 +34,31 @@ export function applyContextControls(result, controls) { const maxCell = controls.maxCell > 0 ? controls.maxCell : 0 const maxBytes = controls.maxBytes > 0 ? controls.maxBytes : 0 + const clip = (/** @type {Record} */ row) => (maxCell ? truncateRow(row, maxCell) : row) - const truncated = maxCell ? result.rows.map((row) => truncateRow(row, maxCell)) : result.rows - + // No budget: every row is emitted, so all of them must be truncated. if (!maxBytes) { - return { result: { columns: result.columns, rows: truncated }, notice: undefined } + const rows = maxCell ? 
result.rows.map(clip) : result.rows + return { result: { columns: result.columns, rows }, notice: undefined } } + // Budget: truncate lazily, one source row at a time, and stop as soon as + // the budget is exceeded — so a broad query never pays to truncate rows + // that would be dropped anyway. Always emit at least one row. /** @type {Record[]} */ const kept = [] let bytes = 0 - for (const row of truncated) { - bytes += rowBytes(row) - // Always emit at least one row; stop once the budget is exceeded. + for (const row of result.rows) { + const clipped = clip(row) + bytes += rowBytes(clipped) if (bytes > maxBytes && kept.length > 0) break - kept.push(row) + kept.push(clipped) } - const dropped = truncated.length - kept.length + const dropped = result.rows.length - kept.length const notice = dropped > 0 - ? `notice: showing ${kept.length} of ${truncated.length} rows (${maxBytes}B row-data budget; ` + + ? `notice: showing ${kept.length} of ${result.rows.length} rows (${maxBytes}B row-data budget; ` + `rendered output may be larger); use --output for the full result, --max-bytes 0 to disable, or aggregate/LIMIT` : undefined return { result: { columns: result.columns, rows: kept }, notice } diff --git a/test/core/query-context-controls.test.js b/test/core/query-context-controls.test.js index fa53525..dda8126 100644 --- a/test/core/query-context-controls.test.js +++ b/test/core/query-context-controls.test.js @@ -73,3 +73,20 @@ test('input result is not mutated', () => { assert.equal(input.rows[0].c, 'x'.repeat(300)) assert.equal(input.rows.length, 1) }) + +test('truncation is lazy: rows past the budget are never touched', () => { + // A row whose field throws on access — clipping or serializing it would + // throw. It sits past the cutoff (row 0 fills the budget, row 1 triggers + // the break), so a lazy implementation must never reach row 2. 
+ const r0 = { a: 'small' } + const r1 = { a: 'small' } + const r2 = { a: 'small' } + Object.defineProperty(r2, 'boom', { + enumerable: true, + get() { throw new Error('row past budget was truncated') }, + }) + // Row 0 alone exceeds the 3-byte budget, so only it is kept. + const { result } = applyContextControls({ columns: ['a'], rows: [r0, r1, r2] }, { maxCell: 10, maxBytes: 3 }) + assert.equal(result.rows.length, 1) + assert.equal(result.rows[0].a, 'small') +}) diff --git a/test/core/query-sql-output.test.js b/test/core/query-sql-output.test.js index 0a30154..40216ba 100644 --- a/test/core/query-sql-output.test.js +++ b/test/core/query-sql-output.test.js @@ -24,6 +24,8 @@ test('spill mode: file content is the full lossless result, stdout is a receipt' assert.doesNotMatch(out.file.content, /…\(\+/) // Receipt on stdout names the shape; stderr stays empty. assert.match(out.stdout, /wrote 2 rows · 2 cols · \d+B → \/tmp\/spill\.jsonl/) + // The receipt's byte count reflects the actual file content (single render). + assert.match(out.stdout, new RegExp(`· ${Buffer.byteLength(out.file.content)}B →`)) assert.match(out.stdout, /schema: id, content/) assert.match(out.stdout, /preview \(first 2, cells clipped\):/) assert.equal(out.stderr, '')