hyparam · philcunliffe · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026
diff --git a/bin/hypaware.js b/bin/hypaware.js
@@ -37,6 +37,7 @@ if (argv[0] === '__smoke_internal') {
 
 const { dispatch } = await import('../src/core/cli/dispatch.js')
 const { installObservability } = await import('../src/core/observability/index.js')
+const { flushStream } = await import('../src/core/cli/flush-streams.js')
 
 const obs = installObservability()
 let exitCode = 1
@@ -50,4 +51,7 @@ try {
   await obs.shutdown()
 }
 
+// Flush stdout/stderr before exiting: `process.exit()` is synchronous and
+// would drop output still buffered in a pipe (the >64KiB truncation).
+await Promise.all([flushStream(process.stdout), flushStream(process.stderr)])
 process.exit(exitCode)
diff --git a/hypaware-core/plugins-workspace/claude/skills/hypaware-query/SKILL.md b/hypaware-core/plugins-workspace/claude/skills/hypaware-query/SKILL.md
@@ -15,7 +15,7 @@ Use `hyp query` to inspect local HypAware recordings. It reads local JSONL recor
    - **Stale partitions are queried by default** and the CLI prints a `warning: query cache last refreshed at …` line to stderr. Read stderr alongside stdout, and surface the refresh timestamp to the user so they know the cache may not include newer source rows. Prefer the file-targeted `hyp query refresh <file.jsonl>` command the CLI prints when updating cache data; use `--refresh always` only when the query should refresh before it runs.
    - **Missing partitions still error.** Run the exact `hyp query refresh …` command the CLI prints, or rerun the target query with `--refresh always`.
    - Broad manual refreshes are explicit: `hyp query refresh --all [dataset]`. Do not run a broad refresh when the printed file-targeted command is enough.
-4. Prefer structured output for analysis: use `--format json` for follow-up reasoning and `--format markdown` when showing a table to the user. Query output is hard-capped at 100 rows.
+4. Prefer structured output for analysis: use `--format json` for follow-up reasoning and `--format markdown` when showing a table to the user. Inline output is context-budgeted, not row-capped: each string cell is truncated to ~200 code points (a `…(+N)` marker shows how much was elided) and rows are dropped once a row-data byte budget (~32KB) is hit, with a `notice: showing X of Y rows …` line on stderr. To get a full, untruncated result, spill it to a file with `--output <file>` (prints only a receipt to stdout — the data never floods context) and post-process the file. Override the caps with `--max-cell <n>` / `--max-bytes <n>` (`0` disables either).
 5. Use high-level query commands before custom SQL. Switch to `hyp query sql` only when the built-in commands cannot answer the question.
 6. For unfamiliar SQL tables, run `hyp query schema <table> --format json` before querying.
 
@@ -56,4 +56,4 @@ Run `hyp query schema ai_gateway_messages --format markdown` for the authoritati
 - Do not assume the cache auto-refreshes. Query commands default to `--refresh never`.
 - Always read stderr. A successful exit code does not mean the cache is current.
 - Keep SQL read-only and use only query tables from `hyp query catalog`.
-- `hyp query sql` never returns more than 100 rows. Use aggregates or filters for completeness.
+- `hyp query sql` inline output is context-budgeted (cells truncated to ~200 code points, rows dropped past a ~32KB row-data budget) and emits a `notice:` on stderr when it withholds rows — it is not a fixed row cap. Prefer aggregates/filters for analysis; use `--output <file>` for a complete, untruncated result and read it back from the file rather than from stdout.
diff --git a/hypaware-core/plugins-workspace/codex/skills/hypaware-query/SKILL.md b/hypaware-core/plugins-workspace/codex/skills/hypaware-query/SKILL.md
@@ -15,7 +15,7 @@ Use `hyp query` to inspect local HypAware recordings. It reads local JSONL recor
    - **Stale partitions are queried by default** and the CLI prints a `warning: query cache last refreshed at …` line to stderr. Read stderr alongside stdout, and surface the refresh timestamp to the user so they know the cache may not include newer source rows. Prefer the file-targeted `hyp query refresh <file.jsonl>` command the CLI prints when updating cache data; use `--refresh always` only when the query should refresh before it runs.
    - **Missing partitions still error.** Run the exact `hyp query refresh …` command the CLI prints, or rerun the target query with `--refresh always`.
    - Broad manual refreshes are explicit: `hyp query refresh --all [dataset]`. Do not run a broad refresh when the printed file-targeted command is enough.
-4. Prefer structured output for analysis: use `--format json` for follow-up reasoning and `--format markdown` when showing a table to the user. Query output is hard-capped at 100 rows.
+4. Prefer structured output for analysis: use `--format json` for follow-up reasoning and `--format markdown` when showing a table to the user. Inline output is context-budgeted, not row-capped: each string cell is truncated to ~200 code points (a `…(+N)` marker shows how much was elided) and rows are dropped once a row-data byte budget (~32KB) is hit, with a `notice: showing X of Y rows …` line on stderr. To get a full, untruncated result, spill it to a file with `--output <file>` (prints only a receipt to stdout — the data never floods context) and post-process the file. Override the caps with `--max-cell <n>` / `--max-bytes <n>` (`0` disables either).
 5. Use high-level query commands before custom SQL. Switch to `hyp query sql` only when the built-in commands cannot answer the question.
 6. For unfamiliar SQL tables, run `hyp query schema <table> --format json` before querying.
 
@@ -56,4 +56,4 @@ Run `hyp query schema ai_gateway_messages --format markdown` for the authoritati
 - Do not assume the cache auto-refreshes. Query commands default to `--refresh never`.
 - Always read stderr. A successful exit code does not mean the cache is current.
 - Keep SQL read-only and use only query tables from `hyp query catalog`.
-- `hyp query sql` never returns more than 100 rows. Use aggregates or filters for completeness.
+- `hyp query sql` inline output is context-budgeted (cells truncated to ~200 code points, rows dropped past a ~32KB row-data budget) and emits a `notice:` on stderr when it withholds rows — it is not a fixed row cap. Prefer aggregates/filters for analysis; use `--output <file>` for a complete, untruncated result and read it back from the file rather than from stdout.
diff --git a/src/core/cli/core_commands.d.ts b/src/core/cli/core_commands.d.ts
@@ -1,4 +1,5 @@
 import type { createCommandRegistry } from '../registry/commands.js'
+import type { QueryFormat, RefreshMode } from '../query/types.d.ts'
 import type { InitFlags, PickerExport, PickerExportOrigin } from './types.d.ts'
 
 export declare function registerCoreCommands(
@@ -8,3 +9,25 @@ export declare function registerCoreCommands(
 export declare function resolveInitExportChoice(
   flags: InitFlags
 ): { exportChoice: PickerExport; origin: PickerExportOrigin }
+
+export declare const DEFAULT_QUERY_MAX_CELL: number
+export declare const DEFAULT_QUERY_MAX_BYTES: number
+
+export declare function parseQuerySqlArgv(
+  argv: string[]
+):
+  | {
+      ok: true
+      sql: string
+      refresh: RefreshMode
+      format: QueryFormat
+      output: string | undefined
+      maxCell: number
+      maxBytes: number
+    }
+  | { ok: false; error: string }
+
+export declare function buildQuerySqlOutput(
+  full: { columns: string[]; rows: Record<string, unknown>[] },
+  opts: { format: QueryFormat; output: string | undefined; maxCell: number; maxBytes: number }
+): { stdout: string; stderr: string; file?: { path: string; content: string } }
diff --git a/src/core/cli/core_commands.js b/src/core/cli/core_commands.js
@@ -15,7 +15,7 @@ import { discoverInstalledPlugins } from '../runtime/installed.js'
 import { discoverBundledPlugins } from '../runtime/bundled.js'
 import { buildPluginCatalog } from '../plugin_catalog.js'
 import { collectHypAwareStatus } from '../daemon/status.js'
-import { renderResult } from '../query/format.js'
+import { applyContextControls, renderResult } from '../query/format.js'
 import { renderSchema, schemaForDataset } from '../query/schema.js'
 import { executeQuerySql } from '../query/sql.js'
 import { runBackfill, runBackfillList, runBackfillPlan, runBackfillProvider } from '../commands/backfill.js'
@@ -96,7 +96,7 @@ function buildCoreCommands() {
     {
       name: 'query sql',
       summary: 'Run a SQL query against registered datasets',
-      usage: 'hyp query sql <sql> [--refresh <mode>] [--format <fmt>]',
+      usage: 'hyp query sql <sql> [--refresh <mode>] [--format <fmt>] [--output <file>] [--max-cell <n>] [--max-bytes <n>]',
       run: runQuerySql,
     },
     {
@@ -759,7 +759,11 @@ async function runQuerySql(argv, ctx) {
     for (const message of result.freshnessMessages ?? []) {
       ctx.stderr.write(`${message}\n`)
     }
-    ctx.stdout.write(renderResult({ columns: result.columns, rows: result.rows }, parsed.format))
+
+    const out = buildQuerySqlOutput({ columns: result.columns, rows: result.rows }, parsed)
+    if (out.file) await fs.writeFile(out.file.path, out.file.content)
+    if (out.stderr) ctx.stderr.write(out.stderr)
+    ctx.stdout.write(out.stdout)
     return 0
   } catch (err) {
     const message = err instanceof Error ? err.message : String(err)
@@ -812,20 +816,43 @@ async function runQueryRefresh(argv, ctx) {
 }
 
 
+/**
+ * Default per-cell truncation cap (code points) for inline output. Keeps
+ * fat JSON/text columns to a peek while leaving scalar columns whole.
+ */
+export const DEFAULT_QUERY_MAX_CELL = 200
+
+/**
+ * Default context byte budget for inline output. Bounds the total result
+ * a query can push into a caller's context; `--output` or `--max-bytes 0`
+ * lift it.
+ */
+export const DEFAULT_QUERY_MAX_BYTES = 32_768
+
 /**
  * Parse the `hyp query sql` argv tail. Accepts the positional SQL string and
- * `--refresh` / `--format` flags in any order.
+ * `--refresh` / `--format` / `--output` / `--max-cell` / `--max-bytes`
+ * flags in any order.
+ *
+ * `--output <file>` spills the full result to a file and prints a receipt;
+ * `--max-cell <n>` caps each string cell (0 = off); `--max-bytes <n>` caps
+ * the inline byte budget (0 = off). The cap flags are ignored under
+ * `--output`, which is always lossless.
  *
  * @param {string[]} argv
- * @returns {{ ok: true, sql: string, refresh: RefreshMode, format: QueryFormat } | { ok: false, error: string }}
+ * @returns {{ ok: true, sql: string, refresh: RefreshMode, format: QueryFormat, output: string | undefined, maxCell: number, maxBytes: number } | { ok: false, error: string }}
  */
-function parseQuerySqlArgv(argv) {
+export function parseQuerySqlArgv(argv) {
   /** @type {string[]} */
   const positional = []
   /** @type {RefreshMode} */
   let refresh = 'auto'
   /** @type {QueryFormat} */
   let format = 'table'
+  /** @type {string | undefined} */
+  let output
+  let maxCell = DEFAULT_QUERY_MAX_CELL
+  let maxBytes = DEFAULT_QUERY_MAX_BYTES
 
   for (let i = 0; i < argv.length; i += 1) {
     const token = argv[i]
@@ -843,16 +870,98 @@ function parseQuerySqlArgv(argv) {
       }
       format = value
       i += 1
+    } else if (token === '--output' || token === '-o') {
+      const value = argv[i + 1]
+      if (value === undefined || value.startsWith('--')) {
+        return { ok: false, error: 'hyp query sql: --output expects a file path' }
+      }
+      output = value
+      i += 1
+    } else if (token === '--max-cell' || token === '--max-bytes') {
+      const value = argv[i + 1]
+      const n = Number(value)
+      if (value === undefined || !Number.isInteger(n) || n < 0) {
+        return { ok: false, error: `hyp query sql: ${token} expects a non-negative integer (got ${value ?? '<missing>'})` }
+      }
+      if (token === '--max-cell') maxCell = n
+      else maxBytes = n
+      i += 1
     } else {
       positional.push(token)
     }
   }
 
   if (positional.length === 0) {
-    return { ok: false, error: 'usage: hyp query sql <sql> [--refresh <mode>] [--format <fmt>]' }
+    return { ok: false, error: 'usage: hyp query sql <sql> [--refresh <mode>] [--format <fmt>] [--output <file>] [--max-cell <n>] [--max-bytes <n>]' }
   }
   const sql = positional.join(' ')
-  return { ok: true, sql, refresh, format }
+  return { ok: true, sql, refresh, format, output, maxCell, maxBytes }
+}
+
+/**
+ * Decide what `hyp query sql` emits for a completed result, without doing
+ * any IO — so the spill-vs-inline behavior is unit-testable. The caller
+ * (`runQuerySql`) performs the actual file write and stream writes.
+ *
+ * - Spill mode (`output` set): the full, un-capped result is rendered for
+ *   the file (lossless), and stdout gets only a compact receipt.
+ * - Inline mode: context controls cap the result; stdout gets the capped
+ *   render and the "rows withheld" notice (if any) goes to stderr, so
+ *   stdout stays valid in every format.
+ *
+ * @param {{ columns: string[], rows: Record<string, unknown>[] }} full
+ * @param {{ format: QueryFormat, output: string | undefined, maxCell: number, maxBytes: number }} opts
+ * @returns {{ stdout: string, stderr: string, file?: { path: string, content: string } }}
+ */
+export function buildQuerySqlOutput(full, opts) {
+  const { format, output } = opts
+
+  if (!output) {
+    // Inline mode: cap cells/rows for the caller's context. Any notice
+    // about withheld rows is routed to stderr, never mixed into stdout.
+    const limits = { maxCell: opts.maxCell, maxBytes: opts.maxBytes }
+    const controlled = applyContextControls(full, limits)
+    const stdout = renderResult(controlled.result, format)
+    return { stdout, stderr: controlled.notice ? `${controlled.notice}\n` : '' }
+  }
+
+  // Spill mode: serialize the complete result exactly once; the same
+  // string is the file payload and the source of the receipt's byte count.
+  const content = renderResult(full, format)
+  return {
+    stdout: renderSpillReceipt(output, full, content),
+    stderr: '',
+    file: { path: output, content },
+  }
+}
+
+/**
+ * Render the stdout receipt for `--output` spill mode: where the full
+ * result went, its shape, and a small truncated preview so the caller
+ * can sanity-check without ingesting the file.
+ *
+ * @param {string} outputPath
+ * @param {{ columns: string[], rows: Record<string, unknown>[] }} full
+ * @param {string} content  the already-rendered file content (sized for the receipt)
+ * @returns {string}
+ */
+function renderSpillReceipt(outputPath, full, content) {
+  const size = Buffer.byteLength(content)
+  const { columns, rows } = full
+  // Prefer the declared columns; otherwise infer them from the first row.
+  const names = columns.length > 0 ? columns : Object.keys(rows[0] ?? {})
+  const receipt = [`wrote ${rows.length} rows · ${names.length} cols · ${size}B → ${outputPath}`]
+  if (names.length > 0) receipt.push(`schema: ${names.join(', ')}`)
+  const sample = rows.slice(0, 3)
+  if (sample.length > 0) {
+    // Clip preview cells to 80; maxBytes 0 leaves the byte budget off.
+    const clipped = applyContextControls({ columns, rows: sample }, { maxCell: 80, maxBytes: 0 })
+    receipt.push(
+      `preview (first ${sample.length}, cells clipped):`,
+      renderResult(clipped.result, 'jsonl').trimEnd()
+    )
+  }
+  return `${receipt.join('\n')}\n`
 }
 
 /**

diff --git a/src/core/cli/flush-streams.js b/src/core/cli/flush-streams.js
@@ -0,0 +1,39 @@
+// @ts-check
+
+/**
+ * @import { Writable } from 'node:stream'
+ */
+
+/**
+ * Resolve once a writable stream has drained its buffered output. Resolves
+ * immediately when nothing is pending, and on `error` (e.g. EPIPE when the
+ * reader has gone away) so a caller awaiting it before `process.exit` is
+ * never blocked.
+ *
+ * `process.exit()` terminates synchronously and drops whatever is still
+ * buffered in stdout/stderr — for a pipe that means output past the ~64KiB
+ * pipe buffer is silently truncated. Awaiting this on stdout/stderr before
+ * exiting lets buffered output reach the OS first, unless the stream errors.
+ * (Writing to a file never hit this: file writes complete synchronously.)
+ *
+ * @param {Writable} stream
+ * @returns {Promise<void>}
+ */
+export function flushStream(stream) {
+  return new Promise((resolve) => {
+    // Nothing buffered means there is nothing to wait for.
+    if (stream.writableLength === 0) return resolve()
+    // Both the `error` listener and the write callback may fire; settle once.
+    let settled = false
+    const settle = () => {
+      if (!settled) {
+        settled = true
+        resolve()
+      }
+    }
+    stream.once('error', settle)
+    // An empty chunk queues behind everything already buffered, so its
+    // callback runs only after those earlier writes were handed to the OS.
+    stream.write('', settle)
+  })
+}