hyparam · bgmcmullen · Jun 9, 2026 · Jun 6, 2026 · Jun 8, 2026
diff --git a/collectivus-plugin-kernel-types.d.ts b/collectivus-plugin-kernel-types.d.ts
@@ -133,12 +133,18 @@ export interface PluginContributionManifest {
   sinks?: PluginSinkManifest[]
   datasets?: PluginDatasetManifest[]
   skills?: PluginSkillManifest[]
+  agents?: PluginAgentManifest[]
   init_presets?: PluginInitPresetManifest[]
 }
 
 export interface PluginClientManifest {
   name: string
   skill_dir: string
+  /**
+   * Per-client subagent directory relative to the user's home (e.g.
+   * `.claude/agents`). Absent for clients without a subagent concept.
+   */
+  agent_dir?: string
   attach_probe?: PluginAttachProbeManifest
   required_upstreams?: string[]
 }
@@ -192,6 +198,12 @@ export interface PluginSkillManifest {
   source_dir?: string
 }
 
+export interface PluginAgentManifest {
+  name: string
+  clients: PluginSkillClient[]
+  source_file?: string
+}
+
 export interface PluginInitPresetManifest {
   name: string
   summary?: string
@@ -293,6 +305,7 @@ export interface PluginActivationContext {
    */
   storage: QueryStorageService
   skills: SkillRegistry
+  agents: AgentRegistry
   initPresets: InitPresetRegistry
   /**
    * Backfill provider registry (kernel-owned). Plugins register
@@ -541,6 +554,12 @@ export interface CommandRunContext {
    * to materialize plugin-contributed skills under per-client paths.
    */
   skills: SkillRegistry
+  /**
+   * Agent registry (kernel-owned). Populated by the dispatcher.
+   * `hyp agents install` and the walkthrough enumerate this to
+   * materialize plugin-contributed subagents under per-client paths.
+   */
+  agents: AgentRegistry
   /**
    * Source registry (kernel-owned). Populated by the dispatcher.
    * `hyp status` and the Phase 9 walkthrough enumerate this to render
@@ -1288,6 +1307,24 @@ export interface SkillContribution {
   projectLocal?: boolean
 }
 
+export interface AgentRegistry {
+  register(agent: AgentContribution): void
+  list(): AgentContribution[]
+}
+
+/**
+ * A custom subagent contributed by a client-adapter plugin. Unlike
+ * skills (a directory tree around a `SKILL.md`), an agent is a single
+ * markdown definition file installed flat into the per-client agent
+ * directory as `<agent_dir>/<name>.md`.
+ */
+export interface AgentContribution {
+  name: string
+  plugin: PluginName
+  clients: PluginSkillClient[]
+  sourceFile: string
+}
+
 export interface InitPresetRegistry {
   register(preset: InitPresetContribution): void
   get(name: string): InitPresetContribution | undefined

diff --git a/docs/PLUGIN_AUTHORING.md b/docs/PLUGIN_AUTHORING.md
@@ -65,9 +65,9 @@ fields (validated by `src/core/manifest.js`):
 | `permissions` | no | String array, e.g. `["network", "read_env"]`. |
 | `requires` | no | `{ plugins?, capabilities? }` — see [Capabilities](#capabilities). |
 | `provides` | no | `{ capabilities? }` — see [Capabilities](#capabilities). |
-| `contributes` | no | What the plugin adds: `sources`, `sinks`, `datasets`, `commands`, `skills`, `init_presets`, `config_sections`, `client`. |
+| `contributes` | no | What the plugin adds: `sources`, `sinks`, `datasets`, `commands`, `skills`, `agents`, `init_presets`, `config_sections`, `client`. |
 
-Each entry under `contributes.{sources,sinks,datasets,commands,skills,init_presets}`
+Each entry under `contributes.{sources,sinks,datasets,commands,skills,agents,init_presets}`
 needs a non-empty `name`; `config_sections` entries use `section`.
 
 ---
@@ -97,7 +97,7 @@ on the registries hanging off `ctx`. The kernel handles dependency
 order, paths, logging, and lifecycle. `ctx` gives you:
 
 - `ctx.sources`, `ctx.sinks`, `ctx.query`, `ctx.commands`, `ctx.skills`,
-  `ctx.initPresets`, `ctx.configRegistry` — the registries.
+  `ctx.agents`, `ctx.initPresets`, `ctx.configRegistry` — the registries.
 - `ctx.requireCapability(name, range)` / `ctx.provideCapability(name, version, value)`.
 - `ctx.config` — the validated config slice for this plugin.
 - `ctx.paths` — `{ rootDir, stateDir, cacheDir, tempDir }`, created for you.
@@ -208,6 +208,26 @@ ctx.skills.register({
 })
 ```
 
+### Agents
+
+Materialize a custom subagent into client agent directories (e.g.
+`.claude/agents/`). Unlike a skill, an agent is a single markdown
+definition file installed flat as `<agent_dir>/<name>.md`. Declare
+`contributes.agents: [{ name, clients }]` and register:
+
+```js
+ctx.agents.register({
+  name: 'hypaware-widget-analyst',
+  plugin: PLUGIN_NAME,
+  clients: ['claude'],
+  sourceFile: '/abs/path/to/agents/hypaware-widget-analyst.md',
+})
+```
+
+Only clients whose manifest declares `contributes.client.agent_dir`
+receive agents; targets without one are skipped with a warning by
+`hyp agents install`.
+
 ### Init presets
 
 Declare `contributes.init_presets: [{ name }]` and register a `run` that

diff --git a/hypaware-core/plugins-workspace/claude/agents/hypaware-analyst.md b/hypaware-core/plugins-workspace/claude/agents/hypaware-analyst.md
@@ -0,0 +1,72 @@
+---
+name: hypaware-analyst
+description: Worker for fan-out analysis of local HypAware recordings. Spawn one per independent slice based on date partition, gateway id, conversation id, user id, file glob, etc. when an analysis would otherwise require many `hyp query` runs or return large result sets. Each invocation receives a scope plus an explicit question and returns a short structured summary — never raw query output.
+tools: Bash, Read, Grep, Glob
+model: haiku
+---
+
+# HypAware Analyst Worker
+
+You are a worker spawned to analyze ONE slice of local HypAware recordings. Your job is to run the minimum number of `hyp query` commands needed to answer the question for your slice, then return a compact structured summary.
+
+## CLI essentials
+
+You run `hyp query` commands. These rules are non-negotiable.
+
+- **Use `--format json`** for anything you will parse. `--format markdown` only when you literally need a table for the lead.
+- **Inline output is context-budgeted, not row-capped.** String cells truncate to ~200 chars (`…(+N)` markers) and rows are dropped past a ~32KB row-data budget, with a `notice: showing X of Y rows …` line on stderr. Prefer aggregates that fit the budget; when your slice genuinely needs a large result, spill it with `--output <file>` and post-process the file with Read/Grep instead of parsing stdout.
+- **Narrow aggressively in SQL.** Add `WHERE` clauses on `date`, `gateway_id`, `conversation_id`, `user_id`, `message_created_at`, etc., until the slice matches what you are assigned. Filtering inside the SELECT is the only narrowing mechanism — `hyp query sql` does not take dataset-shaped flags like `--date` or `--gateway-id`.
+- **Unfamiliar table?** Run `hyp query schema <table> --format json` once, then query. Works for built-ins and `hyp collect`-registered tables.
+- **`--config <path>`** only when told the service uses a non-default config. Otherwise rely on what `hyp status` would discover.
+- **Read-only SQL only.** SQL must be a single `SELECT`. The available `hyp query` subcommands are `schema`, `status`, `sql`, `refresh`, `maintain` — you are restricted to `schema`, `status`, and `sql`. Never run `refresh` or `maintain`, and never run shell commands that have side effects.
+
+## Datasets you can query
+
+- `logs` — OTLP log records (HypAware OTel collector).
+- `traces` — OTLP spans.
+- `metrics` — OTLP metric points.
+- `ai_gateway_messages` — one row per AI-gateway content part. Key columns: `conversation_id`, `message_id`, `message_index`, `part_id`, `part_index`, `role`, `part_type` (`text` | `reasoning` | `tool_call` | `tool_result` | passthrough), `tool_name`, `tool_call_id`, `tool_args`, `content_text`, `is_error`, `is_compact_summary`, `is_sidechain`, `cwd`, `git_branch`, `user_id`, `client_name`, `client_version`, `entrypoint`, `user_type`, `permission_mode`, `provider`, `model`, `hook_event`, `caller_type`, `attributes` (JSON: `gateway`, `client`, `request`, `timing`, sometimes `usage`), `status` (JSON: `tool_status`, sometimes `finish_reason`), `message_created_at`, `conversation_started_at`. Partition columns: `gateway_id`, `date`.
+- Collection tables registered via `hyp collect` — see `hyp collect list` for what's available in the lead's setup.
+
+For exact columns in the installed version: `hyp query schema <table> --format json`. For the full reference on `hyp query`, read `~/.claude/skills/hypaware-query/SKILL.md`.
+
+## SQL hints
+
+- JSON columns (`attributes`, `status`, `tools`, `tool_args`, `raw_frame`, `previous_message_id`, `compact_metadata`) use `JSON_VALUE(col, '$.path')` for scalar extraction and `JSON_QUERY(col, '$.path')` for subtrees. `JSON_EXISTS` is **not** supported — use `JSON_QUERY(...) IS NOT NULL` instead.
+- `is_error`, `is_sidechain`, `is_compact_summary` are direct boolean columns — prefer them over JSON probing or `content_text` substring matches.
+- Token usage is recorded under the JSON path `$.usage.*` in the `attributes` column when present, but **for Claude-via-gateway recordings this is typically null** — fall back to `$.gateway.request_bytes` and `$.gateway.response_bytes` (also in `attributes`) as size proxies.
+- Latency lives at the JSON path `$.timing.latency_ms` in the `attributes` column (note: `latency_ms`, not `duration_ms`).
+- Dedup usage/timing per message before summing: those fields can repeat across the parts of one message — `GROUP BY conversation_id, message_id` with `MAX(...)` first, then aggregate per conversation/user/etc.
+- Tool call / result pairs join on `tool_call_id`. The natural ordering key for `ai_gateway_messages` is `(conversation_id, message_index, part_index)`.
+- Table names are resolved from the SQL AST; only built-ins and registered collection tables are valid.
+
+## What to return
+
+Return a compact JSON-shaped summary. Keep it under ~50 lines. Example of a good shape:
+
+```json
+{
+  "scope": "date=2026-05-20, gateway_id=cli-laptop, user_id=86459ddf-...",
+  "counts": { "rows": 1234, "errors": 12, "distinct_conversations": 88 },
+  "top": [
+    { "tool_name": "Bash", "errors": 7 },
+    { "tool_name": "Edit", "errors": 3 }
+  ],
+  "samples": [
+    { "conversation_id": "abc123", "message_id": "f01a...", "note": "tool_status=error on git push" }
+  ],
+  "anomalies": ["3 traces > 30s, all POST /v1/messages from claude-cli/2.1.118"],
+  "commands_run": 4
+}
+```
+
+Rules for the summary:
+
+- **Never** paste raw query output. Counts, top-N, ids, and short prose only.
+- Always include `scope` so the lead can merge across workers.
+- If a query failed: return `{ "error": "...", "exit_code": N, "stderr": "..." }` and stop. Do not retry, and do not attempt to fix cache state — that is the lead's job.
+- If the question turns out to need data outside your assigned scope, return `{ "out_of_scope": "what extra slice is needed" }` and let the lead spawn another worker.
+
+## Efficiency budget
+
+Aim to run **≤ 5** `hyp query` commands. If you find yourself running more, your slice is too broad or the question is too vague — return what you have plus `{ "needs_narrower_scope": true }`.
diff --git a/hypaware-core/plugins-workspace/claude/hypaware.plugin.json b/hypaware-core/plugins-workspace/claude/hypaware.plugin.json
@@ -23,6 +23,7 @@
     "client": {
       "name": "claude",
       "skill_dir": ".claude/skills",
+      "agent_dir": ".claude/agents",
       "attach_probe": {
         "format": "json",
         "settings_file": ".claude/settings.json",
@@ -35,6 +36,9 @@
       { "name": "hypaware-ignore", "clients": ["claude"] },
       { "name": "hypaware-unignore", "clients": ["claude"] }
     ],
+    "agents": [
+      { "name": "hypaware-analyst", "clients": ["claude"] }
+    ],
     "init_presets": [
       {
         "name": "claude-and-otel-local",

diff --git a/hypaware-core/plugins-workspace/claude/skills/hypaware-query/SKILL.md b/hypaware-core/plugins-workspace/claude/skills/hypaware-query/SKILL.md
@@ -16,26 +16,28 @@ Use `hyp query` to inspect local HypAware recordings. It reads local JSONL recor
    - **Missing partitions still error.** Run the exact `hyp query refresh …` command the CLI prints, or rerun the target query with `--refresh always`.
    - Broad manual refreshes are explicit: `hyp query refresh --all [dataset]`. Do not run a broad refresh when the printed file-targeted command is enough.
 4. Prefer structured output for analysis: use `--format json` for follow-up reasoning and `--format markdown` when showing a table to the user. Inline output is context-budgeted, not row-capped: each string cell is truncated to ~200 code points (a `…(+N)` marker shows how much was elided) and rows are dropped once a row-data byte budget (~32KB) is hit, with a `notice: showing X of Y rows …` line on stderr. To get a full, untruncated result, spill it to a file with `--output <file>` (prints only a receipt to stdout — the data never floods context) and post-process the file. Override the caps with `--max-cell <n>` / `--max-bytes <n>` (`0` disables either).
-5. Use high-level query commands before custom SQL. Switch to `hyp query sql` only when the built-in commands cannot answer the question.
-6. For unfamiliar SQL tables, run `hyp query schema <table> --format json` before querying.
+5. For unfamiliar SQL tables, run `hyp query schema <table> --format json` before querying. Registered datasets can have different column sets even when they share a logical shape (e.g., per-user `agent_logs_*` S3 datasets) — check each table's schema before writing cross-table SQL. If `schema` reports `columns: 0` for a dataset that is still queryable, fall back to `SELECT * FROM <table> LIMIT 1`; failed queries also list the available columns in their error message.
 
 ## Common Commands
 
 ```bash
 hyp query status
-hyp query catalog --format json
-hyp query logs --since 1h --format json
-hyp query traces slow --limit 20 --format json
-hyp query metrics list --format json
-hyp query metrics series <metric-name> --format json
 hyp query schema <table> --format json
 hyp query sql "<sql>" --format json
+hyp query sql "<sql>" --format jsonl --output <file>   # full result, lossless
 hyp query refresh <file.jsonl>
 hyp query refresh --all logs
-hyp collect <file.jsonl> --name <name>
-hyp collect --glob '<pattern>' --name <name>
+hyp collect list
+hyp collect remove <name>
 ```
 
+These are the only subcommands in the installed CLI (`hyp query`: schema, status, sql, refresh, maintain; `hyp collect`: list, remove). There are no high-level `catalog`/`logs`/`traces`/`metrics` query commands — answer questions with `hyp query sql`, and discover datasets from the `hyp query status` output.
+
+## SQL dialect notes
+
+- `json_extract_scalar()` does not exist. `JSON_EXTRACT` does, but it errors on rows where a JSON-typed column (notably `tool_args`) holds a plain string instead of a JSON object ("first argument must be JSON string or object, got string").
+- The robust pattern for extracting fields from `tool_args` is a regex over the raw text, e.g. `regexp_extract(CAST(tool_args AS VARCHAR), '"command":"([^"]+)', 1)`.
+
 ## AI gateway message model
 
 Recorded AI-gateway traffic is exposed through one dataset: `ai_gateway_messages`. Each row is a normalized message content part owned by the HypAware AI gateway schema.
@@ -54,6 +56,6 @@ Run `hyp query schema ai_gateway_messages --format markdown` for the authoritati
 ## Guardrails
 
 - Do not assume the cache auto-refreshes. Query commands default to `--refresh never`.
-- Always read stderr. A successful exit code does not mean the cache is current.
-- Keep SQL read-only and use only query tables from `hyp query catalog`.
+- Always read stderr, and never pipe it to /dev/null (especially in shell loops over multiple datasets) — errors and staleness warnings land there, and an empty stdout is indistinguishable from zero rows. A successful exit code does not mean the cache is current.
+- Keep SQL read-only and use only datasets listed by `hyp query status`.
 - `hyp query sql` inline output is context-budgeted (cells truncated to ~200 chars, rows dropped past a ~32KB row-data budget) and emits a `notice:` on stderr when it withholds rows — it is not a fixed row cap. Prefer aggregates/filters for analysis; use `--output <file>` for a complete, untruncated result and read it back from the file rather than from stdout.
diff --git a/hypaware-core/plugins-workspace/claude/src/index.js b/hypaware-core/plugins-workspace/claude/src/index.js
@@ -251,6 +251,14 @@ export async function activate(ctx) {
     })
   }
 
+  const agentsRoot = path.resolve(skillsRootDir(), 'agents')
+  ctx.agents.register({
+    name: 'hypaware-analyst',
+    plugin: PLUGIN_NAME,
+    clients: ['claude'],
+    sourceFile: path.join(agentsRoot, 'hypaware-analyst.md'),
+  })
+
   ctx.initPresets.register({
     name: 'claude-and-otel-local',
     plugin: PLUGIN_NAME,