diff --git a/README.md b/README.md
index d53e629..85049f4 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ contree run apt-get install -y curl     # builds on the previous snapshot
 contree session branch experiment       # branch the sandbox state
 contree run -- make test                # experiment freely
 contree session checkout main           # switch back instantly
-contree session rollback 2              # or rewind two steps
+contree session rollback -- -2          # or rewind two steps
 ```
 
 ## What is ConTree?
@@ -116,7 +116,7 @@ contree cp /app/output.log . # download to local machine
 contree session branch experiment     # create a branch
 contree run -- make test              # experiment on it
 contree session checkout main         # switch back
-contree session rollback 1            # undo last run
+contree session rollback              # undo last run (default: back 1 entry)
 ```
 
 ## Interactive Shell
@@ -151,9 +151,13 @@ The shell provides tab completion for commands, paths, image tags, and operation
 | `run [-- CMD]` | `r` | Spawn a sandbox instance, execute command |
 | `images [--prefix]` | `i`, `img` | List and import images |
 | `tag UUID TAG` | `t` | Tag or untag an image |
-| `ps` | | List operations (instances, imports) |
-| `kill UUID` | | Cancel an operation (`--all` for all) |
+| `ps` | | List operations (shortcut for `operation ls`) |
+| `kill UUID [UUID...]` | | Cancel operations (shortcut for `operation cancel`; `--all` for all active) |
 | `show UUID` | | Show operation result |
+| `operation list` | `op`, `ls` | Same as `ps` (canonical) |
+| `operation show UUID...` | `sh` | Multi-UUID inspect |
+| `operation wait UUID...` | `w` | Block until each op finishes (or `--all`; `--timeout`) |
+| `operation cancel UUID...` | `kill`, `k` | Multi-UUID cancel (or `--all`) |
 | `ls [PATH]` | | List files in session image (no VM) |
 | `cat PATH` | | Show file content from session image (no VM) |
 | `cp PATH DEST` | | Download file from image to local path |
@@ -237,7 +241,7 @@ contree session                       # show current state
 contree session show                  # display history DAG
 contree session branch feature        # create branch from HEAD
 contree session checkout feature      # switch to it
-contree session rollback 3            # go back 3 steps
+contree session rollback -- -3        # go back 3 steps (note `--`; bare `3` is absolute id)
 contree session use other-session     # import image from another session
 ```
 
@@ -259,7 +263,7 @@ Pipe JSON output into `jq`, feed CSV into spreadsheets, or parse programmaticall
 
 ### Config file
 
-`~/.config/contree-cli/config.ini`:
+`$XDG_CONFIG_HOME/contree/auth.ini` (default: `~/.config/contree/auth.ini`; override via `$CONTREE_HOME`):
 
 ```ini
 [DEFAULT]
diff --git a/contree_cli/__main__.py b/contree_cli/__main__.py
index 41be67a..d749071 100644
--- a/contree_cli/__main__.py
+++ b/contree_cli/__main__.py
@@ -1,4 +1,5 @@
 import contextvars
+import http.client
 import logging
 import sys
 from collections.abc import Callable
@@ -98,11 +99,19 @@ def main() -> None:
         except ApiError as exc:
             log.error("%s", exc)
             exit(1)
+        except ValueError as exc:
+            # Raised by loader.from_args for malformed user input
+            # (invalid UUIDs, etc.); the message is already user-facing.
+            log.error("%s", exc)
+            exit(1)
+        except (OSError, http.client.HTTPException) as exc:
+            log.error("Network error: %s", exc)
+            exit(1)
         except KeyboardInterrupt:
             log.error("User interrupted")
             exit(1)
         finally:
-            formatter.flush()
+            formatter.close()
 
     exit(exit_code or 0)
 
diff --git a/contree_cli/agent.md b/contree_cli/agent.md
index 090b762..e3a4ea3 100644
--- a/contree_cli/agent.md
+++ b/contree_cli/agent.md
@@ -6,8 +6,7 @@ Core workflow
 
 Agent protocol — follow this sequence for every task:
 
-1. Choose a session key: agent_<task> or agent_<task>_<subagent>.
-   Pass it with -S on every command. Do not rely on env vars.
+1. Choose a session key: agent_<task> or agent_<task>_<subagent>. Pass it with -S on every command. Do not rely on env vars.
 
 2. Resume or start a session:
      contree session list --filter <hint>
@@ -22,10 +21,11 @@ Agent protocol — follow this sequence for every task:
      contree images --prefix=...
      contree session show
 
-5. Execute in small steps — one mutating step per run:
-     contree -S <key> run -- sh -lc 'apt-get update -qq'
-     contree -S <key> run -- sh -lc 'apt-get install -y curl'
+5. Execute in small steps — one mutating step per run. Pick the run mode by the command's needs, not by category. `apt-get install` and `pip install` are plain executables; direct mode is fine. `-s` is for shell features (pipes, redirects, `&&`, `;`, `$VAR` expansion):
+     contree -S <key> run -- apt-get update -qq
+     contree -S <key> run -- apt-get install -y curl
      contree -S <key> run -- make -C /work test
+     contree -S <key> run -s -- 'apt list --installed 2>/dev/null | wc -l'
 
 6. Tag useful results immediately:
      contree -S <key> tag my-env:latest
@@ -49,17 +49,14 @@ More: contree run --help, contree session --help
 Sessions
 ========
 
-Sessions track image, working directory, pending files, and history.
-Each non-disposable run creates a new image checkpoint you can
-rollback to or branch from.
+Sessions track image, working directory, pending files, and history. Each non-disposable run creates a new image checkpoint you can rollback to or branch from.
 
 Session key resolution (priority):
   1. -S / --session flag (most reliable, survives terminal restarts)
   2. CONTREE_SESSION env var (stable within a shell session)
   3. Auto-generated from profile + PID + TTY (changes on terminal close)
 
-Always use -S for agent workflows. Auto-generated keys are
-unreliable across terminal restarts.
+Always use -S for agent workflows. Auto-generated keys are unreliable across terminal restarts.
 
 Starting and resuming:
   contree -S <key> use tag:alpine:latest   start new session
@@ -72,11 +69,15 @@ Branch workflow:
   contree -S <key> session checkout experiment
   contree -S <key> run -- risky-command
   contree -S <key> session checkout main     # abandon if failed
-  contree -S <key> session branch -d experiment  # clean up
+  contree -S <key> session branch --delete experiment  # clean up
 
 Rollback:
-  contree -S <key> session rollback 1       undo last run
-  contree -S <key> session rollback 3       undo last 3 runs
+  contree -S <key> session rollback         back one entry (default)
+  contree -S <key> session rollback -- -3   back three entries (note `--`)
+  contree -S <key> session rollback +1      forward one entry
+  contree -S <key> session rollback 42      absolute jump to history id 42
+
+  WARNING: a bare positive N is an ABSOLUTE id, not "back N steps". Use `--` plus a negative N for relative navigation.
   contree -S <key> session show             view history before rollback
 
 History DAG (contree session show output):
@@ -85,8 +86,7 @@ History DAG (contree session show output):
   2   def456    1       run   apt-get update       main
   3   789abc    2       run   apt-get install curl main, experiment
 
-Sessions are agent memory — reuse the same key to resume later.
-Different profiles have separate session databases.
+Sessions are agent memory — reuse the same key to resume later. Different profiles have separate session databases.
 
 Cleanup:
   contree -S <key> session delete <key> -y   delete session
@@ -97,9 +97,7 @@ More: contree session --help, contree session branch --help
 Images and tags
 ===============
 
-All data — images, operations, uploaded files — is scoped to a
-Project. Multiple tokens can access the same project. Different
-projects have separate scopes.
+All data — images, operations, uploaded files — is scoped to a Project. Multiple tokens can access the same project. Different projects have separate scopes.
 
 Listing:
   contree images                         all tagged images
@@ -112,13 +110,13 @@ Tagging:
   contree tag my-app:v1.0                tag current session image
   contree tag UUID my-app:v1.0           tag specific image by UUID
   contree tag tag:alpine:latest my-copy  re-tag by reference
-  contree tag -d UUID my-tag             remove tag
+  contree tag -U UUID my-tag             remove tag (or --delete/--rm)
 
 Tag rules:
-  - Tags are unique per project — assigning moves the tag
-  - Allowed: a-z 0-9 _ - with : / . separators (max 256 chars)
-  - Your tags shadow public tags with the same name
-  - Removing your tag restores the public one
+  - Tags are unique per project — assigning moves the tag.
+  - Allowed: a-z 0-9 _ - with : / . separators (max 256 chars).
+  - Your tags shadow public tags with the same name.
+  - Removing your tag restores the public one.
 
 Importing from registries:
   contree images import ubuntu:latest
@@ -131,14 +129,10 @@ Tag conventions:
   <project>/<purpose>/<base>:<tag> project-specific
   my-app:latest                    simple app tags
 
-Always search before building:
-  contree images --prefix=python-dev
+Always search before building: `contree images --prefix=python-dev`.
 
 Building from a Dockerfile:
-  When a project already ships a Dockerfile, prefer `contree build`
-  over hand-running each step. It executes FROM/RUN/COPY/WORKDIR/ENV
-  /ARG/USER against the API and caches every layer as a branch so
-  rebuilds are fast.
+  When a project already ships a Dockerfile, prefer `contree build` over hand-running each step. It executes FROM/RUN/COPY/WORKDIR/ENV/ARG/USER against the API and caches every layer as a branch so rebuilds are fast.
 
   Layer cache is keyed by abspath(context), shared across invocations:
     contree build .                     build ./Dockerfile, no tag
@@ -147,21 +141,21 @@ Building from a Dockerfile:
     contree build . --build-arg VERSION=1.2
     contree build . --no-cache          force rebuild
 
-  Supported directives: FROM, RUN, COPY, ADD (local paths only),
-  WORKDIR, ENV, ARG, USER. CMD/ENTRYPOINT/LABEL/EXPOSE/VOLUME/etc.
-  are parsed but skipped with a warning. Multi-stage (AS / --from)
-  is not yet supported.
+  Supported directives: FROM, RUN, COPY, ADD (local files/dirs and http(s) URLs; no tar auto-extract), WORKDIR, ENV, ARG, USER. CMD/ENTRYPOINT/LABEL/EXPOSE/VOLUME/etc. are parsed but skipped with a warning. Multi-stage (AS / --from) is not yet supported.
+
+  .dockerignore is applied to every COPY/ADD walk on top of the default exclude list (.git, __pycache__, node_modules, etc.).
 
-  .dockerignore is applied to every COPY/ADD walk on top of the
-  default exclude list (.git, __pycache__, node_modules, etc.).
+  `contree build` runs in its own session keyed by abspath(CONTEXT) (visible as "session": "build:<hash>" in -f json output). Passing `-S <agent_key>` to `build` is harmless but does not bind the build to your agent session. Verify the resulting image from a normal session:
+    contree build . --tag myapp:dev
+    contree -S agent_verify use tag:myapp:dev
+    contree -S agent_verify run -D -- myapp --version
 
 More: contree build --help, contree images --help, contree tag --help
 
 Files and directories
 =====================
 
-contree run is remote execution. Local files are NOT visible in
-the sandbox unless explicitly attached.
+contree run is remote execution. Local files are NOT visible in the sandbox unless explicitly attached.
 
 Single file:
   contree run --file ./app.py:/app/app.py -- python /app/app.py
@@ -169,8 +163,7 @@ Single file:
 Directory (recursive):
   contree run --file ./src:/app/src -- make -C /app/src
 
-Full --file syntax:
-  host_path[:instance_path][:uUID][:gGID][:mMODE]
+Full --file syntax: `host_path[:instance_path][:uUID][:gGID][:mMODE]`
 
   ./app.py                            defaults from stat
   ./app.py:/app/app.py                explicit destination
@@ -178,25 +171,18 @@ Full --file syntax:
   ./app.py:/app.py:u0:g0:m0755       all explicit
   ./app.py:uroot:groot               uid/gid by name (local resolve)
 
-Directory exclusions (automatic):
-  .*, .git, *.pyc, __pycache__, .venv, .mypy_cache,
-  .pytest_cache, node_modules, dist, build
+Directory exclusions (automatic): `.*`, `.git`, `*.pyc`, `__pycache__`, `.venv`, `.mypy_cache`, `.pytest_cache`, `node_modules`, `dist`, `build`.
 
 Add custom exclusions:
   contree run --file ./project:/app --file-excludes '*.log' '*.tmp' -- ...
 
-Upload caching:
-  Files cached locally by path + inode + mtime + size.
-  Cache TTL: 90 days. Server deduplicates by SHA256.
-  Unchanged files skip hash calculation and API calls.
+Upload caching: files cached locally by path + inode + mtime + size. Cache TTL: 90 days. Server deduplicates by SHA256. Unchanged files skip hash calculation and API calls.
 
 Staging files for next run:
   contree file edit /etc/nginx/nginx.conf    download, edit, stage
   contree file cp ./config.yaml /etc/app/    upload and stage
 
-Pending files are injected into the next non-disposable run.
-Explicit --file takes priority over pending files at same path.
-Pending files are branch-aware.
+Pending files are injected into the next run, including disposable ones (the run sees them; the active branch only commits them after a successful non-disposable run). Explicit --file takes priority over pending files at the same path. Pending files are branch-aware.
 
 Listing uploaded files:
   contree file ls                 list all uploaded files in the project
@@ -204,45 +190,39 @@ Listing uploaded files:
   contree file ls -q              uuid + sha256 + source only (quiet)
   contree -f json file ls         JSON output for jq
 
-  Output joins remote files (uuid, sha256, size, created_at) with the
-  local upload cache. The SOURCE column shows whatever this machine
-  used to produce the file:
+  Output joins remote files (uuid, sha256, size, created_at) with the local upload cache. The SOURCE column shows whatever this machine used to produce the file:
     - absolute host path for files uploaded via `run --file` / `COPY`;
     - https://... URL for files fetched via `ADD URL`.
 
-  IMPORTANT: SOURCE is resolved ONLY for files uploaded from this
-  specific machine. The mapping lives in the local SQLite cache (per
-  profile, under $CONTREE_HOME/cli/sessions/<profile>.db) keyed by
-  path+inode+mtime+size (for host paths) or by the URL itself (for
-  URL fetches), and is NOT shared between hosts. Rows show empty
-  SOURCE when:
+  IMPORTANT: SOURCE is resolved ONLY for files uploaded from this specific machine. The mapping lives in the local SQLite cache (per profile, under $CONTREE_HOME/cli/sessions/<profile>.db) keyed by path+inode+mtime+size (for host paths) or by the URL itself (for URL fetches), and is NOT shared between hosts. Rows show empty SOURCE when:
     - the file was uploaded from a different machine or by a teammate;
-    - the host file has been moved, renamed, or its inode/mtime/size
-      changed since upload (the cache key no longer matches);
-    - the upload happened before tracking landed (older entries
-      backfill on the next match).
-  An agent must not assume SOURCE is authoritative across hosts;
-  for cross-machine identity always use the remote UUID or sha256.
+    - the host file has been moved, renamed, or its inode/mtime/size changed since upload (the cache key no longer matches);
+    - the upload happened before tracking landed (older entries backfill on the next match).
+  An agent must not assume SOURCE is authoritative across hosts; for cross-machine identity always use the remote UUID or sha256.
 
 More: contree run --help, contree file --help
 
 Execution modes
 ===============
 
-Direct command (default) — each arg is a separate argv entry:
+Direct command (default) — each arg is a separate argv entry. Use this for plain executables that do not need shell features:
   contree run -- uname -a
   contree run -- make -C /app test
-  contree run -- sh -lc 'pip install flask'   (login shell)
+  contree run -- python /app/script.py
 
-Shell mode (-s) — joins args, passes to sh -c:
+Shell mode (-s) — joins args and passes to sh -c. Use when you need pipes, redirects, &&, ;, or variable expansion. Do not wrap manually in `sh -c '...'`; -s already does that for you. For working directory use `-C /path` or `contree cd /path`, NOT `cd` inside `-s`:
   contree run -s -- 'echo hello && ls /'
-  contree run -s -- 'cd /app && make test'
+  contree run -C /app -s -- 'echo $PWD && make test'
   contree run -s -- 'cat /etc/passwd | grep root'
+  contree run -- apt-get install -y curl      (direct mode is fine)
+
+Login shell (`run -- sh -lc '…'`) — only when /etc/profile.d behavior is explicitly required, e.g. PATH set by `agent` provisioning. Prefer `contree env` / `-e` and `cd` / `-C` over login-shell magic:
+  contree run -- sh -lc 'cargo build'   (only if PATH needs profile)
 
 When to use which:
-  Direct: contree run -- make test         (no shell features needed)
-  Shell:  contree run -s -- 'cd /app && make'  (need cd/pipes/&&)
-  Login:  contree run -- sh -lc 'cargo build'  (need PATH from profile)
+  Direct: contree run -- make test                (no shell features needed)
+  Shell:  contree run -C /app -s -- 'a | grep b'  (need pipes/&&/$expand)
+  Login:  contree run -- sh -lc 'cargo build'     (rare; PATH from profile)
 
 Interpreter mode (-I) — shebang scripts:
   #!/usr/bin/env -S contree run -I
@@ -257,31 +237,73 @@ Piped stdin:
 
 Detached mode (-d):
   contree run -d -- long-running-task
-  contree ps                              check status
-  contree ps -a -S FAILED --since=1h      recent failures
-  contree show UUID                       view result
-  contree session wait                    block until done
-  contree session wait UUID1 UUID2        wait for specific ops
+  contree ps                                  check status
+  contree ps -a --status FAILED --since=1h    recent failures
+  contree show UUID                           view result
+  contree session wait                        block until done + advance branch
+  contree session wait UUID1 UUID2            poll only (NO branch advance)
+
+  NOTE: status filtering uses --status, NOT -S. `-S` is the global session flag and only works BEFORE the subcommand. Also, the default `run -d` output is plain/table -- use `contree -f json run -d ...` to capture the UUID via `jq -r .uuid` reliably.
+
+Operation references (UUID_OR_REF):
+  Every positional that `--help` labels `UUID_OR_REF` -- `op show`, `op cancel`, `op wait`, top-level `show`/`kill`, and `session wait` -- accepts either a real operation UUID OR a session-history reference. References are resolved against the active session (the one selected by `-S <key>`) before the API is called, so the same notation works everywhere.
+
+  Accepted forms:
+    @  /  :  /  HEAD              latest op on the active branch tip
+    @N /  :N /  bare N            absolute history id N
+    @-N / :-N / HEAD~N            N steps back from the tip
+    HEAD~                         shorthand for HEAD~1
+    @+N / :+N                     N steps forward (latest child)
+
+  Examples:
+    contree -S <key> op show HEAD                latest op in this session
+    contree -S <key> op show HEAD~ HEAD~2        last two ops
+    contree -S <key> op wait HEAD                wait on the latest detached run
+    contree -S <key> kill @5                     cancel by absolute history id
+    contree -S <key> session wait HEAD~1         poll the previous op
+
+  Caveats:
+    * Requires an active session (-S <key>); otherwise resolution errors.
+    * HEAD on a `use` entry has no operation_uuid and errors clearly ("has no operation UUID"); pick HEAD~1 or an explicit id instead.
+    * @0 and HEAD~0 are rejected with "non-zero" -- use HEAD for "now".
 
 Monitoring background operations:
-  Use the `operation` namespace (alias `op`) when juggling several
-  detached runs. `op ls` is `ps`; `op show` and `op cancel` accept
-  multiple UUIDs in one call.
+  Use the `operation` namespace (alias `op`) when juggling several detached runs. `op ls` is the canonical command — `contree ps` is its top-level shortcut. `op show` and `op cancel` accept multiple UUIDs in one call (`op cancel` has aliases `kill` and `k`).
+
+  contree op ls                               EXECUTING only (default)
+  contree op ls -a                            every status
+  contree op ls --status PENDING              list PENDING ops
+  contree op ls --status FAILED --since 1h    recent failures
+  contree op show UUID1 UUID2 UUID3           inspect a batch in one call
+  contree op cancel UUID1 UUID2               cancel selected operations
+  contree kill UUID1 UUID2                    same -- top-level shortcut
+  contree op cancel --all                     cancel every active op (rare)
+
+  Default `op ls`/`ps` lists only `EXECUTING`; `PENDING` and `ASSIGNED` are hidden until `-a` or an explicit `--status`. For a full active snapshot, fetch with `-a` and filter client-side.
 
-  contree op ls                           list operations (= ps)
-  contree op ls -a -S EXECUTING           filter active runs
-  contree op show UUID1 UUID2 UUID3       inspect a batch in one call
-  contree op cancel UUID1 UUID2           cancel selected operations
-  contree op cancel --all                 cancel every active op (rare)
+  `op wait` is a pure observer: polls and prints one operation record per completion. Default formatter pins uuid, status, exit_code, timed_out, duration first and error last; every other scalar API field appears between them, so column count is not fixed. For scripts use `-f json` (one object per line) or `-f tsv` and select fields explicitly. `status` is the server's word verbatim (orchestration outcome — did the API run the job?); the sandbox process's exit code lives in the separate `exit_code` column. The CLI exit code is 1 when any op finishes non-SUCCESS, or the actual `exit_code` when a SUCCESS op had a non-zero sandbox exit (so `op wait && next-step` still composes naturally with `run -- false`). --timeout (default 60s) caps the wait. Use --all to wait for every currently active op in the project.
 
-  Fan-out + join pattern:
-    A=$(contree run -d -- make -C /work/a build | jq -r .uuid)
-    B=$(contree run -d -- make -C /work/b build | jq -r .uuid)
-    contree session wait "$A" "$B"
-    contree op show "$A" "$B"
+  Rule of thumb -- use `op wait` ONLY outside session context: `op wait` is the right tool when the UUIDs came from somewhere else (different session, different agent, `images import`, raw API call) and you only need "is it done yet?". For ops you spawned in *this* session, use `session wait` (no-arg form) instead -- it polls AND advances the active branch to each result image, which `op wait` will not do.
 
-  Background checks are cheap: terminal results are cached locally,
-  so repeated `op show` / `show` calls do not re-hit the API.
+  Caveat 1 -- `op wait` does NOT advance session state: each `run -d` (non-disposable) creates a `detached-<op-uuid>` branch pointing at the START image. `op wait` does not move those branches to the result image; the result lives only on the server. After fan-out + wait the session looks the same as before the wait, just with `detached-*` branches accumulated.
+
+  PREFERRED fan-out (--disposable, no image-tracking concerns):
+    A=$(contree -S <key> -f json run -d --disposable -- pytest tests/a | jq -r .uuid)
+    B=$(contree -S <key> -f json run -d --disposable -- pytest tests/b | jq -r .uuid)
+    C=$(contree -S <key> -f json run -d --disposable -- pytest tests/c | jq -r .uuid)
+    contree -S <key> op wait "$A" "$B" "$C"     block until all complete
+    contree -S <key> op show "$A" "$B" "$C"     stdout/stderr per op
+
+  Non-disposable fan-out (must recover images manually):
+    A=$(contree -S <key> -f json run -d -- apt-get install -y curl | jq -r .uuid)
+    B=$(contree -S <key> -f json run -d -- apt-get install -y wget | jq -r .uuid)
+    contree -S <key> op wait "$A" "$B"
+    IMG_A=$(contree -S <key> -f json op show "$A" | jq -r .image)
+    contree -S <key> use "$IMG_A"           bind chosen result back
+
+  Caveat 2 -- `op wait --all` is project-wide: if another agent (or another shell of yours) is running concurrently in the same project, your --all will block on its ops too. The result is still a valid wait, just possibly not over the set you expected. For session-spawned fan-out the correct alternative is `contree -S <key> session wait` (no args): it drains only this session's pending detached ops and advances the active branch with each result image. Reach for `op wait --all` only when you really want a project-wide observer (admin/cleanup tooling).
+
+  Background checks are cheap: terminal results are cached locally, so repeated `op show` / `show` calls do not re-hit the API.
 
 Disposable mode (-D) — no image checkpoint:
   contree run -D -- rm -rf /tmp/*
@@ -294,11 +316,10 @@ Session-level environment variables:
   contree env PATH=/root/.cargo/bin:/usr/local/bin:/usr/bin:/bin:/sbin
   contree env DEBUG=1
   contree run -- cargo build         # injects PATH and DEBUG per-run
-  contree env -d DEBUG               # unset
+  contree env -U DEBUG               # unset
   contree env                        # list all
 
-Session env is injected on every run but NOT saved into the image
-unless --preserve-env is passed.
+Session env is injected on every run but NOT saved into the image unless --preserve-env is passed.
 
 Preserve env into the image (persists across runs server-side):
   contree run --preserve-env -e PATH="/root/.cargo/bin:/usr/bin:/bin" -- cargo build
@@ -335,54 +356,50 @@ Rules for reliable agent workflows:
 
 2. One mutating step per run. Each run = one history entry.
    Wrong:  contree run -s -- 'apt install curl && make test'
-   Right:  contree run -- sh -lc 'apt install -y curl'
+   Right:  contree run -- apt install -y curl
            contree run -- make test
 
-3. Why split? Chained runs collapse into one checkpoint.
-   If 'make test' fails, you can't rollback to just after
-   'apt install'. Split runs give you granular rollback.
+3. Why split? Chained runs collapse into one checkpoint. If `make test` fails, you can't rollback to just after `apt install`. Split runs give you granular rollback.
 
 4. Global flags (-f, -S, -p) MUST come before the subcommand:
    Right:  contree -S key -f json images
    Wrong:  contree images -S key -f json
 
-5. Use -f json for structured output in automation:
-   contree -f json images | jq '.uuid'
+5. Use -f json for structured output in automation: `contree -f json images | jq '.uuid'`.
 
-6. Agents must never run 'contree auth'. Only users manage auth.
+6. Agents must never run `contree auth`. Only users manage auth.
 
-7. Use --disposable (-D) for throwaway checks that should not
-   advance the session image. Omit -D to keep snapshots.
+7. Use --disposable (-D) for throwaway checks that should not advance the session image. Omit -D to keep snapshots.
 
 8. Prefer absolute paths for -C/--cwd and --file destinations.
 
-9. Branch before risky changes. Rollback is always available but
-   branching is cleaner.
+9. Branch before risky changes. Rollback is always available but branching is cleaner.
 
 Output formats
 ==============
 
-Global -f flag goes before the subcommand:
+Global -f flag goes before the subcommand. Always available formats:
 
   contree -f json images           one JSON object per line (JSONL)
   contree -f json-pretty ps        pretty-printed JSON array
   contree -f csv images            CSV with header row
   contree -f tsv ps                tab-separated values
+  contree -f plain images          key: value blocks
+
+`-f toml` is available only on Python 3.11+ (it relies on stdlib `tomllib`). On Python 3.10 it is silently absent from --help.
 
 Scripting examples:
   contree -f json images --prefix=python | jq -r '.uuid'
   contree -f json ps -a | jq 'select(.status=="SUCCESS")'
   contree -f csv images > images.csv
-  contree ps -q | xargs -I {} contree show {}
+  contree ps -q | xargs contree show
 
-Note: 'run' with default formatter prints raw stdout/stderr.
-Use -f json to get structured operation metadata instead.
+Note: `run` with default formatter prints raw stdout/stderr. Use -f json to get structured operation metadata instead.
 
 Profiles
 ========
 
-Profiles store API tokens for different projects. Each profile
-has its own session database — switching profiles isolates sessions.
+Profiles store API tokens for different projects. Each profile has its own session database — switching profiles isolates sessions.
 
   contree auth                        save token (secure prompt)
   contree auth ls                     list with status check
@@ -423,28 +440,28 @@ All commands
   build [CONTEXT]         Build image from Dockerfile (aliases: bd)
   images                  List/import images (aliases: i, img)
   tag [IMAGE] TAG         Tag image (aliases: t)
-  ps                      List operations
-  kill UUID               Cancel operation
+  ps                      List operations (shortcut for `operation ls`)
+  kill UUID [UUID...]     Cancel operations (shortcut for `operation cancel`); `--all` cancels every active
   show UUID               Show operation result
-  operation list          List operations (aliases: op ls)
-  operation show UUID...  Show one or more operation results (aliases: op)
-  operation cancel UUID...
-                          Cancel one or more operations (or --all)
+  operation list          List operations (aliases: ls)
+  operation show UUID...  Show one or more operation results (aliases: sh)
+  operation wait UUID...  Wait for operations to reach a terminal status (aliases: w); `--all` waits for every active op; `--timeout SECONDS` fails if not all complete (default: 60)
+  operation cancel UUID...  Cancel one or more operations (aliases: kill, k); `--all` cancels every active
   ls [PATH]               List files in image (no VM)
   cat PATH                Show file content (no VM)
   cp PATH DEST            Download file from image
   cd [PATH]               Change session working directory
-  env [KEY=VALUE ...]     Session env vars (-d to unset)
+  env [KEY=VALUE ...]     Session env vars (-U to unset)
   file edit PATH          Edit remote file via $EDITOR
   file cp SRC DEST        Stage local file for next run
   file ls [-q]            List uploaded files + local path (aliases: list)
   session list            List sessions (aliases: ls)
   session branch [NAME]   Create/list branches (aliases: br)
   session checkout BRANCH Switch branch (aliases: co)
-  session rollback [N]    Undo N steps (aliases: rb)
+  session rollback [N]    Jump to history id N (absolute); -N steps back (aliases: rb)
   session show            Show history DAG
   session delete KEY      Delete session (aliases: rm, del)
-  session wait [OPS]      Wait for operations
+  session wait [OPS]      Drain detached ops; no-arg form advances branch, UUID form polls only
   auth                    Save token
   auth ls                 List profiles (aliases: profiles)
   auth switch NAME        Switch profile
diff --git a/contree_cli/arguments.py b/contree_cli/arguments.py
index ccde59b..4b00675 100644
--- a/contree_cli/arguments.py
+++ b/contree_cli/arguments.py
@@ -13,13 +13,10 @@
     env,
     file,
     images,
-    kill,
     ls,
     operation,
-    ps,
     run,
     session,
-    show,
     skill,
     tag,
     use,
@@ -214,9 +211,21 @@ def register(
 register("build", "Build image from Dockerfile", build.setup_parser, aliases=["bd"])
 register("images", "List and import images", images.setup_parser, aliases=["i", "img"])
 register("tag", "Tag an image", tag.setup_parser, aliases=["t"])
-register("ps", "List operations/instances", ps.setup_parser)
-register("kill", "Cancel an operation", kill.setup_parser)
-register("show", "Show operation result", show.setup_parser)
+register(
+    "ps",
+    "List operations (alias for `operation ls`)",
+    operation.setup_list_parser,
+)
+register(
+    "kill",
+    "Cancel operations (alias for `operation cancel`)",
+    operation.setup_cancel_parser,
+)
+register(
+    "show",
+    "Show operation result (alias for `operation show`)",
+    operation.setup_show_parser,
+)
 register(
     "operation",
     "Manage operations (list/show/cancel)",
diff --git a/contree_cli/cli/build.py b/contree_cli/cli/build.py
index e6b1453..d0a6804 100644
--- a/contree_cli/cli/build.py
+++ b/contree_cli/cli/build.py
@@ -6,10 +6,11 @@
 materialised as branches named ``layer:<chain-hash>`` so that
 re-running the same Dockerfile reuses prior work.
 
-Supported directives (MVP): FROM, RUN, COPY, ADD (without URL/tar),
-WORKDIR, ENV, ARG, USER. Other Dockerfile directives parse cleanly
-but are skipped with a warning (CMD, ENTRYPOINT, LABEL, EXPOSE,
-VOLUME, STOPSIGNAL, MAINTAINER, HEALTHCHECK, ONBUILD, SHELL).
+Supported directives (MVP): FROM, RUN, COPY, ADD (local files/dirs
+and http(s) URLs; no tar auto-extraction), WORKDIR, ENV, ARG, USER.
+Other Dockerfile directives parse cleanly but are skipped with a
+warning (CMD, ENTRYPOINT, LABEL, EXPOSE, VOLUME, STOPSIGNAL,
+MAINTAINER, HEALTHCHECK, ONBUILD, SHELL).
 """
 
 from __future__ import annotations
diff --git a/contree_cli/cli/env.py b/contree_cli/cli/env.py
index 798e5a0..e96de7a 100644
--- a/contree_cli/cli/env.py
+++ b/contree_cli/cli/env.py
@@ -20,8 +20,8 @@
   contree env                                  list session env vars
   contree env PATH=/root/.cargo/bin:$PATH      set PATH
   contree env DEBUG=1 DB_HOST=localhost         set multiple
-  contree env -d PATH                          unset PATH
-  contree env -d PATH DEBUG                    unset multiple
+  contree env -U PATH                          unset PATH
+  contree env -U PATH DEBUG                    unset multiple
 """
 
 
diff --git a/contree_cli/cli/file.py b/contree_cli/cli/file.py
index 85cd4a6..968754b 100644
--- a/contree_cli/cli/file.py
+++ b/contree_cli/cli/file.py
@@ -33,13 +33,18 @@
     ArgumentsProtocol,
     SetupResult,
 )
-from contree_cli.client import ApiError, ContreeClient, resolve_image, stream_response
+from contree_cli.client import (
+    ApiError,
+    ContreeClient,
+    PaginatedFetcher,
+    resolve_image,
+    stream_response,
+)
 from contree_cli.config import EDITOR
-from contree_cli.session import SessionStore
+from contree_cli.session import CONTREE_CONCURRENCY, SessionStore
 from contree_cli.types import (
     FLAGS,
     isoformat_datetime,
-    parse_datetime,
     parse_interval,
     positive_int,
 )
@@ -75,7 +80,7 @@ def from_args(cls, ns: argparse.Namespace) -> FileCpArgs:
 
 
 FILE_LIST_LIMIT_DEFAULT = 1000
-FILE_LIST_PAGE_SIZE = 1000
+FILE_LIST_PAGE_SIZE = PaginatedFetcher.DEFAULT_PAGE_SIZE
 
 
 @dataclass(frozen=True)
@@ -323,17 +328,22 @@ def cmd_file_ls(args: FileListArgs) -> int | None:
     if args.until is not None:
         params["until"] = isoformat_datetime(args.until)
 
-    offset = 0
+    fetcher = PaginatedFetcher(
+        client,
+        "/v1/files",
+        params,
+        lambda body: json.loads(body).get("files", []),
+        limit=args.limit,
+        concurrency=CONTREE_CONCURRENCY,
+    )
+
     emitted = 0
-    while emitted < args.limit:
-        page_size = min(FILE_LIST_PAGE_SIZE, args.limit - emitted)
-        page = {**params, "offset": str(offset), "limit": str(page_size)}
-        resp = client.get("/v1/files", params=page)
-        data = json.loads(resp.read())
-        files = data.get("files", [])
-        if not files:
-            return None
-        for entry in files:
+    hit_limit = False
+    for page in fetcher:
+        for entry in page:
+            if emitted >= args.limit:
+                hit_limit = True
+                break
             uuid_str = entry.get("uuid")
             source = sources.get(uuid_str, "") if isinstance(uuid_str, str) else ""
             if args.quiet:
@@ -342,26 +352,15 @@ def cmd_file_ls(args: FileListArgs) -> int | None:
                     sha256=entry.get("sha256", ""),
                     source=source,
                 )
-                continue
-            row: dict[str, object] = {}
-            for key, value in entry.items():
-                if isinstance(value, (dict, list)):
-                    continue
-                if key in {"created_at", "updated_at"} and isinstance(value, str):
-                    value = parse_datetime(value)
-                row[key] = value
-            row["source"] = source
-            formatter(**row)
-        emitted += len(files)
-        if len(files) < page_size:
-            return None
-        offset += len(files)
-
-    probe = {**params, "offset": str(offset), "limit": "1"}
-    resp = client.get("/v1/files", params=probe)
-    data = json.loads(resp.read())
-    if data.get("files"):
+            else:
+                formatter(**{**entry, "source": source})
+            emitted += 1
         formatter.flush()
+        if hit_limit:
+            fetcher.stop()
+            break
+
+    if hit_limit:
         logger.warning(
             "Output truncated at --limit=%d files; more results are"
             " available. Raise --limit or narrow with --since/--until.",
diff --git a/contree_cli/cli/images.py b/contree_cli/cli/images.py
index 6e9896a..c142281 100644
--- a/contree_cli/cli/images.py
+++ b/contree_cli/cli/images.py
@@ -18,19 +18,19 @@
 from datetime import datetime
 
 from contree_cli import CLIENT, FORMATTER, ArgumentsProtocol, SetupResult
-from contree_cli.client import ApiError
+from contree_cli.client import ApiError, PaginatedFetcher
+from contree_cli.session import CONTREE_CONCURRENCY
 from contree_cli.types import (
     FLAGS,
     ArgumentsFormatter,
     isoformat_datetime,
-    parse_datetime,
     parse_interval,
     positive_int,
 )
 
 logger = logging.getLogger(__name__)
 
-PAGE_SIZE = 1000
+PAGE_SIZE = PaginatedFetcher.DEFAULT_PAGE_SIZE
 LIMIT_DEFAULT = 3000
 TERMINAL_STATUSES = frozenset({"SUCCESS", "FAILED", "CANCELLED"})
 DOCKER_HUB = "docker.io"
@@ -272,50 +272,30 @@ def cmd_images(args: ImagesArgs) -> None:
     if args.until is not None:
         base_params["until"] = isoformat_datetime(args.until)
 
-    offset = 0
-    emitted = 0
-    while emitted < args.limit:
-        page_size = min(PAGE_SIZE, args.limit - emitted)
-        params = {
-            **base_params,
-            "offset": str(offset),
-            "limit": str(page_size),
-        }
-        resp = client.get("/v1/images", params=params)
-        data = json.loads(resp.read())
-        images = data["images"]
-        if not images:
-            return
-        for image in images:
-            row: dict[str, object] = {}
-            for key, value in image.items():
-                if isinstance(value, (dict, list)):
-                    continue
-                if key == "created_at" and isinstance(value, str):
-                    value = parse_datetime(value)
-                if key == "tag" and value is None:
-                    value = ""
-                row[key] = value
-            formatter(**row)
-        emitted += len(images)
-        if len(images) < page_size:
-            return
-        offset += len(images)
-        if emitted < args.limit:
-            logger.info(
-                "Fetched %d images so far... (press Ctrl+C to break)",
-                emitted,
-            )
+    fetcher = PaginatedFetcher(
+        client,
+        "/v1/images",
+        base_params,
+        lambda body: json.loads(body)["images"],
+        limit=args.limit,
+        concurrency=CONTREE_CONCURRENCY,
+    )
 
-    # Hit the limit. Probe one extra record (offset=emitted, limit=1) to
-    # detect truncation without re-fetching a full page.
-    probe_params = {**base_params, "offset": str(offset), "limit": "1"}
-    resp = client.get("/v1/images", params=probe_params)
-    data = json.loads(resp.read())
-    if data.get("images"):
-        # Flush buffered output (e.g. TableFormatter) before the warning
-        # so the truncation note appears AFTER the listing on screen.
+    emitted = 0
+    hit_limit = False
+    for page in fetcher:
+        for image in page:
+            if emitted >= args.limit:
+                hit_limit = True
+                break
+            formatter(**image)
+            emitted += 1
         formatter.flush()
+        if hit_limit:
+            fetcher.stop()
+            break
+
+    if hit_limit:
         logger.warning(
             "Output truncated at --limit=%d images; more results are"
             " available. Raise --limit or narrow with"
@@ -353,6 +333,7 @@ def _derive_tag(ref: str) -> str:
 def cmd_import(args: ImportArgs) -> int | None:
     client = CLIENT.get()
     formatter = FORMATTER.get()
+    formatter.configure(tail=("error",))
 
     # 1. Build credentials (prompt for password when --username given)
     credentials: dict[str, str] | None = None
@@ -411,10 +392,12 @@ def cmd_import(args: ImportArgs) -> int | None:
                     if op["status"] != "SUCCESS":
                         failed = True
                     formatter(
-                        uuid=op_uuids[idx],
-                        status=op["status"],
-                        registry_url=imports[idx][0],
-                        image=(op.get("result") or {}).get("image", ""),
+                        **{
+                            **op,
+                            "uuid": op_uuids[idx],
+                            "registry_url": imports[idx][0],
+                            "image": (op.get("result") or {}).get("image", ""),
+                        }
                     )
     except KeyboardInterrupt:
         # Cancel ALL operations on Ctrl+C
diff --git a/contree_cli/cli/kill.py b/contree_cli/cli/kill.py
deleted file mode 100644
index 1af8ec0..0000000
--- a/contree_cli/cli/kill.py
+++ /dev/null
@@ -1,96 +0,0 @@
-"""Cancel a running operation.
-
-Sends a DELETE request to stop the specified operation. Use --all to
-cancel every active operation (PENDING, ASSIGNED, EXECUTING) in one go.
-"""
-
-from __future__ import annotations
-
-import argparse
-import json
-import logging
-from dataclasses import dataclass
-
-from contree_cli import CLIENT, ArgumentsProtocol, SetupResult
-from contree_cli.client import ApiError
-from contree_cli.types import FLAGS
-
-logger = logging.getLogger(__name__)
-
-PAGE_SIZE = 100
-ACTIVE_STATUSES = ("PENDING", "ASSIGNED", "EXECUTING")
-
-EPILOG = """\
-for coding agents:
-  mutating command
-  use UUID to cancel one operation, or --all to cancel all active ones
-"""
-
-
-@dataclass(frozen=True)
-class KillArgs(ArgumentsProtocol):
-    uuid: str | None = None
-    all: bool = False
-
-    @classmethod
-    def from_args(cls, ns: argparse.Namespace) -> KillArgs:
-        return cls(uuid=ns.uuid, all=ns.all)
-
-
-def setup_parser(p: argparse.ArgumentParser) -> SetupResult:
-    target = p.add_mutually_exclusive_group(required=True)
-    target.add_argument("uuid", nargs="?", help="Operation UUID")
-    target.add_argument(
-        *FLAGS["all"],
-        action="store_true",
-        help="Cancel all active operations",
-    )
-    return cmd_kill, KillArgs
-
-
-def _list_active(client: object) -> list[str]:
-    """Collect UUIDs of all active operations."""
-    from contree_cli.client import ContreeClient
-
-    assert isinstance(client, ContreeClient)
-    uuids: list[str] = []
-    for status in ACTIVE_STATUSES:
-        offset = 0
-        while True:
-            params = {
-                "status": status,
-                "limit": str(PAGE_SIZE),
-                "offset": str(offset),
-            }
-            resp = client.get("/v1/operations", params=params)
-            operations = json.loads(resp.read())
-            if not operations:
-                break
-            uuids.extend(op["uuid"] for op in operations)
-            if len(operations) < PAGE_SIZE:
-                break
-            offset += len(operations)
-    return uuids
-
-
-def cmd_kill(args: KillArgs) -> int | None:
-    client = CLIENT.get()
-
-    if args.all:
-        uuids = _list_active(client)
-        if not uuids:
-            logger.info("No active operations to cancel")
-            return None
-        failed = 0
-        for uuid in uuids:
-            try:
-                client.delete(f"/v1/operations/{uuid}")
-                logger.info("Cancelled operation %s", uuid)
-            except ApiError as exc:
-                logger.error("Failed to cancel %s: %s", uuid, exc)
-                failed += 1
-        return 1 if failed else None
-
-    client.delete(f"/v1/operations/{args.uuid}")
-    logger.info("Cancelled operation %s", args.uuid)
-    return None
diff --git a/contree_cli/cli/ls.py b/contree_cli/cli/ls.py
index ce9e274..de52c80 100644
--- a/contree_cli/cli/ls.py
+++ b/contree_cli/cli/ls.py
@@ -14,7 +14,6 @@
 import json
 import sys
 from dataclasses import dataclass
-from datetime import datetime, timezone
 from typing import Any, cast
 
 from contree_cli import CLIENT, FORMATTER, SESSION_STORE, ArgumentsProtocol, SetupResult
@@ -51,6 +50,7 @@ def from_args(cls, ns: argparse.Namespace) -> LsArgs:
 def cmd_ls(args: LsArgs) -> None:
     client = CLIENT.get()
     formatter = FORMATTER.get()
+    formatter.configure(tail=("type",))
     store = SESSION_STORE.get()
     image = store.current_image
     uuid = resolve_image(client, image)
@@ -83,11 +83,10 @@ def cmd_ls(args: LsArgs) -> None:
         else:
             ftype = "-"
         formatter(
-            path=f["path"],
-            size=f["size"],
-            mode=format(f["mode"], "o"),
-            owner=f.get("owner", f.get("uid", "")),
-            group=f.get("group", f.get("gid", "")),
-            mtime=datetime.fromtimestamp(f["mtime"], tz=timezone.utc),
-            type=ftype,
+            **{
+                **f,
+                "owner": f.get("owner") or f.get("uid", ""),
+                "group": f.get("group") or f.get("gid", ""),
+                "type": ftype,
+            }
         )
diff --git a/contree_cli/cli/operation.py b/contree_cli/cli/operation.py
index 8a4a9be..32e4399 100644
--- a/contree_cli/cli/operation.py
+++ b/contree_cli/cli/operation.py
@@ -5,7 +5,7 @@
 on in one invocation.
 
 Subcommands:
-  list (ls)             List operations. Same flags as `contree ps`.
+  list (ls)             List operations. ``contree ps`` is an alias.
   show UUID [UUID...]   Show one or more operation results.
   cancel UUID [UUID...] Cancel one or more operations (or --all).
 """
@@ -13,18 +13,55 @@
 from __future__ import annotations
 
 import argparse
+import itertools
+import json
 import logging
+import time
 from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Any
 
-from contree_cli import CLIENT, ArgumentsProtocol, SetupResult
-from contree_cli.cli import kill as kill_module
-from contree_cli.cli import ps as ps_module
+from contree_cli import CLIENT, FORMATTER, ArgumentsProtocol, SetupResult
 from contree_cli.cli.show import ShowArgs, cmd_show
-from contree_cli.client import ApiError
-from contree_cli.types import FLAGS
+from contree_cli.client import ApiError, ContreeClient, PaginatedFetcher
+from contree_cli.output import OutputFormatter
+from contree_cli.refs import (
+    history_spec_from_ref,
+    looks_like_history_ref,
+    resolve_operation_uuids,
+)
+from contree_cli.session import CONTREE_CONCURRENCY
+from contree_cli.types import (
+    FLAGS,
+    isoformat_datetime,
+    parse_interval,
+    positive_int,
+)
+
+# Re-exported for backwards compatibility with code/tests that historically
+# pulled these helpers from `contree_cli.cli.operation`.
+__all__ = [
+    "history_spec_from_ref",
+    "looks_like_history_ref",
+    "resolve_operation_uuids",
+]
 
 logger = logging.getLogger(__name__)
 
+PAGE_SIZE = PaginatedFetcher.DEFAULT_PAGE_SIZE
+
+ACTIVE_STATUSES = frozenset({"PENDING", "ASSIGNED", "EXECUTING"})
+TERMINAL_STATUSES = frozenset({"SUCCESS", "FAILED", "CANCELLED"})
+WAIT_TIMEOUT_DEFAULT = 60
+STATUS_CHOICES = {
+    "P": "PENDING",
+    "A": "ASSIGNED",
+    "E": "EXECUTING",
+    "S": "SUCCESS",
+    "F": "FAILED",
+    "C": "CANCELLED",
+}
+
 EPILOG = """\
 for coding agents:
   list/show are read-only; cancel mutates remote state
@@ -33,13 +70,40 @@
 """
 
 
+@dataclass(frozen=True)
+class ListArgs(ArgumentsProtocol):
+    quiet: bool = False
+    all: bool = False
+    show_max: int | None = None
+    status: str | None = None
+    kind: str | None = None
+    since: datetime | None = None
+    until: datetime | None = None
+
+    @classmethod
+    def from_args(cls, ns: argparse.Namespace) -> ListArgs:
+        return cls(
+            quiet=ns.quiet,
+            all=getattr(ns, "all", False),
+            show_max=ns.show_max,
+            status=ns.status,
+            kind=ns.kind,
+            since=ns.since,
+            until=ns.until,
+        )
+
+
 @dataclass(frozen=True)
 class ShowMultiArgs(ArgumentsProtocol):
     uuids: list[str] = field(default_factory=list)
+    raw: bool = False
 
     @classmethod
     def from_args(cls, ns: argparse.Namespace) -> ShowMultiArgs:
-        return cls(uuids=list(ns.uuids))
+        return cls(
+            uuids=resolve_operation_uuids(list(ns.uuids)),
+            raw=getattr(ns, "raw", False),
+        )
 
 
 @dataclass(frozen=True)
@@ -49,7 +113,141 @@ class CancelArgs(ArgumentsProtocol):
 
     @classmethod
     def from_args(cls, ns: argparse.Namespace) -> CancelArgs:
-        return cls(uuids=list(ns.uuids or []), all=ns.all)
+        return cls(uuids=resolve_operation_uuids(list(ns.uuids or [])), all=ns.all)
+
+
+@dataclass(frozen=True)
+class WaitArgs(ArgumentsProtocol):
+    uuids: list[str] = field(default_factory=list)
+    all: bool = False
+    timeout: int = WAIT_TIMEOUT_DEFAULT
+
+    @classmethod
+    def from_args(cls, ns: argparse.Namespace) -> WaitArgs:
+        return cls(
+            uuids=resolve_operation_uuids(list(ns.uuids or [])),
+            all=ns.all,
+            timeout=ns.timeout,
+        )
+
+
+def setup_cancel_parser(p: argparse.ArgumentParser) -> SetupResult:
+    """Configure the cancel parser used by both `operation cancel` and `kill`."""
+    p.add_argument(
+        "uuids",
+        nargs="*",
+        metavar="UUID_OR_REF",
+        help=(
+            "Operations to cancel. Accepts UUIDs and session-history "
+            "references (HEAD, HEAD~N, @, @N, @-N, @+N, :N, bare N)."
+        ),
+    )
+    p.add_argument(
+        *FLAGS["all"],
+        action="store_true",
+        help="Cancel every active operation",
+    )
+    return cmd_cancel, CancelArgs
+
+
+def setup_show_parser(p: argparse.ArgumentParser) -> SetupResult:
+    """Configure the show parser used by both `operation show` and `show`."""
+    p.add_argument(
+        "uuids",
+        nargs="+",
+        metavar="UUID_OR_REF",
+        help=(
+            "Operations to inspect. Accepts UUIDs and session-history "
+            "references: @ or HEAD for the active branch tip, @N for "
+            "an absolute history id, @-N or HEAD~N for N steps back, "
+            "@+N for N steps forward."
+        ),
+    )
+    p.add_argument(
+        *FLAGS["raw"],
+        action="store_true",
+        help=(
+            "Print each operation's full server payload as JSONL "
+            "(one JSON object per line) to stdout, verbatim. Skips "
+            "formatter routing and derived columns; streams cleanly "
+            "into `jq -c`. Useful for debugging or for fields the "
+            "table view omits."
+        ),
+    )
+    return cmd_show_multi, ShowMultiArgs
+
+
+def setup_wait_parser(p: argparse.ArgumentParser) -> SetupResult:
+    """Configure the wait parser for `operation wait`."""
+    p.add_argument(
+        "uuids",
+        nargs="*",
+        metavar="UUID_OR_REF",
+        help=(
+            "Operations to wait for. Accepts UUIDs and session-history "
+            "references (HEAD, HEAD~N, @, @N, @-N, @+N, :N, bare N)."
+        ),
+    )
+    p.add_argument(
+        *FLAGS["all"],
+        action="store_true",
+        help="Wait for every active operation",
+    )
+    p.add_argument(
+        *FLAGS["timeout"],
+        type=positive_int,
+        default=WAIT_TIMEOUT_DEFAULT,
+        help=(
+            "Fail with exit code 1 if not all operations reach a terminal"
+            f" status within this many seconds (default: {WAIT_TIMEOUT_DEFAULT})"
+        ),
+    )
+    return cmd_wait, WaitArgs
+
+
+def setup_list_parser(p: argparse.ArgumentParser) -> SetupResult:
+    """Configure the listing parser used by both `operation ls` and `ps`."""
+    p.add_argument(
+        *FLAGS["quiet"],
+        action="store_true",
+        help="Only show UUIDs, useful for scripting",
+    )
+    # Help text mirrors cmd_list: without --all/--status only EXECUTING is listed.
+    p.add_argument(
+        *FLAGS["all"],
+        action="store_true",
+        help="Show all operations (default: EXECUTING only)",
+    )
+    p.add_argument(
+        *FLAGS["status"],
+        choices=tuple(itertools.chain.from_iterable(STATUS_CHOICES.items())),
+        help="Filter by status (default: EXECUTING only, unless -a is used)",
+    )
+    p.add_argument(
+        *FLAGS["kind"],
+        choices=("image_import", "instance"),
+        help="Filter by operation kind",
+    )
+    p.add_argument(
+        *FLAGS["since"],
+        type=parse_interval,
+        help=str(parse_interval.__doc__),
+    )
+    p.add_argument(
+        *FLAGS["until"],
+        type=parse_interval,
+        help="Show operations before. " + str(parse_interval.__doc__),
+    )
+    p.add_argument(
+        *FLAGS["show_max"],
+        type=positive_int,
+        default=1000,
+        help=(
+            "Show at most this many operations, useful"
+            " for --all with large history (default: 1000)"
+        ),
+    )
+    return cmd_list, ListArgs
 
 
 def setup_parser(p: argparse.ArgumentParser) -> SetupResult:
@@ -59,14 +257,15 @@ def setup_parser(p: argparse.ArgumentParser) -> SetupResult:
         "list",
         aliases=["ls"],
         help="List operations",
-        description=("List operations. Accepts the same flags as `contree ps`."),
+        description=("List operations. ``contree ps`` is an alias of this command."),
         epilog="for coding agents: read-only command",
     )
-    list_handler, list_loader = ps_module.setup_parser(list_p)
+    list_handler, list_loader = setup_list_parser(list_p)
     list_p.set_defaults(handler=list_handler, load_args=list_loader)
 
     show_p = sub.add_parser(
         "show",
+        aliases=["sh"],
         help="Show one or more operation results",
         description=(
             "Fetch and display the result of each given operation. "
@@ -79,16 +278,12 @@ def setup_parser(p: argparse.ArgumentParser) -> SetupResult:
             "  accepts multiple UUIDs; each rendered as its own row"
         ),
     )
-    show_p.add_argument(
-        "uuids",
-        nargs="+",
-        metavar="UUID",
-        help="One or more operation UUIDs (or @N history references)",
-    )
-    show_p.set_defaults(handler=cmd_show_multi, load_args=ShowMultiArgs)
+    show_handler, show_loader = setup_show_parser(show_p)
+    show_p.set_defaults(handler=show_handler, load_args=show_loader)
 
     cancel_p = sub.add_parser(
         "cancel",
+        aliases=["kill", "k"],
         help="Cancel one or more operations",
         description=(
             "Cancel each given operation. With --all, cancels every active "
@@ -100,27 +295,144 @@ def setup_parser(p: argparse.ArgumentParser) -> SetupResult:
             "  pass UUIDs to cancel specific operations or --all for everything"
         ),
     )
-    cancel_p.add_argument(
-        "uuids",
-        nargs="*",
-        metavar="UUID",
-        help="Operation UUIDs to cancel",
-    )
-    cancel_p.add_argument(
-        *FLAGS["all"],
-        action="store_true",
-        help="Cancel every active operation",
+    cancel_handler, cancel_loader = setup_cancel_parser(cancel_p)
+    cancel_p.set_defaults(handler=cancel_handler, load_args=cancel_loader)
+
+    wait_p = sub.add_parser(
+        "wait",
+        aliases=["w"],
+        help="Wait for operations to reach a terminal status",
+        description=(
+            "Poll the given operations until each reaches a terminal "
+            "status (SUCCESS, FAILED, CANCELLED) and print one row per "
+            "completion. With --all, waits for every currently active "
+            "operation (PENDING, ASSIGNED, EXECUTING)."
+        ),
+        epilog=(
+            "for coding agents:\n"
+            "  read-only command (polls the API; no state mutation)\n"
+            "  fails with exit code 1 if --timeout is hit before all complete\n"
+            "  exit code 1 also when any operation finished non-SUCCESS"
+        ),
     )
-    cancel_p.set_defaults(handler=cmd_cancel, load_args=CancelArgs)
+    wait_handler, wait_loader = setup_wait_parser(wait_p)
+    wait_p.set_defaults(handler=wait_handler, load_args=wait_loader)
 
     return cmd_show_multi, ShowMultiArgs
 
 
+CANCEL_ACTIVE_PAGE_SIZE = 100
+
+
+def extract_exit_code(op: dict[str, Any]) -> int | None:
+    """Pull the sandbox-process exit code out of an operation payload.
+
+    Operation ``status`` reflects orchestration -- whether the API ran the
+    job to completion -- and is left as-is. The sandbox process's own
+    exit code lives in ``metadata.result.state.exit_code`` (newer API
+    shape) or ``result.exit_code`` (older shape); this helper returns
+    whichever is present, or ``None`` if neither.
+    """
+    metadata = op.get("metadata") or {}
+    instance_result = metadata.get("result") if isinstance(metadata, dict) else None
+    state = instance_result.get("state") if isinstance(instance_result, dict) else None
+    raw = state.get("exit_code") if isinstance(state, dict) else None
+    if raw is None:
+        result = op.get("result")
+        raw = result.get("exit_code") if isinstance(result, dict) else None
+    try:
+        return int(raw) if raw is not None else None
+    except (TypeError, ValueError):
+        return None
+
+
+def list_active(client: ContreeClient) -> list[str]:
+    """Return UUIDs of all active operations, deduplicated, in a stable order.
+
+    Statuses are scanned in sorted order; repeats across scans are dropped.
+    """
+    seen: dict[str, None] = {}
+    for status in sorted(ACTIVE_STATUSES):
+        offset = 0
+        while True:
+            params = {
+                "status": status,
+                "limit": str(CANCEL_ACTIVE_PAGE_SIZE),
+                "offset": str(offset),
+            }
+            ops = json.loads(client.get("/v1/operations", params=params).read())
+            seen.update(dict.fromkeys(op["uuid"] for op in ops))
+            if len(ops) < CANCEL_ACTIVE_PAGE_SIZE:
+                break
+            offset += len(ops)
+    return list(seen)
+
+
+def cmd_list(args: ListArgs) -> None:
+    formatter: OutputFormatter = FORMATTER.get()
+    formatter.configure(tail=("error",))
+    client = CLIENT.get()
+
+    status: str | None = None
+    if args.status is not None:
+        if len(args.status) == 1:
+            status = STATUS_CHOICES.get(args.status, args.status)
+        else:
+            status = args.status
+    elif not args.all:
+        status = "EXECUTING"
+
+    base_params: dict[str, str] = {}
+    if status:
+        base_params["status"] = status
+    if args.kind:
+        base_params["kind"] = args.kind
+    if args.since is not None:
+        base_params["since"] = isoformat_datetime(args.since)
+    if args.until is not None:
+        base_params["until"] = isoformat_datetime(args.until)
+
+    limit = args.show_max
+    fetcher = PaginatedFetcher(
+        client,
+        "/v1/operations",
+        base_params,
+        json.loads,
+        limit=limit,
+        concurrency=CONTREE_CONCURRENCY,
+    )
+
+    emitted = 0
+    hit_limit = False
+    for page in fetcher:
+        for op in page:
+            if limit is not None and emitted >= limit:
+                hit_limit = True
+                break
+            if args.quiet:
+                print(op["uuid"])
+            else:
+                formatter(**op)
+            emitted += 1
+        formatter.flush()
+        if hit_limit:
+            fetcher.stop()
+            break
+
+    if hit_limit:
+        logger.warning(
+            "Output truncated at --show-max=%d operations; more results"
+            " are available. Raise --show-max or filter with"
+            " --status/--kind/--since/--until.",
+            limit,
+        )
+
+
 def cmd_show_multi(args: ShowMultiArgs) -> int | None:
     exit_code = 0
     for uuid in args.uuids:
         try:
-            result = cmd_show(ShowArgs(uuid=uuid))
+            result = cmd_show(ShowArgs(uuid=uuid, raw=args.raw))
         except ApiError as exc:
             logger.error("Failed to fetch %s: %s", uuid, exc)
             exit_code = max(exit_code, 1)
@@ -136,7 +448,7 @@ def cmd_cancel(args: CancelArgs) -> int | None:
     if args.all:
         if args.uuids:
             logger.warning("--all overrides explicit UUIDs; cancelling all active")
-        uuids = kill_module._list_active(client)
+        uuids = list_active(client)
         if not uuids:
             logger.info("No active operations to cancel")
             return None
@@ -155,3 +467,112 @@ def cmd_cancel(args: CancelArgs) -> int | None:
             logger.error("Failed to cancel %s: %s", uuid, exc)
             failed += 1
     return 1 if failed else None
+
+
+def _emit_finished(formatter: OutputFormatter, op: dict[str, Any]) -> int:
+    """Emit the row for a finished operation and return its exit status.
+
+    The operation status reflects orchestration (did the job run?), not
+    what the sandbox process did with its exit code. Both feed the CLI's
+    own exit status independently: a non-SUCCESS operation contributes 1,
+    and a non-zero sandbox exit code is propagated so `op wait && next`
+    does the right thing. Codes outside 1..255 (e.g. negative values, or
+    values a shell would wrap modulo 256) contribute 1 instead, so a
+    failed process can never surface as exit status 0.
+    """
+    exit_code = extract_exit_code(op)
+    formatter(**{**op, "exit_code": exit_code, "timed_out": False})
+    status = 0 if op.get("status") == "SUCCESS" else 1
+    if exit_code:
+        status = max(status, exit_code if 0 < exit_code <= 255 else 1)
+    return status
+
+
+def cmd_wait(args: WaitArgs) -> int | None:
+    """Poll operations until each is terminal or the timeout expires.
+
+    Emits one row per operation. Returns ``None`` when every operation
+    finished with SUCCESS and a zero sandbox exit code; otherwise returns
+    a non-zero exit status: 1 for fetch errors, non-SUCCESS operations
+    and timeouts, or the propagated sandbox exit code.
+    """
+    client = CLIENT.get()
+    formatter = FORMATTER.get()
+    # Pin uuid/status/exit_code/timed_out/duration up front for the typical
+    # eyeball scan ("did it finish? how long? what was the exit?"); `error`
+    # stays in the trailing slot.
+    formatter.configure(
+        head=("uuid", "status", "exit_code", "timed_out", "duration"),
+        tail=("error",),
+    )
+
+    if args.all:
+        if args.uuids:
+            logger.warning("--all overrides explicit UUIDs; waiting for all active")
+        uuids = list_active(client)
+        if not uuids:
+            logger.info("No active operations to wait for")
+            return None
+    else:
+        if not args.uuids:
+            logger.error("Provide at least one UUID, or use --all")
+            return 1
+        uuids = list(args.uuids)
+
+    deadline = time.monotonic() + args.timeout
+    pending = set(uuids)
+    exit_status = 0
+    sleep_time = 0.5
+
+    while pending and time.monotonic() < deadline:
+        for uuid in list(pending):
+            try:
+                resp = client.get(f"/v1/operations/{uuid}")
+                op = json.loads(resp.read())
+            except ApiError as exc:
+                # One unreachable operation must not abort the wait for the
+                # rest: report it, stop polling it, and fail the command.
+                logger.error("Failed to fetch %s: %s", uuid, exc)
+                pending.discard(uuid)
+                exit_status = max(exit_status, 1)
+                continue
+            if op.get("status") in TERMINAL_STATUSES:
+                pending.discard(uuid)
+                exit_status = max(exit_status, _emit_finished(formatter, op))
+        formatter.flush()
+        if not pending:
+            break
+        remaining = deadline - time.monotonic()
+        if remaining <= 0:
+            break
+        time.sleep(min(sleep_time, remaining))
+        sleep_time = min(5.0, sleep_time * 2)
+
+    # Anything still pending after the deadline gets one last fetch so the
+    # user sees what state each operation was stuck in. An operation that
+    # finished during the final sleep is reported as finished, not timed out.
+    timed_out = 0
+    for uuid in sorted(pending):
+        try:
+            resp = client.get(f"/v1/operations/{uuid}")
+            op = json.loads(resp.read())
+        except ApiError as exc:
+            logger.error("Failed to fetch %s: %s", uuid, exc)
+            timed_out += 1
+            continue
+        if op.get("status") in TERMINAL_STATUSES:
+            exit_status = max(exit_status, _emit_finished(formatter, op))
+            continue
+        timed_out += 1
+        formatter(**{**op, "timed_out": True})
+    if pending:
+        formatter.flush()
+    if timed_out:
+        logger.warning(
+            "Timeout: %d operation(s) did not finish in %ds",
+            timed_out,
+            args.timeout,
+        )
+        exit_status = max(exit_status, 1)
+
+    return exit_status or None
diff --git a/contree_cli/cli/ps.py b/contree_cli/cli/ps.py
deleted file mode 100644
index 76a54ba..0000000
--- a/contree_cli/cli/ps.py
+++ /dev/null
@@ -1,220 +0,0 @@
-"""List operations (running and completed instances, image imports).
-
-By default shows only active operations (PENDING, ASSIGNED, EXECUTING).
-Use -a/--all to include completed ones, or -S/--status to filter by a
-specific status. Use -K/--kind to filter by operation type.
-
-Use -q/--quiet to print only UUIDs, useful for scripting.
-"""
-
-from __future__ import annotations
-
-import argparse
-import itertools
-import json
-import logging
-from dataclasses import dataclass
-from datetime import datetime, timedelta
-from typing import Any
-
-from contree_cli import CLIENT, FORMATTER, ArgumentsProtocol, SetupResult
-from contree_cli.output import OutputFormatter
-from contree_cli.types import (
-    FLAGS,
-    isoformat_datetime,
-    parse_datetime,
-    parse_interval,
-    positive_int,
-)
-
-logger = logging.getLogger(__name__)
-
-PAGE_SIZE = 1000
-ACTIVE_STATUSES = frozenset({"PENDING", "ASSIGNED", "EXECUTING"})
-
-EPILOG = """\
-for coding agents:
-  read-only command
-  default view is active operations only; use --all for full history
-  use -q for UUID-only output in scripts
-"""
-
-
-@dataclass(frozen=True)
-class PsArgs(ArgumentsProtocol):
-    quiet: bool = False
-    all: bool = False
-    show_max: int | None = None
-    status: str | None = None
-    kind: str | None = None
-    since: datetime | None = None
-    until: datetime | None = None
-
-    @classmethod
-    def from_args(cls, ns: argparse.Namespace) -> PsArgs:
-        return cls(
-            quiet=ns.quiet,
-            all=getattr(ns, "all", False),
-            show_max=ns.show_max,
-            status=ns.status,
-            kind=ns.kind,
-            since=ns.since,
-            until=ns.until,
-        )
-
-
-STATUS_CHOICES = {
-    "P": "PENDING",
-    "A": "ASSIGNED",
-    "E": "EXECUTING",
-    "S": "SUCCESS",
-    "F": "FAILED",
-    "C": "CANCELLED",
-}
-
-
-def setup_parser(p: argparse.ArgumentParser) -> SetupResult:
-    p.add_argument(
-        *FLAGS["quiet"],
-        action="store_true",
-        help="Only show UUIDs, useful for scripting",
-    )
-    p.add_argument(
-        *FLAGS["all"],
-        action="store_true",
-        help="Show all operations (default: active only)",
-    )
-    p.add_argument(
-        *FLAGS["status"],
-        choices=tuple(itertools.chain.from_iterable(STATUS_CHOICES.items())),
-        default=None,
-        help="Filter by status (default: EXECUTING only, unless -a is used)",
-    )
-    p.add_argument(
-        *FLAGS["kind"],
-        choices=("image_import", "instance"),
-        help="Filter by operation kind",
-    )
-    p.add_argument(
-        *FLAGS["since"],
-        type=parse_interval,
-        help=str(parse_interval.__doc__),
-    )
-    p.add_argument(
-        *FLAGS["until"],
-        type=parse_interval,
-        help="Show operations before. " + str(parse_interval.__doc__),
-    )
-
-    p.add_argument(
-        *FLAGS["show_max"],
-        type=positive_int,
-        default=1000,
-        help=(
-            "Show at most this many operations, useful"
-            " for --all with large history (default: 1000)"
-        ),
-    )
-
-    return cmd_ps, PsArgs
-
-
-DATETIME_FIELDS = frozenset({"created_at", "started_at", "finished_at", "updated_at"})
-
-
-def transform_field(key: str, value: Any) -> Any:
-    """Light-touch typing for known fields, pass-through for everything else."""
-    if value is None:
-        return "" if key == "error" else None
-    if key in DATETIME_FIELDS:
-        return parse_datetime(value)
-    if key == "duration":
-        return timedelta(seconds=value)
-    return value
-
-
-def emit_op(formatter: OutputFormatter, op: dict[str, Any], *, quiet: bool) -> None:
-    if quiet:
-        print(op["uuid"])
-        return
-    row = {
-        key: transform_field(key, value)
-        for key, value in op.items()
-        if key != "error" and not isinstance(value, (dict, list))
-    }
-    row["error"] = transform_field("error", op.get("error"))
-    formatter(**row)
-
-
-def cmd_ps(args: PsArgs) -> None:
-    formatter: OutputFormatter = FORMATTER.get()
-    client = CLIENT.get()
-
-    status: str | None = None
-    if args.status is not None:
-        if len(args.status) == 1:
-            status = STATUS_CHOICES.get(args.status, args.status)
-        else:
-            status = args.status
-    elif not args.all:
-        status = "EXECUTING"
-
-    base_params: dict[str, str] = {}
-    if status:
-        base_params["status"] = status
-    if args.kind:
-        base_params["kind"] = args.kind
-    if args.since is not None:
-        base_params["since"] = isoformat_datetime(args.since)
-    if args.until is not None:
-        base_params["until"] = isoformat_datetime(args.until)
-
-    limit = args.show_max
-    offset = 0
-    emitted = 0
-    hit_limit = False
-
-    while limit is None or emitted < limit:
-        page_size = PAGE_SIZE if limit is None else min(PAGE_SIZE, limit - emitted)
-        params = {
-            **base_params,
-            "offset": str(offset),
-            "limit": str(page_size),
-        }
-        resp = client.get("/v1/operations", params=params)
-        operations = json.loads(resp.read())
-        if not operations:
-            return
-        for op in operations:
-            if limit is not None and emitted >= limit:
-                hit_limit = True
-                break
-            emit_op(formatter, op, quiet=args.quiet)
-            emitted += 1
-        if hit_limit:
-            break
-        if len(operations) < page_size:
-            return
-        offset += len(operations)
-        if limit is None or emitted < limit:
-            logger.info(
-                "Fetched %d operations so far... (press Ctrl+C to break)",
-                emitted,
-            )
-
-    if limit is None:
-        return
-
-    # Hit the limit. Probe one extra record (offset=emitted, limit=1) to
-    # detect truncation without re-fetching a full page.
-    probe_params = {**base_params, "offset": str(emitted), "limit": "1"}
-    resp = client.get("/v1/operations", params=probe_params)
-    operations = json.loads(resp.read())
-    if operations:
-        formatter.flush()
-        logger.warning(
-            "Output truncated at --show-max=%d operations; more results"
-            " are available. Raise --show-max or filter with"
-            " --status/--kind/--since/--until.",
-            limit,
-        )
diff --git a/contree_cli/cli/run.py b/contree_cli/cli/run.py
index 0bec97a..74e74d4 100644
--- a/contree_cli/cli/run.py
+++ b/contree_cli/cli/run.py
@@ -61,7 +61,6 @@
 import time
 import uuid
 from dataclasses import dataclass, field
-from datetime import timedelta
 from multiprocessing.pool import ThreadPool
 
 from contree_cli import CLIENT, FORMATTER, SESSION_STORE, ArgumentsProtocol, SetupResult
@@ -511,12 +510,6 @@ def _display_operation(
     formatter: OutputFormatter,
 ) -> None:
     """Display an operation result using the given formatter."""
-    duration_raw = op.get("duration")
-    duration = (
-        timedelta(seconds=duration_raw)  # type: ignore[arg-type]
-        if duration_raw is not None
-        else None
-    )
     result = op.get("result") or {}
     assert isinstance(result, dict)
     metadata = op.get("metadata") or {}
@@ -531,17 +524,16 @@ def _display_operation(
         exit_code = state.get("exit_code")
 
     if formatter.STREAM:
+        formatter.configure(tail=("error",))
         formatter(
-            uuid=op["uuid"],
-            kind=op.get("kind", ""),
-            status=op["status"],
-            duration=duration,
-            exit_code=exit_code,
-            error=op.get("error") or "",
-            image=result.get("image") or "",
-            tag=result.get("tag") or "",
-            stdout=decode_stream(instance_result.get("stdout")),
-            stderr=decode_stream(instance_result.get("stderr")),
+            **{
+                **op,
+                "exit_code": exit_code,
+                "image": result.get("image") or "",
+                "tag": result.get("tag") or "",
+                "stdout": decode_stream(instance_result.get("stdout")),
+                "stderr": decode_stream(instance_result.get("stderr")),
+            }
         )
         formatter.flush()
         return
@@ -691,7 +683,8 @@ def _norm(item: object) -> dict[str, object]:
 
     # 4. Detach mode - exit immediately
     if args.detach:
-        formatter(uuid=op_uuid, status=op.get("status", "PENDING"))
+        formatter.configure(tail=("error",))
+        formatter(**{"uuid": op_uuid, "status": "PENDING", **op})
         formatter.flush()
         return None
 
diff --git a/contree_cli/cli/session.py b/contree_cli/cli/session.py
index cdc1e23..2440042 100644
--- a/contree_cli/cli/session.py
+++ b/contree_cli/cli/session.py
@@ -25,6 +25,7 @@
 
 from contree_cli import CLIENT, FORMATTER, SESSION_STORE, ArgumentsProtocol, SetupResult
 from contree_cli.output import DefaultFormatter
+from contree_cli.refs import resolve_operation_uuids
 from contree_cli.types import FLAGS, parse_datetime, parse_interval
 
 logger = logging.getLogger(__name__)
@@ -135,7 +136,7 @@ class WaitArgs(ArgumentsProtocol):
 
     @classmethod
     def from_args(cls, ns: argparse.Namespace) -> WaitArgs:
-        return cls(op_ids=ns.op_ids)
+        return cls(op_ids=resolve_operation_uuids(list(ns.op_ids)))
 
 
 @dataclass(frozen=True)
@@ -311,17 +312,34 @@ def setup_parser(p: argparse.ArgumentParser) -> SetupResult:
 
     wait_p = sub.add_parser(
         "wait",
-        help="Wait for operations to reach terminal state",
+        help="Drain detached ops in the current session",
         description=(
-            "Wait for specific operations (by UUID). Without arguments, waits for "
-            "active operations of the current session only."
+            "Drain detached operations in the current session. With no "
+            "arguments, reads the session's pending-ops cache, polls each "
+            "to a terminal status, and advances the active branch to each "
+            "non-disposable result image (recording `disposable-<uuid>` "
+            "branches for disposable runs). With explicit UUIDs, this "
+            "command degrades to a plain polling loop: it prints completion "
+            "rows but does NOT touch the active branch, because the pending "
+            "metadata is not loaded for explicit UUIDs."
+        ),
+        epilog=(
+            "for coding agents:\n"
+            "  no-arg form mutates session history (advances active branch)\n"
+            "  UUID form is a pure polling observer\n"
+            "  if you need result images from explicit UUIDs, use\n"
+            "    `op show UUID | jq -r .image` and `contree use`"
         ),
-        epilog="for coding agents: read-only command",
     )
     wait_p.add_argument(
         "op_ids",
         nargs="*",
-        help="Operation UUIDs to wait for (default: all active operations)",
+        metavar="UUID_OR_REF",
+        help=(
+            "Operations to wait for (default: all active in this session). "
+            "Accepts UUIDs and session-history references "
+            "(HEAD, HEAD~N, @, @N, @-N, @+N, :N, bare N)."
+        ),
     )
     wait_p.set_defaults(handler=cmd_wait, load_args=WaitArgs)
 
@@ -598,6 +616,7 @@ def _parse_filter(value: datetime | str) -> datetime:
 def cmd_wait(args: WaitArgs) -> int | None:
     client = CLIENT.get()
     formatter = FORMATTER.get()
+    formatter.configure(tail=("error",))
 
     store = SESSION_STORE.get()
     op_ids = list(args.op_ids)
@@ -665,11 +684,6 @@ def cmd_wait(args: WaitArgs) -> int | None:
             op = json.loads(resp.read())
             status = op.get("status", "")
             if status in WAIT_TERMINAL_STATUSES:
-                duration = (
-                    timedelta(seconds=op["duration"])
-                    if op.get("duration") is not None
-                    else None
-                )
                 metadata = op.get("metadata") or {}
                 instance_result = metadata.get("result") or {}
                 state = instance_result.get("state") or {}
@@ -692,11 +706,14 @@ def cmd_wait(args: WaitArgs) -> int | None:
                     else str(op.get("title") or "")
                 )
 
-                effective_status = status
-                if status == "SUCCESS" and exit_code not in (None, 0):
-                    effective_status = "FAILED"
-
-                if effective_status == "SUCCESS" and op.get("kind") == "instance":
+                # Branch advancement requires both an API-level success AND
+                # a zero sandbox exit: a process that exited non-zero left
+                # the image in a state we should not silently roll forward
+                # to (matches non-detached `run` semantics). The displayed
+                # `status` is the server's word verbatim -- exit_code lives
+                # in its own column.
+                run_succeeded = status == "SUCCESS" and exit_code in (None, 0)
+                if run_succeeded and op.get("kind") == "instance":
                     result = op.get("result") or {}
                     new_image = result.get("image")
                     if meta and meta.get("disposable", False):
@@ -709,16 +726,15 @@ def cmd_wait(args: WaitArgs) -> int | None:
                             operation_uuid=op_id,
                         )
                 formatter(
-                    uuid=op_id,
-                    status=effective_status,
-                    kind=op.get("kind", ""),
-                    duration=duration,
-                    exit_code=exit_code,
-                    title=title,
-                    error=op.get("error") or "",
+                    **{
+                        **op,
+                        "uuid": op_id,
+                        "exit_code": exit_code,
+                        "title": title,
+                    }
                 )
                 failure_exit = 0
-                if effective_status != "SUCCESS":
+                if status != "SUCCESS":
                     failure_exit = 1
                 if exit_code is not None and exit_code != 0:
                     failure_exit = max(failure_exit, exit_code)
diff --git a/contree_cli/cli/show.py b/contree_cli/cli/show.py
index ef00d4f..4b59aa2 100644
--- a/contree_cli/cli/show.py
+++ b/contree_cli/cli/show.py
@@ -1,9 +1,11 @@
-"""Show the result of an operation.
-
-Fetches the operation by UUID and displays its status, duration, exit
-code, result image, and captured stdout/stderr. Terminal operations
-(SUCCESS, FAILED, CANCELLED) are cached locally to avoid redundant API
-calls.
+"""Per-UUID inspect handler used by `contree operation show` (and its
+top-level shortcut ``contree show``).
+
+The top-level ``show`` command is registered against
+:func:`contree_cli.cli.operation.setup_show_parser`; that handler loops
+over each UUID and calls :func:`cmd_show` here. This module owns the
+single-UUID logic: ``@N`` history-reference resolution, terminal
+operation caching, and stdout/stderr decoding.
 """
 
 from __future__ import annotations
@@ -13,67 +15,46 @@
 import logging
 import sys
 from dataclasses import dataclass
-from datetime import timedelta
 from typing import Any, cast
 
-from contree_cli import CLIENT, FORMATTER, SESSION_STORE, ArgumentsProtocol, SetupResult
+from contree_cli import CLIENT, FORMATTER, SESSION_STORE, ArgumentsProtocol
 from contree_cli.client import decode_stream
 from contree_cli.output import DefaultFormatter, JSONFormatter, JSONPrettyFormatter
-from contree_cli.session import SessionStore
+from contree_cli.refs import history_spec_from_ref, resolve_operation_uuid
 
-logger = logging.getLogger(__name__)
+# Re-exported for backwards compatibility with anything that historically
+# imported these helpers from `contree_cli.cli.show`.
+__all__ = [
+    "ShowArgs",
+    "cmd_show",
+    "history_spec_from_ref",
+    "resolve_operation_uuid",
+]
 
-EPILOG = """\
-for coding agents:
-  read-only command
-  terminal operation states are cached locally
-  use -f json for structured metadata + decoded stdout/stderr fields
-"""
+logger = logging.getLogger(__name__)
 
 
 @dataclass(frozen=True)
 class ShowArgs(ArgumentsProtocol):
     uuid: str
+    raw: bool = False
 
     @classmethod
     def from_args(cls, ns: argparse.Namespace) -> ShowArgs:
-        return cls(uuid=ns.uuid)
-
-
-def setup_parser(p: argparse.ArgumentParser) -> SetupResult:
-    p.add_argument("uuid", help="Operation UUID or session entry (e.g., @12)")
-    return cmd_show, ShowArgs
+        return cls(uuid=ns.uuid, raw=getattr(ns, "raw", False))
 
 
-def _resolve_operation_uuid(raw: str, store: SessionStore) -> str:
-    # Support @N, :N, or bare numeric history IDs for current session
-    prefix_stripped = raw[1:] if raw.startswith(("@", ":")) else raw
-    if prefix_stripped.isdigit():
-        session = store.session
-        if session is None:
-            raise ValueError(
-                "No active session; cannot resolve history entry. "
-                "Run `contree use` first.",
-            )
-        entry_id = int(prefix_stripped)
-        entry = store._get_history_entry(entry_id)
-        op_uuid = entry.operation_uuid
-        if not op_uuid:
-            raise ValueError(f"History entry {entry_id} has no operation UUID")
-        return op_uuid
-    return raw
-
-
-_TERMINAL = frozenset({"SUCCESS", "FAILED", "CANCELLED"})
+TERMINAL = frozenset({"SUCCESS", "FAILED", "CANCELLED"})
 
 
 def cmd_show(args: ShowArgs) -> int | None:
     client = CLIENT.get()
     formatter = FORMATTER.get()
+    formatter.configure(tail=("error",))
     store = SESSION_STORE.get()
 
     try:
-        op_uuid = _resolve_operation_uuid(args.uuid, store)
+        op_uuid = resolve_operation_uuid(args.uuid, store)
     except ValueError as exc:
         print(str(exc), file=sys.stderr)
         return 1
@@ -85,12 +66,18 @@ def cmd_show(args: ShowArgs) -> int | None:
     else:
         resp = client.get(f"/v1/operations/{op_uuid}")
         op = json.loads(resp.read())
-        if op.get("status") in _TERMINAL:
+        if op.get("status") in TERMINAL:
             store.cache[cache_key] = op
 
-    duration = (
-        timedelta(seconds=op["duration"]) if op.get("duration") is not None else None
-    )
+    if args.raw:
+        # Pass through the server payload verbatim, one operation per
+        # line (JSONL), so multi-UUID `op show --raw` streams cleanly
+        # into `jq -c`, `awk`, etc. Skips formatter routing, derived
+        # columns, and stdout/stderr decoding -- the user asked for raw.
+        json.dump(op, sys.stdout)
+        sys.stdout.write("\n")
+        return None
+
     result = op.get("result") or {}
     metadata = op.get("metadata") or {}
     instance_result = metadata.get("result") or {}
@@ -100,19 +87,13 @@ def cmd_show(args: ShowArgs) -> int | None:
     if state:
         exit_code = state.get("exit_code")
 
-    status = op.get("status", "")
-    if status == "SUCCESS" and exit_code not in (None, 0):
-        status = "FAILED"
-
     formatter(
-        uuid=op["uuid"],
-        kind=op["kind"],
-        status=status,
-        duration=duration,
-        exit_code=exit_code,
-        error=op.get("error") or "",
-        image=result.get("image") or "",
-        tag=result.get("tag") or "",
+        **{
+            **op,
+            "exit_code": exit_code,
+            "image": result.get("image") or "",
+            "tag": result.get("tag") or "",
+        }
     )
     formatter.flush()
 
diff --git a/contree_cli/cli/skill.py b/contree_cli/cli/skill.py
index 3137ef1..dff0742 100644
--- a/contree_cli/cli/skill.py
+++ b/contree_cli/cli/skill.py
@@ -122,7 +122,9 @@ def spec_arg(parser: argparse.ArgumentParser) -> None:
     spec_arg(remove_p)
     remove_p.set_defaults(handler=cmd_skill_remove, load_args=SkillRemoveArgs)
 
-    upgrade_p = sub.add_parser("upgrade", help="Upgrade installed skill files")
+    upgrade_p = sub.add_parser(
+        "upgrade", aliases=["u", "update"], help="Upgrade installed skill files"
+    )
     upgrade_p.set_defaults(handler=cmd_skill_upgrade, load_args=SkillUpgradeArgs)
     spec_arg(upgrade_p)
 
diff --git a/contree_cli/cli/tag.py b/contree_cli/cli/tag.py
index 49fd532..872cb37 100644
--- a/contree_cli/cli/tag.py
+++ b/contree_cli/cli/tag.py
@@ -6,7 +6,7 @@
 With one argument, tags the current session image.
 With two arguments, the first is the image reference and the second is the tag.
 
-Use -d/--delete to remove a tag instead of assigning one.
+Use -U/--delete/--rm to remove a tag instead of assigning one.
 """
 
 from __future__ import annotations
@@ -26,7 +26,7 @@
   contree tag python-dev:latest            # tag current session image
   contree tag UUID python-dev:latest       # tag specific image by UUID
   contree tag tag:alpine:latest my-alpine  # re-tag by reference
-  contree tag -d UUID my-tag               # remove a tag
+  contree tag -U UUID my-tag               # remove a tag (or --delete/--rm)
 
 for coding agents:
   mutating command
diff --git a/contree_cli/client.py b/contree_cli/client.py
index c74d841..e49dd2f 100644
--- a/contree_cli/client.py
+++ b/contree_cli/client.py
@@ -1,17 +1,22 @@
 from __future__ import annotations
 
 import base64
+import collections
+import contextlib
 import http.client
 import io
 import json
 import logging
 import platform
+import socket
 import sys
+import threading
 import time
 from abc import ABC, abstractmethod
-from collections.abc import Iterable, Iterator
+from collections.abc import Callable, Iterable, Iterator
+from concurrent.futures import Future, ThreadPoolExecutor
 from importlib.metadata import PackageNotFoundError, version
-from typing import IO, cast
+from typing import IO, Any, cast
 from urllib.parse import urlencode, urlsplit
 
 from contree_cli.config import AuthType, ConfigProfile
@@ -20,6 +25,16 @@
 
 RETRY_DELAYS = (1, 2, 4, 5, 10, 10, 10)
 
+# Socket-level / connection-level errors that warrant a retry. DNS hiccups
+# (gaierror), refused/reset connections, and broken HTTP framing are all
+# transient — the server may come back. TimeoutError is re-raised, not retried.
+RETRYABLE_NETWORK_ERRORS: tuple[type[BaseException], ...] = (
+    socket.gaierror,
+    ConnectionError,
+    http.client.HTTPException,
+    OSError,
+)
+
 
 def cli_version() -> str:
     try:
@@ -214,6 +229,7 @@ def request(
 
         full_path = self._prefix + path
         last_error: ApiError | None = None
+        last_network_error: BaseException | None = None
         attempts = len(RETRY_DELAYS) + 1
 
         log.debug(
@@ -225,13 +241,21 @@ def request(
         )
 
         for attempt in range(attempts):
-            if last_error is not None:
+            if last_error is not None or last_network_error is not None:
                 delay = RETRY_DELAYS[attempt - 1]
-                log.warning(
-                    "Server error %d, retrying in %ds…",
-                    last_error.status,
-                    delay,
-                )
+                if last_network_error is not None:
+                    log.warning(
+                        "Network error (%s), retrying in %ds…",
+                        type(last_network_error).__name__,
+                        delay,
+                    )
+                else:
+                    assert last_error is not None
+                    log.warning(
+                        "Server error %d, retrying in %ds…",
+                        last_error.status,
+                        delay,
+                    )
                 time.sleep(delay)
 
             if attempt > 0 and hasattr(body, "seek"):
@@ -249,6 +273,18 @@ def request(
                 resp = conn.getresponse()
             except TimeoutError as exc:
                 raise TimeoutError(f"Request timed out: {method} {full_path}") from exc
+            except http.client.InvalidURL:
+                # Malformed URL is a permanent caller-side error — retrying
+                # would just spin through the back-off ladder for nothing.
+                raise
+            except RETRYABLE_NETWORK_ERRORS as exc:
+                last_network_error = exc
+                last_error = None
+                continue
+
+            # Successful round-trip clears the network-error trail so the
+            # final raise below doesn't pick up stale failure context.
+            last_network_error = None
 
             if 200 <= resp.status < 300:
                 log.debug(
@@ -291,6 +327,8 @@ def request(
 
             raise error
 
+        if last_network_error is not None:
+            raise last_network_error
         assert last_error is not None
         raise last_error
 
@@ -511,3 +549,109 @@ def decode_stream(stream: dict[str, object] | None) -> str:
             errors="replace",
         )
     return value
+
+
+class PaginatedFetcher:
+    """Iterate paginated API endpoint pages concurrently.
+
+    Issues GET ``path`` requests with ``offset``/``limit`` query params,
+    pulling pages in parallel via :class:`ThreadPoolExecutor` (results
+    delivered in offset order). Stops on the first empty or short
+    (``< page_size``) page or after ``max_pages`` requests. May
+    over-fetch by up to ``concurrency - 1`` pages past the actual end
+    of data; the trade-off buys roughly ``concurrency``-fold latency
+    reduction on multi-page listings.
+
+    Callers that hit their own record limit mid-iteration should call
+    :meth:`stop` to short-circuit pending workers — fetches that have
+    not yet issued their HTTP request will skip it and return an empty
+    page, ending iteration.
+
+    :attr:`exhausted` is ``True`` after iteration finishes only if the
+    helper saw the end of data (short/empty page) — including via the
+    ``stop`` signal. It stays ``False`` if it stopped because
+    ``max_pages`` was reached without seeing the end.
+    """
+
+    DEFAULT_PAGE_SIZE = 1000
+    UNLIMITED_MAX_PAGES = 1000  # 1M-record safety cap when limit=None.
+
+    def __init__(
+        self,
+        client: ContreeClient,
+        path: str,
+        params: dict[str, str],
+        extract: Callable[[bytes], list[dict[str, Any]]],
+        *,
+        limit: int | None,
+        page_size: int | None = None,
+        concurrency: int = 8,
+    ) -> None:
+        """Configure a paginated fetch.
+
+        ``limit`` is the caller's record budget (``--limit``, ``--show-max``).
+        ``None`` means "fetch everything up to ``UNLIMITED_MAX_PAGES * page_size``
+        records". When set, ``page_size`` is capped at ``limit + 1`` so a
+        small budget like ``--limit 5`` doesn't pull a 1000-row page just
+        to discard 995, and ``max_pages`` is sized to cover ``limit + 1``
+        records (the extra record lets callers detect "more available"
+        and warn). ``page_size`` defaults to :attr:`DEFAULT_PAGE_SIZE`.
+        """
+        self.client = client
+        self.path = path
+        self.params = params
+        self.extract = extract
+        self.concurrency = concurrency
+        self.exhausted = False
+        self._stop = threading.Event()
+
+        default_page_size = page_size or self.DEFAULT_PAGE_SIZE
+        if limit is None:
+            self.page_size = default_page_size
+            self.max_pages = self.UNLIMITED_MAX_PAGES
+        else:
+            # Fetch one extra record so callers can detect "more results
+            # exist past --limit" and emit a warning.
+            self.page_size = min(default_page_size, limit + 1)
+            self.max_pages = (limit + self.page_size) // self.page_size + 1
+
+    def stop(self) -> None:
+        """Signal that the caller has seen enough; skip pending fetches."""
+        self._stop.set()
+
+    def _fetch(self, offset: int) -> list[dict[str, Any]]:
+        if self._stop.is_set():
+            return []
+        page_params = {
+            **self.params,
+            "offset": str(offset),
+            "limit": str(self.page_size),
+        }
+        resp = self.client.get(self.path, params=page_params)
+        return self.extract(resp.read())
+
+    def __iter__(self) -> Iterator[list[dict[str, Any]]]:
+        offsets = iter(i * self.page_size for i in range(self.max_pages))
+        pending: collections.deque[Future[list[dict[str, Any]]]] = collections.deque()
+        with ThreadPoolExecutor(max_workers=self.concurrency) as pool:
+            # Prime the pool with up to `concurrency` in-flight fetches.
+            for _ in range(self.concurrency):
+                with contextlib.suppress(StopIteration):
+                    pending.append(pool.submit(self._fetch, next(offsets)))
+
+            while pending:
+                page = pending.popleft().result()
+                if not page:
+                    self.exhausted = True
+                    self._stop.set()
+                    continue
+                if len(page) < self.page_size:
+                    # Mark exhausted before yielding so callers that break
+                    # out of the loop still see the correct end-of-data flag.
+                    self.exhausted = True
+                    self._stop.set()
+                yield page
+                # Refill so in-flight count stays at `concurrency`.
+                if not self._stop.is_set():
+                    with contextlib.suppress(StopIteration):
+                        pending.append(pool.submit(self._fetch, next(offsets)))
diff --git a/contree_cli/manual.md b/contree_cli/manual.md
index 7584ad6..b96a9e8 100644
--- a/contree_cli/manual.md
+++ b/contree_cli/manual.md
@@ -38,7 +38,7 @@ Key commands:
   contree session show            view history DAG
   contree session branch <name>   create branch
   contree session checkout <name> switch branch
-  contree session rollback [N]    undo N steps
+  contree session rollback [N]    jump to history id N (absolute); use -N for steps back
   contree session delete <key>    delete session
 
 Branch workflow:
@@ -49,9 +49,15 @@ Branch workflow:
   contree session branch -d experiment   clean up
 
 Rollback:
-  contree session rollback 1             undo last run
+  contree session rollback               back one entry (default)
+  contree session rollback -- -3         back three entries (note `--`)
+  contree session rollback +1            forward one entry
+  contree session rollback 42            absolute jump to history id 42
   contree session show                   inspect before rollback
 
+  WARNING: a bare positive N is an ABSOLUTE history id, not "back N steps".
+  Use `--` plus a negative N for relative back-navigation.
+
 More: contree session --help
 
 Interactive shell
@@ -153,7 +159,13 @@ Detached workflow:
   contree run -d -- long-task
   contree ps                             check status
   contree show UUID                      view result
-  contree session wait                   block until done
+  contree op wait UUID                   block until terminal
+
+Fan-out + join (use -f json BEFORE run so jq sees JSON):
+  A=$(contree -f json run -d -- make a | jq -r .uuid)
+  B=$(contree -f json run -d -- make b | jq -r .uuid)
+  contree op wait "$A" "$B"              wait for both; one row each
+  contree op wait --all --timeout 600    or block on every active op
 
 More: contree run --help
 
@@ -182,9 +194,20 @@ Operations
   contree show UUID          show operation result
   contree kill UUID          cancel operation
   contree kill -a            cancel all active
-  contree session wait       wait for active operations
+  contree op wait UUID...    wait for given operations to finish
+  contree op wait --all      wait for every active operation
+  contree op wait --timeout SECONDS   bound the wait (default 60s)
+
+Every positional shown as UUID_OR_REF in --help (op show, op cancel,
+op wait, top-level show/kill, session wait) also accepts session-
+history references against the active session:
+  HEAD / @ / :          latest op on the active branch tip
+  HEAD~N / @-N          N steps back from the tip
+  HEAD~                 shorthand for HEAD~1
+  @+N                   N steps forward (latest child)
+  @N / :N / bare N      absolute history id
 
-More: contree ps --help, contree show --help
+More: contree ps --help, contree op wait --help
 
 Profiles
 ========
@@ -238,23 +261,29 @@ All commands
   run [-- CMD]            Spawn sandbox instance (aliases: r)
   images                  List/import images (aliases: i, img)
   tag [IMAGE] TAG         Tag image (aliases: t)
-  ps                      List operations
-  kill UUID               Cancel operation
+  ps                      List operations (shortcut for `operation ls`)
+  kill UUID [UUID...]     Cancel operations (shortcut for `operation cancel`)
   show UUID               Show operation result
+  operation list          List operations (aliases: ls)
+  operation show UUID...  Show multiple operation results (aliases: sh)
+  operation wait UUID...  Block until operations finish (aliases: w);
+                          `--all` waits for every active op; `--timeout
+                          SECONDS` fails if not all complete (default: 60)
+  operation cancel UUID.. Cancel multiple operations (aliases: kill, k); `--all` cancels every active op
   ls [PATH]               List files in image (no VM)
   cat PATH                Show file content (no VM)
   cp PATH DEST            Download file from image
   cd [PATH]               Change session working directory
-  env [KEY=VALUE ...]     Session env vars (-d to unset)
+  env [KEY=VALUE ...]     Session env vars (-U to unset)
   file edit PATH          Edit remote file via $EDITOR
   file cp SRC DEST        Stage local file for next run
   session list            List sessions (aliases: ls)
   session branch [NAME]   Create/list branches (aliases: br)
   session checkout BRANCH Switch branch (aliases: co)
-  session rollback [N]    Undo N steps (aliases: rb)
+  session rollback [N]    Jump to history id N (absolute); -N steps back (aliases: rb)
   session show            Show history DAG
   session delete KEY      Delete session (aliases: rm, del)
-  session wait [OPS]      Wait for operations
+  session wait [OPS]      Drain detached ops; no-arg form advances branch, UUID form polls only
   auth                    Save token
   auth ls                 List profiles (aliases: profiles)
   auth switch NAME        Switch profile
diff --git a/contree_cli/output.py b/contree_cli/output.py
index 6ca656c..6b93aaf 100644
--- a/contree_cli/output.py
+++ b/contree_cli/output.py
@@ -7,14 +7,74 @@
 import logging
 import shutil
 import sys
-from datetime import datetime, timedelta
+import threading
+from collections import OrderedDict
+from datetime import datetime, timedelta, timezone
 from types import MappingProxyType
+from typing import Any
 
-from contree_cli.types import STDOUT_IS_A_TTY, Colors
+from contree_cli.types import STDOUT_IS_A_TTY, Colors, parse_datetime
 
 log = logging.getLogger(__name__)
 
 
+DATETIME_FIELDS = frozenset({"created_at", "updated_at"})
+
+
+def transform_field(key: str, value: Any) -> Any:
+    """Apply field-name based type conversion for known API shapes."""
+    if key in DATETIME_FIELDS and isinstance(value, str):
+        return parse_datetime(value)
+    if key == "duration" and isinstance(value, (int, float)):
+        return timedelta(seconds=value)
+    if key in ("error", "tag") and value is None:
+        return ""
+    if key == "mode" and isinstance(value, int):
+        return format(value, "o")
+    if key == "mtime" and isinstance(value, (int, float)):
+        return datetime.fromtimestamp(value, tz=timezone.utc)
+    return value
+
+
+class ListSorter:
+    """Reorder an API record dict before a formatter writes it.
+
+    Drops nested values (dict/list), applies light typing to known fields
+    (timestamps, duration, mode, mtime, nullable error/tag), and yields
+    columns in a stable order: ``head`` first, then any new keys
+    discovered in record order (memoised across calls so the order stays
+    stable across rows), then ``tail`` last. Keys named in
+    ``head``/``tail`` that are absent from the record are skipped.
+    """
+
+    def __init__(
+        self,
+        *,
+        head: tuple[str, ...] = (),
+        tail: tuple[str, ...] = (),
+    ) -> None:
+        self.tail = tail
+        self.columns: list[str] = list(head)
+        self.seen: set[str] = set(head) | set(tail)
+
+    def order(self, fields: dict[str, Any]) -> OrderedDict[str, Any]:
+        for key, value in fields.items():
+            if key in self.seen or isinstance(value, (dict, list)):
+                continue
+            self.columns.append(key)
+            self.seen.add(key)
+
+        out: OrderedDict[str, Any] = OrderedDict()
+        for key in (*self.columns, *self.tail):
+            if key not in fields:
+                continue
+            value = fields[key]
+            if isinstance(value, (dict, list)):
+                continue
+            out[key] = transform_field(key, value)
+        return out
+
+
 @functools.singledispatch
 def _format_value(value: object) -> str:
     """Human-friendly string for a value."""
@@ -23,6 +83,9 @@ def _format_value(value: object) -> str:
 
 @_format_value.register
 def _(value: datetime) -> str:
+    # API returns UTC; render in the user's local timezone for readability.
+    if value.tzinfo is not None:
+        value = value.astimezone()
     return value.strftime("%Y-%m-%d %H:%M:%S")
 
 
@@ -107,69 +170,115 @@ def _fit_columns(
 
 
 class OutputFormatter:
-    """Base formatter - subclasses decide the serialisation style."""
+    """Base formatter - subclasses decide the serialisation style.
+
+    Maintains an internal :class:`ListSorter` that drops nested values,
+    applies light typing to known fields (timestamps, duration, mode,
+    mtime, nullable error/tag), and reorders columns according to
+    optional ``head``/``tail`` configured via :meth:`configure`.
+    """
 
     # Not suitable for streaming stdout/stderr output (e.g. from `run`)
     STREAM = False
 
+    def __init__(self) -> None:
+        self.sorter = ListSorter()
+
+    def configure(
+        self,
+        *,
+        head: tuple[str, ...] = (),
+        tail: tuple[str, ...] = (),
+    ) -> None:
+        """Configure column ordering for this formatter."""
+        self.sorter = ListSorter(head=head, tail=tail)
+
     def __call__(self, **kwargs: object) -> None:
+        self.write(self.sorter.order(kwargs))
+
+    def write(self, row: OrderedDict[str, Any]) -> None:
         raise NotImplementedError
 
     def flush(self) -> None:
         """Flush any buffered output. No-op for streaming formatters."""
 
+    def close(self) -> None:
+        """Finalise the output stream. Defaults to a final flush."""
+        self.flush()
+
 
 class CSVFormatter(OutputFormatter):
     def __init__(self) -> None:
+        super().__init__()
         self._header_written = False
 
-    def __call__(self, **kwargs: object) -> None:
+    def write(self, row: OrderedDict[str, Any]) -> None:
         buf = io.StringIO()
         writer = csv.writer(buf)
         if not self._header_written:
-            writer.writerow(kwargs.keys())
+            writer.writerow(row.keys())
             self._header_written = True
-        writer.writerow(_format_value(v) for v in kwargs.values())
+        writer.writerow(_format_value(v) for v in row.values())
         sys.stdout.write(buf.getvalue())
 
 
 class TSVFormatter(OutputFormatter):
     def __init__(self) -> None:
+        super().__init__()
         self._header_written = False
 
-    def __call__(self, **kwargs: object) -> None:
+    def write(self, row: OrderedDict[str, Any]) -> None:
         buf = io.StringIO()
         writer = csv.writer(buf, dialect="excel-tab")
         if not self._header_written:
-            writer.writerow(kwargs.keys())
+            writer.writerow(row.keys())
             self._header_written = True
-        writer.writerow(_format_value(v) for v in kwargs.values())
+        writer.writerow(_format_value(v) for v in row.values())
         sys.stdout.write(buf.getvalue())
 
 
 class JSONFormatter(OutputFormatter):
     STREAM = True
 
-    def __call__(self, **kwargs: object) -> None:
-        sys.stdout.write(json.dumps(kwargs, default=_json_default) + "\n")
+    def write(self, row: OrderedDict[str, Any]) -> None:
+        sys.stdout.write(json.dumps(row, default=_json_default) + "\n")
 
 
 class JSONPrettyFormatter(OutputFormatter):
     STREAM = True
 
     def __init__(self) -> None:
-        self._rows: list[dict[str, object]] = []
+        super().__init__()
+        self._rows: list[OrderedDict[str, Any]] = []
+        self._opened = False
+        self._first_row = True
+        self.flush_lock = threading.RLock()
 
-    def __call__(self, **kwargs: object) -> None:
-        self._rows.append(kwargs)
+    def write(self, row: OrderedDict[str, Any]) -> None:
+        self._rows.append(row)
 
     def flush(self) -> None:
-        if not self._rows:
-            return
-        sys.stdout.write(
-            json.dumps(self._rows, indent=2, default=_json_default) + "\n",
-        )
-        self._rows.clear()
+        with self.flush_lock:
+            if not self._rows:
+                return
+            if not self._opened:
+                sys.stdout.write("[\n")
+                self._opened = True
+            for row in self._rows:
+                prefix = "" if self._first_row else ",\n"
+                self._first_row = False
+                sys.stdout.write(
+                    prefix + json.dumps(row, indent=2, default=_json_default)
+                )
+            self._rows.clear()
+
+    def close(self) -> None:
+        self.flush()
+        with self.flush_lock:
+            if self._opened:
+                sys.stdout.write("\n]\n")
+                self._opened = False
+                self._first_row = True
 
 
 class TableFormatter(OutputFormatter):
@@ -198,84 +307,93 @@ class TableFormatter(OutputFormatter):
     )
 
     def __init__(self) -> None:
-        self._rows: list[dict[str, object]] = []
-
-    def __call__(self, **kwargs: object) -> None:
-        self._rows.append(kwargs)
+        super().__init__()
+        self._rows: list[OrderedDict[str, Any]] = []
+        # Layout decided on the first non-empty flush, reused on subsequent
+        # flushes so paginated output keeps the same column alignment.
+        self._columns: list[str] | None = None
+        self._widths: dict[str, int] | None = None
+        self._col_colors: dict[str, Colors] = {}
+        self.flush_lock = threading.RLock()
+
+    def write(self, row: OrderedDict[str, Any]) -> None:
+        self._rows.append(row)
 
     def flush(self) -> None:
-        if not self._rows:
-            return
-        columns = list(self._rows[0].keys())
-        widths = {col: len(col) for col in columns}
-        # Split each cell into lines and compute natural column widths.
-        split_rows: list[dict[str, list[str]]] = []
-        for row in self._rows:
-            split_row: dict[str, list[str]] = {}
-            for col in columns:
-                lines = _format_value(row.get(col, "")).split("\n")
-                split_row[col] = lines
-                for line in lines:
-                    widths[col] = max(widths[col], len(line))
-            split_rows.append(split_row)
-        # Constrain to terminal width when outputting to a TTY.
-        truncated = False
-        if STDOUT_IS_A_TTY:
-            term_width = shutil.get_terminal_size().columns
-            separator_space = (len(columns) - 1) * 2
-            available = term_width - separator_space
-            if sum(widths.values()) > available > 0:
-                widths, truncated = _fit_columns(
-                    widths,
-                    columns,
-                    available,
-                    self.MIN_COL_WIDTH,
-                )
-        # Assign a color per column (cycling through the palette).
-        palette = self.COLUMN_PALETTE
-        col_colors: dict[str, Colors] = {}
-        if STDOUT_IS_A_TTY:
-            for idx, col in enumerate(columns):
-                col_colors[col] = palette[idx % len(palette)]
-        # Render header (bold when TTY).
-        header_parts: list[str] = []
-        for col in columns:
-            padded = _truncate(
-                col.upper(),
-                widths[col],
-                self.ELLIPSIS,
-            ).ljust(widths[col])
-            if STDOUT_IS_A_TTY:
-                padded = Colors.BOLD(padded)
-            header_parts.append(padded)
-        sys.stdout.write("  ".join(header_parts) + "\n")
-        # Render rows.
-        for split_row in split_rows:
-            height = max(len(split_row[col]) for col in columns)
-            for i in range(height):
-                parts: list[str] = []
+        with self.flush_lock:
+            if not self._rows:
+                return
+            first_flush = self._columns is None
+            if first_flush:
+                columns = list(self._rows[0].keys())
+                widths = {col: len(col) for col in columns}
+                for row in self._rows:
+                    for col in columns:
+                        for line in _format_value(row.get(col, "")).split("\n"):
+                            widths[col] = max(widths[col], len(line))
+                truncated = False
+                if STDOUT_IS_A_TTY:
+                    term_width = shutil.get_terminal_size().columns
+                    separator_space = (len(columns) - 1) * 2
+                    available = term_width - separator_space
+                    if sum(widths.values()) > available > 0:
+                        widths, truncated = _fit_columns(
+                            widths,
+                            columns,
+                            available,
+                            self.MIN_COL_WIDTH,
+                        )
+                self._columns = columns
+                self._widths = widths
+                if STDOUT_IS_A_TTY:
+                    for idx, col in enumerate(columns):
+                        self._col_colors[col] = self.COLUMN_PALETTE[
+                            idx % len(self.COLUMN_PALETTE)
+                        ]
+                header_parts: list[str] = []
                 for col in columns:
-                    lines = split_row[col]
-                    cell = lines[i] if i < len(lines) else ""
                     padded = _truncate(
-                        cell,
+                        col.upper(),
                         widths[col],
                         self.ELLIPSIS,
                     ).ljust(widths[col])
-                    if col in col_colors:
-                        color = self.VALUE_COLORS.get(
-                            cell.strip(),
-                            col_colors[col],
-                        )
-                        padded = color(padded)
-                    parts.append(padded)
-                sys.stdout.write("  ".join(parts) + "\n")
-        if truncated:
-            log.warning(
-                "Output truncated to fit terminal;"
-                " use --format json to see full values",
-            )
-        self._rows.clear()
+                    if STDOUT_IS_A_TTY:
+                        padded = Colors.BOLD(padded)
+                    header_parts.append(padded)
+                sys.stdout.write("  ".join(header_parts) + "\n")
+                if truncated:
+                    log.warning(
+                        "Output truncated to fit terminal;"
+                        " use --format json to see full values",
+                    )
+            assert self._columns is not None
+            assert self._widths is not None
+            columns = self._columns
+            widths = self._widths
+            for row in self._rows:
+                split_row = {
+                    col: _format_value(row.get(col, "")).split("\n") for col in columns
+                }
+                height = max(len(split_row[col]) for col in columns)
+                for i in range(height):
+                    parts: list[str] = []
+                    for col in columns:
+                        lines = split_row[col]
+                        cell = lines[i] if i < len(lines) else ""
+                        padded = _truncate(
+                            cell,
+                            widths[col],
+                            self.ELLIPSIS,
+                        ).ljust(widths[col])
+                        if col in self._col_colors:
+                            color = self.VALUE_COLORS.get(
+                                cell.strip(),
+                                self._col_colors[col],
+                            )
+                            padded = color(padded)
+                        parts.append(padded)
+                    sys.stdout.write("  ".join(parts) + "\n")
+            self._rows.clear()
 
 
 class DefaultFormatter(TableFormatter):
@@ -286,14 +404,15 @@ class PlainFormatter(OutputFormatter):
     STREAM = True
 
     def __init__(self) -> None:
+        super().__init__()
         self._count = 0
 
-    def __call__(self, **kwargs: object) -> None:
+    def write(self, row: OrderedDict[str, Any]) -> None:
         if self._count:
             sys.stdout.write("---\n")
         self._count += 1
-        key_width = max(len(k) for k in kwargs) if kwargs else 0
-        for key, val in kwargs.items():
+        key_width = max(len(k) for k in row) if row else 0
+        for key, val in row.items():
             text = _format_value(val)
             label = f"{key}:".ljust(key_width + 2)
             lines = text.split("\n")
@@ -302,6 +421,9 @@ def __call__(self, **kwargs: object) -> None:
             for line in lines[1:]:
                 sys.stdout.write(f"{indent}{line}\n")
 
+    def flush(self) -> None:
+        sys.stdout.flush()
+
 
 FORMATTERS: dict[str, type[OutputFormatter]] = {
     "csv": CSVFormatter,
@@ -369,22 +491,25 @@ class TOMLFormatter(OutputFormatter):
         STREAM = True
 
         def __init__(self) -> None:
-            self._rows: list[dict[str, object]] = []
+            super().__init__()
+            self._rows: list[OrderedDict[str, Any]] = []
+            self.flush_lock = threading.RLock()
 
-        def __call__(self, **kwargs: object) -> None:
-            self._rows.append(kwargs)
+        def write(self, row: OrderedDict[str, Any]) -> None:
+            self._rows.append(row)
 
         def flush(self) -> None:
-            if not self._rows:
-                return
-            parts: list[str] = []
-            for row in self._rows:
-                parts.append("[[results]]")
-                for key, val in row.items():
-                    parts.append(f"{key} = {_toml_value(val)}")
-                parts.append("")
-            sys.stdout.write("\n".join(parts))
-            self._rows.clear()
+            with self.flush_lock:
+                if not self._rows:
+                    return
+                parts: list[str] = []
+                for row in self._rows:
+                    parts.append("[[results]]")
+                    for key, val in row.items():
+                        parts.append(f"{key} = {_toml_value(val)}")
+                    parts.append("")
+                sys.stdout.write("\n".join(parts))
+                self._rows.clear()
 
     FORMATTERS["toml"] = TOMLFormatter
 
diff --git a/contree_cli/refs.py b/contree_cli/refs.py
new file mode 100644
index 0000000..6dd592e
--- /dev/null
+++ b/contree_cli/refs.py
@@ -0,0 +1,157 @@
+"""Operation-reference parsing for CLI positional arguments.
+
+Every CLI command that accepts operation UUIDs also accepts session-
+history references in the same positional slot. The accepted forms
+are:
+
+- bare ``@``, ``:``, or ``HEAD``   -- the active branch tip.
+- ``@N``, ``:N`` or bare ``N``     -- absolute history id.
+- ``@-N``, ``:-N``, ``HEAD~N``     -- N steps back from the tip.
+- ``HEAD~``                        -- shorthand for ``HEAD~1``.
+- ``@+N``, ``:+N``                 -- N steps forward from the tip.
+
+This module centralises the parsing so any UUID-list-accepting
+command behaves identically. ``resolve_operation_uuids`` is the entry point
+used by ``from_args`` methods; ``resolve_operation_uuid`` is the
+single-token form used by ``cmd_show``.
+"""
+
+from __future__ import annotations
+
+import re
+from uuid import UUID
+
+from contree_cli import SESSION_STORE
+from contree_cli.session import SessionStore
+
+# All accepted history references (apart from the git-style ``HEAD~``/
+# ``HEAD~N`` shorthand handled separately below) fit this shape:
+# an optional ``HEAD``/``@``/``:`` prefix followed by an optional signed
+# integer. Examples that match: ``HEAD``, ``HEAD5``, ``HEAD+2``, ``@``,
+# ``@5``, ``@-1``, ``:``, ``:7``, ``:+3``, bare ``5``. The named groups
+# make the post-match logic obvious.
+HISTORY_REF_RE = re.compile(r"^(?P<prefix>HEAD|@|:)?(?P<sign>[+-])?(?P<value>\d+)?$")
+
+
+def history_spec_from_ref(raw: str) -> str | None:
+    """Translate a user-facing history reference into a SessionStore spec.
+
+    Returns ``None`` when ``raw`` does not look like a history reference
+    and should be passed through (e.g. a UUID).
+    """
+    # Git-style HEAD~ / HEAD~N shorthand is normalised to a back-step
+    # spec, so HEAD~ == HEAD~1 == HEAD-1 (= spec "-1").
+    if raw.startswith("HEAD~"):
+        suffix = raw[len("HEAD~") :]
+        if suffix == "":
+            return "-1"
+        if suffix.isdecimal():
+            return f"-{suffix}"
+        return None
+
+    m = HISTORY_REF_RE.match(raw)
+    if m is None:
+        return None
+    prefix = m.group("prefix")
+    sign = m.group("sign")
+    value = m.group("value")
+
+    # Bare sign with no number ("+", "-") is not a reference; bare
+    # signed numbers ("-3", "+1") aren't either because argparse would
+    # treat them as options and they read like flags to humans. Bare
+    # unsigned numerics ("5") are allowed as a shorthand for absolute id.
+    if prefix is None:
+        if sign is not None or value is None:
+            return None
+        return value
+
+    # Prefix-only: HEAD, @, or :. Means "the tip".
+    if value is None:
+        if sign is not None:
+            return None
+        return ""
+
+    # Prefix + value (with optional sign). resolve_history_spec validates
+    # the numeric range (e.g. zero rejected), keeping the lexer pure.
+    return f"{sign or ''}{value}"
+
+
+def looks_like_history_ref(value: str) -> bool:
+    """True for session-history references; see module docstring for forms."""
+    return history_spec_from_ref(value) is not None
+
+
+def resolve_operation_uuid(raw: str, store: SessionStore) -> str:
+    """Resolve a single token (UUID or history reference) to an operation UUID.
+
+    Returns ``raw`` unchanged when it does not look like a history
+    reference (so callers can pass real UUIDs through unaltered).
+    Raises :class:`ValueError` when the active session is missing, the
+    referenced history entry does not exist, or it has no operation
+    UUID attached.
+    """
+    spec = history_spec_from_ref(raw)
+    if spec is None:
+        return raw
+    session = store.session
+    if session is None:
+        raise ValueError(
+            "No active session; cannot resolve history entry. Run `contree use` first.",
+        )
+    entry = store.resolve_history_spec(spec)
+    if not entry.operation_uuid:
+        raise ValueError(f"History entry {entry.id} has no operation UUID")
+    return entry.operation_uuid
+
+
+def resolve_token(token: str, store: SessionStore) -> str:
+    """Resolve a single positional token to an operation UUID.
+
+    Returns the input unchanged when it is already a real UUID;
+    resolves history references against the active session and
+    returns the underlying operation UUID. Raises :class:`ValueError`
+    for malformed UUIDs and unresolvable references; the message is
+    informative for references (e.g. "History entry 99 not found")
+    and a generic ``UUID()`` parse error for literal tokens.
+    """
+    if history_spec_from_ref(token) is None:
+        UUID(token)
+        return token
+    resolved = resolve_operation_uuid(token, store)
+    UUID(resolved)
+    return resolved
+
+
+def resolve_operation_uuids(items: list[str]) -> list[str]:
+    """Flatten and resolve positional operation references.
+
+    Each ``item`` is split on whitespace, then each token is resolved
+    via :func:`resolve_token`. Tokens that fail to resolve are
+    collected and reported together via a single :class:`ValueError`,
+    so the user sees every bad token in one shot instead of
+    discovering them one at a time. History-reference errors keep
+    their context (e.g. "@99: History entry 99 not found"); literal
+    UUID parse failures are reported as just the token.
+
+    Splitting on whitespace handles the common case where an agent or
+    shell user passes multiple UUIDs as one quoted string (e.g.
+    ``op wait "$UUIDS"`` where ``$UUIDS`` is a multi-line value).
+    """
+    tokens = [t for item in items for t in item.split() if t]
+    if not tokens:
+        return []
+    store = SESSION_STORE.get()
+    out: list[str] = []
+    invalid: list[str] = []
+    for token in tokens:
+        try:
+            out.append(resolve_token(token, store))
+        except ValueError as exc:
+            if history_spec_from_ref(token) is not None:
+                invalid.append(f"{token}: {exc}")
+            else:
+                invalid.append(token)
+    if invalid:
+        plural = "s" if len(invalid) > 1 else ""
+        raise ValueError(f"Invalid operation reference{plural}: {' '.join(invalid)}")
+    return out
diff --git a/contree_cli/session.py b/contree_cli/session.py
index e052bda..f5cdbc2 100644
--- a/contree_cli/session.py
+++ b/contree_cli/session.py
@@ -578,6 +578,84 @@ def rollback(self, n: int = 1) -> HistoryEntry:
             raise ValueError("Rollback steps must be >= 1")
         return self.navigate(-n)
 
+    def tip_history_id(self) -> int:
+        cur = self._conn.execute(
+            """
+            SELECT b.history_id
+            FROM session_state s
+            JOIN session_branches b
+                ON b.session_key = s.session_key
+               AND b.branch_name = s.active_branch
+            WHERE s.session_key = ?
+            """,
+            (self._session_key,),
+        )
+        row = cur.fetchone()
+        if row is None:
+            raise ValueError("No active session")
+        return int(row["history_id"])
+
+    def resolve_history_spec(self, spec: str) -> HistoryEntry:
+        """Resolve a non-mutating history reference.
+
+        Accepted forms (mirrors ``session rollback`` semantics):
+
+        - ``""`` (empty)     -- the active branch tip (current entry).
+        - ``"N"`` (positive) -- absolute history id.
+        - ``"-N"``           -- walk N steps back from the active branch tip.
+        - ``"+N"``           -- walk N steps forward from the tip, picking the
+          latest child at each branch point.
+
+        ``"0"`` is rejected because it is ambiguous (matches both
+        "absolute 0" and "no movement"). Raises :class:`ValueError` if
+        the spec is malformed, the resulting entry does not exist in
+        this session, or the requested walk exceeds the available
+        ancestors/children.
+        """
+        if spec == "":
+            tip_id = self.tip_history_id()
+            return self._get_history_entry(tip_id)
+
+        if spec[:1] in ("+", "-"):
+            sign = spec[0]
+            digits = spec[1:]
+        else:
+            sign = ""
+            digits = spec
+        if not digits.isdecimal():
+            raise ValueError(f"Invalid history reference: {spec!r}")
+        n = int(digits)
+        if n == 0:
+            raise ValueError("History reference must be a non-zero number")
+
+        if sign == "":
+            return self._get_history_entry(n)
+
+        current_id = self.tip_history_id()
+
+        if sign == "-":
+            for i in range(n):
+                entry = self._get_history_entry(current_id)
+                if entry.parent_id is None:
+                    raise ValueError(
+                        f"Cannot go back {n} steps: only {i} ancestors available"
+                    )
+                current_id = entry.parent_id
+        else:
+            for i in range(n):
+                child = self._conn.execute(
+                    "SELECT id FROM session_history "
+                    "WHERE parent_id = ? AND session_key = ? "
+                    "ORDER BY id DESC LIMIT 1",
+                    (current_id, self._session_key),
+                ).fetchone()
+                if child is None:
+                    raise ValueError(
+                        f"Cannot go forward {n} steps: only {i} children available"
+                    )
+                current_id = child["id"]
+        return self._get_history_entry(current_id)
+
     def create_branch(
         self,
         name: str,
diff --git a/contree_cli/shell/argmap.py b/contree_cli/shell/argmap.py
index 5c9b5cb..66abc11 100644
--- a/contree_cli/shell/argmap.py
+++ b/contree_cli/shell/argmap.py
@@ -24,8 +24,8 @@
     (("use",), "image"): "image",
     (("tag",), "args"): "image",
     # show / kill / wait -- operation UUIDs.
-    (("show",), "uuid"): "operation",
-    (("kill",), "uuid"): "operation",
+    (("show",), "uuids"): "operation",
+    (("kill",), "uuids"): "operation",
     (("session", "wait"), "op_ids"): "operation",
     (("operation", "show"), "uuids"): "operation",
     (("operation", "cancel"), "uuids"): "operation",
@@ -56,6 +56,9 @@
     (("run",), "cwd"): "sandbox-dir",
     (("run",), "file"): "mapped-file",
     (("run",), "use"): "image",
+    # build -- Dockerfile build context and file paths on the host.
+    (("build",), "context"): "host-path",
+    (("build",), "dockerfile"): "host-path",
     # env / skill.
     (("env",), "vars"): "env-key",
     (("skill", "install"), "specs"): "skill-spec",
diff --git a/contree_cli/skill.py b/contree_cli/skill.py
index 4fe7f31..3c7634f 100644
--- a/contree_cli/skill.py
+++ b/contree_cli/skill.py
@@ -117,7 +117,10 @@ def forget_installed(skill: Skill) -> None:
 1. Use `contree` from PATH — no bundled wrapper needed.
 2. Output formats (global `-f` flag BEFORE the subcommand):
    `-f json` (JSONL), `-f json-pretty`, `-f csv`, `-f tsv`,
-   `-f plain`, `-f table`, `-f toml`, `-f default`
+   `-f plain`, `-f table`, `-f default`, and `-f toml` on Python 3.11+.
+   The authoritative list is whatever `contree --help` shows on this
+   install — if `toml` is missing there, this Python lacks `tomllib`
+   and `-f toml` will fail with an argparse error.
 3. For the full built-in manual: `contree agent`\
 """
 
@@ -126,9 +129,20 @@ def forget_installed(skill: Skill) -> None:
 ## Quick reference
 
 ```bash
-contree -S <key> use tag:alpine:latest
-contree -S <key> run -s -- apt-get update
-contree -S <key> run -- make test
+# 1) Discover what images are available -- do NOT assume a tag exists.
+contree images --prefix ubuntu         # narrow listing (preferred)
+# Fallback for when you don't know the prefix shape -- much slower:
+# contree -f plain images | grep -i ubuntu
+
+# 2) Bootstrap a session against a tag actually present in the listing.
+contree -S <key> use <image-or-tag-from-list>
+
+# 3) Run plain executables in direct mode; reach for -s only for
+#    pipes / redirects / && / ; / variable expansion.
+contree -S <key> run -- true
+contree -S <key> run -- uname -a
+
+# 4) See the full manual / per-command help.
 contree agent                          # full manual
 contree <command> --help               # per-command help
 ```\
diff --git a/contree_cli/skill_body.md b/contree_cli/skill_body.md
index 2193657..3419793 100644
--- a/contree_cli/skill_body.md
+++ b/contree_cli/skill_body.md
@@ -2,491 +2,120 @@
 
 {intro}
 
-Use `contree` from PATH. If not found, ask the user to install it:
-`uv tool install contree-cli` or `pip install contree-cli` or `pipx install contree-cli`.
+Use `contree` from PATH. If it is missing, ask the user to install it: `uv tool install contree-cli`, `pipx install contree-cli`, or `pip install contree-cli`.
 
-## Sandbox requirements (Codex)
+## Codex Sandbox
 
-`contree` requires network access (API calls) and write access to its
-data directory (`~/.config/contree-cli`). In Codex, add to `~/.codex/config.toml`:
+`contree` needs network access and write access to its data directory: `$CONTREE_HOME`, or `$XDG_CONFIG_HOME/contree`, or `~/.config/contree`.
+
+For the default data directory, add this to `~/.codex/config.toml`:
 
 ```toml
 [sandbox_workspace_write]
 network_access = true
-writable_roots = ["~/.config/contree-cli"]
+writable_roots = ["~/.config/contree"]
 ```
 
-Without this, `contree` will fail with `sqlite3.OperationalError`.
-If sandbox cannot be configured, stop and ask the user.
+If the user overrides `CONTREE_HOME` or `XDG_CONFIG_HOME`, the writable root must point at the resolved ConTree data directory. Without this, the CLI can fail with `sqlite3.OperationalError`. If the sandbox cannot be configured, stop and ask the user.
 
-## Quick start
+## Required Workflow
 
 {first_step}
-4. If something fails or syntax is unclear, run `contree agent <topic>` BEFORE retrying.
-   Topics: sessions, images, files, execution, output, profiles, commands.
-5. Agents must never run `contree auth`. If auth is missing or invalid, stop and ask the user to run `contree auth`.
-5. Choose an explicit session key before anything else and pass it on every command with `-S`, for example `agent_<task>` or `agent_<task>_<subagent>`.
-6. BEFORE choosing an image, list what is available.
-   Projects can have thousands of images — always use `--prefix` to filter:
-   `contree images --prefix python`
-   `contree images --prefix compiler/ubuntu`
-   Without prefix, use `-f plain` and grep: `contree -f plain images | grep tag`
-   Do NOT assume `ubuntu:latest` or any other tag exists. Pick from the actual list.
-7. Bootstrap the session with:
-   `contree -S <key> use <image-or-tag>`
-   `contree -S <key> cd /root`
-8. Inspect first with read-only commands, then mutate in small rollbackable steps.
-9. After installing tools or setting up an environment, TAG the image for reuse.
-   Convention: `PURPOSE/OS:TAG` — designed for search with `--prefix`.
-   Examples:
-     `contree -S <key> tag compiler/ubuntu:gcc`      (build-essential)
-     `contree -S <key> tag compiler/ubuntu:go`       (golang)
-     `contree -S <key> tag compiler/alpine:rust`     (rustup + cargo)
-     `contree -S <key> tag python/ubuntu:3.12-ml`    (python + numpy + pandas)
-     `contree -S <key> tag node/alpine:20`           (node.js 20)
-   ALWAYS search before building a new environment:
-     `contree images --prefix compiler/`   find all compiler images
-     `contree images --prefix python/`     find python environments
-   If a matching image exists, use it instead of rebuilding:
-     `contree -S <key> use tag:compiler/ubuntu:gcc`
-10. If no suitable image exists, import from any Docker registry:
-   `contree images import ubuntu:noble`            (Docker Hub)
-   `contree images import --timeout 600 ubuntu:noble`
-   `contree images import python:3.12-slim`        (Docker Hub)
-   `contree images import golang:1.22-alpine`      (Docker Hub)
-   `contree images import ghcr.io/org/image:tag`   (GitHub Container Registry)
-   `contree images import registry.example.com/img:tag`  (private)
-   Import is async — the CLI polls until complete. Press Ctrl+C to cancel.
-   Use `--timeout <seconds>` to raise or lower the import operation timeout.
-   After import, the image is available as `tag:<name>`.
-   For private registries, use `--username` (password is prompted).
-   TIP: importing a ready-made image is faster than installing from scratch.
-   For example, `images import rust:1.79-slim` gives you a full Rust
-   toolchain in seconds, vs minutes of `curl rustup | sh`.
-11. To inspect whether saved auth profiles actually work, run:
-   `contree -f json auth ls`
-   Use `-O` / `--offline` to skip network probes.
-
-## Session bootstrap details
-
-- In normal CLI use, `contree use IMAGE` prints a `CONTREE_SESSION` export line. Humans often use:
-  `eval $(contree use tag:ubuntu:latest)`
-- Agents should prefer passing `-S <key>` on every command instead of depending on exported shell state.
-- `contree use --new IMAGE` creates a fresh session key. Use it when you explicitly want new state instead of resuming an old session.
-- `contree use` without an image is read-only and prints current session state.
-- Inside `contree shell`, no `eval` is needed because the shell manages the active session internally.
-
-## Memory loop
-
-Sessions are the agent memory model. Reuse them deliberately instead of creating fresh state by default.
-
-1. `contree session list --filter <hint>`
-2. `contree session show --session <name>`
-3. `contree -S <name> use <image-or-tag>`
-
-If nothing suitable exists, create a new explicit session key and keep using it throughout the task.
-Unsure about sessions? Run `contree session --help` or `contree agent sessions`
-
-## Core workflow
-
-1. Discover command shape before execution when unsure:
-   `contree --help`
-   `contree <command> --help`
-   `contree session --help`
-2. Bind the task to an image or tag:
-   `contree -S <key> use tag:ubuntu:latest`
-3. Set the session working directory early:
-   `contree -S <key> cd /root`
-4. Inspect current state with:
-   `images`, `ls`, `cat`, `ps`, `show`, `session`, `session show`
-5. Build environments in separate operations:
-   install -> verify -> build -> test
-6. Tag useful results immediately:
-   `contree tag <result-uuid> <tag>`
-7. Use `session branch`, `session checkout`, and `session rollback` around risky changes.
-
-## Non-negotiable rules
-
-- Always pass `-S/--session` on agent-driven commands. Do not rely on auto-generated sessions.
-- `contree run` is remote execution. Host files are not visible unless attached with `--file` or staged with `contree file cp`.
-- Every `run` spawns a NEW isolated microVM. There is no way to exec into a running instance, attach to a process, or connect to a server started in a previous run. If you need a server response, start the server AND make the request in the same run using `-s`.
-- Keep one mutating step per `contree run`.
-- Do not chain stateful steps with `&&`, long shell expressions, or pipelines when the result should remain rollbackable.
-- ALWAYS use `-s` (shell mode) when passing shell commands as strings. Do NOT wrap in `sh -lc '...'` or `sh -c '...'` manually:
-  WRONG: `contree run -- sh -lc 'apt-get update -qq'`
-  RIGHT: `contree run -s -- apt-get update -qq`
-  The `-s` flag joins all args and passes to `sh -c` automatically.
-  Quotes are only needed for shell metacharacters like `&&`, `|`, `$`:
-    `contree run -s -- apt-get install -y curl`  (no quotes needed)
-    `contree run -s -- 'echo $HOME && ls /'`     (quotes needed for && and $)
-  Use direct mode (no `-s`) for simple executables: `contree run -- make test`
-  Unsure? Run `contree run --help` or `contree agent execution`
-- Prefer non-disposable runs when you want the environment to persist; use `--disposable` only for throwaway checks.
-- Prefer `--file` over `file cp` when you need files for a single run.
-  `file cp` stages files in the session for ALL future runs.
-  `--file` attaches files to just one run — cleaner and more explicit:
-  RIGHT: `contree run --file ./src:/app/src -- make -C /app/src`
-  AVOID: `contree file cp ./src /app/src` then `contree run -- make -C /app/src`
-  Use `file cp` only when you need files to persist across multiple runs without re-attaching.
-  Unsure? Run `contree file --help` or `contree agent files`
-- For detached (background) runs use `-d`/`--detach`:
-  `contree run -d -- long-running-server`
-  Then check: `contree ps`, `contree show UUID`
-- Use `contree cd /path` or `contree run -C /path` to set the working directory.
-  Do NOT use `cd` inside `-s` shell expressions — it does not persist and
-  clutters the command:
-  RIGHT: `contree cd /root/project` then `contree run -- make test`
-  RIGHT: `contree run -C /root/project -- make test`   (per-run override)
-  WRONG: `contree run -s -- 'cd /root/project && make test'`
-- Prefer absolute paths for sandbox workdirs and destination paths.
-- Search for reusable images before rebuilding: `contree images --prefix <prefix>`.
-- For common tools (rust, go, node, python, gcc, etc.) PREFER importing a ready-made
-  Docker image over manual installation. It is faster and more reliable:
-  Timeout values are in seconds when you use `--timeout`.
-  `contree images import rust:1-slim`
-  `contree images import --timeout 600 rust:1-slim`
-  `contree images import golang:1.22-alpine`
-  `contree images import node:20-slim`
-  Only install manually when you need a custom combination not available as a single image.
-- ALWAYS tag images after installing tools or setting up environments. Without tags, useful images are lost — they can only be found by UUID. Tags make images discoverable by future sessions and other agents.
-  Unsure about tagging? Run `contree tag --help` or `contree agent images`
-- Stay inside `contree ...` when the task specifically wants sandboxed execution rather than host-local commands.
-- If auth is missing, the CLI raises an API error that effectively means "No token configured. Run `contree auth` first." Treat that as a user action item, not something the agent should self-fix.
-- `contree auth profiles` is the default profile health check and shows `status` values `ok`, `timeout`, `error`, or `offline`.
-- `contree auth profiles --offline` is only for explicit no-network situations.
-- For automation, prefer `contree -f json auth profiles` over table output.
-
-## Command map
-
-- `use`: bind the session to an image or reusable tag.
-- `run`: execute a command in the current session image.
-- `build`: interpret a `Dockerfile` and produce a tagged image, reusing
-  cached layers per context directory. Prefer this over hand-running
-  each Dockerfile step when one already exists.
-- `ls` / `cat`: inspect files from the image without spawning a VM.
-- `cp`: download a file from the image to the host.
-- `file edit`: open a remote file in a host editor and stage it for the next run.
-- `file cp`: upload a local file and stage it for the next run.
-- `file ls`: list uploaded files; rows produced from this host carry a
-  `source` field (host path for `run --file` / `COPY`, URL for
-  `ADD URL`). Add `-q` for a tight `uuid sha256 source` view.
-
-  **`source` is THIS-MACHINE ONLY.** The mapping lives in the local
-  CLI SQLite cache (`$CONTREE_HOME/cli/sessions/<profile>.db`) keyed
-  by `path + inode + mtime + size` for host paths and by the URL
-  itself for URL fetches. It is not synced anywhere. Rows uploaded
-  from a different machine, by another teammate, or before tracking
-  landed will show an empty `source` -- that is expected, not a bug.
-  When working across hosts, treat the remote `uuid`/`sha256` as the
-  authoritative identifier and never rely on `source` resolving.
-- `session branch`: create an experimental branch.
-- `session checkout`: switch active branch.
-- `session rollback`: move the active branch pointer backward.
-- `session wait`: wait for active operations, or specific operation UUIDs.
-- `ps` / `show` / `kill`: inspect, read, or cancel a single operation.
-- `operation` (alias `op`): grouped namespace for the same actions plus
-  multi-UUID variants. Use this when monitoring background work.
-  - `op ls` -- same flags as `ps`, lists operations. Pipe to `-q` for UUIDs.
-  - `op show UUID1 UUID2 ...` -- fetch several operation results in one call.
-  - `op cancel UUID1 UUID2 ...` -- cancel several operations, or `--all`
-    to cancel every active one.
-
-## Execution patterns
-
-Good:
-
-```bash
-contree -S agent_build use tag:ubuntu:latest
-contree -S agent_build cd /root
-contree -S agent_build run -s -- apt-get update -qq
-contree -S agent_build run -s -- apt-get install -y build-essential
-contree -S agent_build run -- make -C /work build
-contree -S agent_build run -- make -C /work test
-```
-
-Note: use `-s` for shell commands (apt-get, pip, etc.) and direct mode
-for simple executables (make, cargo, python).
-
-Bad — chaining multiple steps:
-
-```bash
-contree -S agent_build run -s -- 'apt-get update && apt-get install -y build-essential && make test'
-```
-
-Why: a chained run collapses several mutable steps into one history entry, which weakens rollback, branching, and reuse.
-
-## Environment variables and PATH
-
-After installing tools that place binaries outside the default PATH
-(rustup, nvm, pyenv, etc.), you need to set env vars and persist them
-into the image so subsequent runs see them.
-
-Env vars are NEVER preserved in the image automatically. You must
-explicitly pass `--preserve-env` to save them into the resulting image.
-
-**`contree env`** sets session-level vars that the CLI sends on every run.
-Without `--preserve-env` they are injected per-run but not baked into images:
-
-```bash
-contree -S agent_build env PATH=/root/.cargo/bin:/usr/local/bin:/usr/bin:/bin:/sbin
-contree -S agent_build run -- cargo build              # PATH injected but NOT saved
-contree -S agent_build run --preserve-env -- cargo build  # PATH saved into image
-contree -S agent_build run -- cargo test                  # PATH still there from image
-```
-
-**`-e` + `--preserve-env`** — same idea for one-off vars:
-
-```bash
-contree run --preserve-env -e PATH="/root/.cargo/bin:/usr/bin:/bin" -- cargo build
-contree run -- cargo test   # PATH persists from preserved image
-```
-
-**`-e` without `--preserve-env`** is ephemeral — gone on next run:
-
-```bash
-contree run -e DEBUG=1 -- ./app
-```
-
-Do not use absolute binary paths — they are brittle and do not propagate
-to child processes. Use `env` or `-e` to set PATH instead.
+4. If syntax or behavior is unclear, consult the built-in manual before retrying: `contree agent <topic>` or `contree <command> --help`. Useful topics: `sessions`, `images`, `files`, `execution`, `output`, `profiles`, `command_safety`, `all_commands`, `all`.
+5. Do not run bare `contree auth` or any mutating auth command. Agents may run read-only `contree -f json auth ls` / `auth profiles`; if auth is missing or invalid, ask the user to run `contree auth`.
+6. Choose one explicit session key, then pass `-S <key>` on every current-session command: `use`, `run`, `cd`, `env`, `ls`, `cat`, `cp`, `file`, implicit-current-image `tag`, and current-session `session show/branch/checkout/rollback/wait`.
+7. Before `use`, list available images with a prefix. Do not assume a tag exists: `contree images --prefix python`, `contree images --prefix ubuntu`, `contree images --prefix compiler/`. An empty result just means that prefix has no tags in this project — broaden or vary the prefix (`python` vs `python-`, `compiler/` vs `compiler/python/`) before importing or rebuilding.
+8. Bootstrap: `contree -S <key> use <tag-or-image-from-list>` then `contree -S <key> cd /root`.
+9. Inspect first with `ls`, `cat`, `session show`, `ps`/`op ls`, or `op show`. Mutate in small rollbackable steps.
+10. After installing tools or setting up an environment, tag the result: `contree -S <key> tag <purpose/base:tag>`.
 
-## `run` modes
+Project-scoped or explicit-target commands usually do not need `-S`: `images`, `auth ls/profiles`, `op ls/show/wait/cancel`, `skill`, `agent`, `build`, `session list`, `session show NAME`, and help.
 
-`contree run` has four practical modes:
+## Running Commands
 
-- **Direct command** (default) — each arg is a separate argv entry:
-  `contree run -- uname -a`
+- `contree run` executes remotely. Host files are invisible unless attached with `--file` or staged with `contree file cp`.
+- Every `run` starts a fresh microVM. You cannot exec into a previous run or connect to a server started in a previous run. Start the server and client in the same `run -s` command when needed.
+- Prefer direct mode for plain executables: `contree -S <key> run -- make test`.
+- Use shell mode only for shell features such as pipes, redirects, `&&`, `;`, or variable expansion: `contree -S <key> run -s -- 'echo $HOME && ls /'`. Do not wrap `-s` commands in your own `sh -c`.
+- Use `run -- sh -lc '...'` only when a login shell is explicitly required. Prefer `env`, `-e`, `cd`, and `-C`.
+- Keep one mutating step per non-disposable run. Avoid chaining setup, build, and test into one history entry.
+- Use `contree -S <key> cd /path` or `run -C /path`; do not put `cd` inside shell expressions just to set the workdir.
+- Use `--disposable` only for throwaway checks. Non-disposable runs persist the resulting image in session history.
 
-- **Shell mode** (`-s`) — joins args into a single `sh -c` expression.
-  Use when you need pipes, redirects, `&&`, or variable expansion:
-  `contree run -s -- 'echo $HOME && ls /'`
-  `contree run -s -- 'cat /etc/passwd | grep root'`
+## Files
 
-- **Interpreter mode** (`-I`) — runs a local script file in the sandbox:
-  `contree run -I ./script.sh`
+- One-off attachment: `contree -S <key> run --file ./src:/work/src -- make -C /work/src`.
+- Stage for future runs: `contree -S <key> file cp ./config.yaml /etc/app/config.yaml`.
+- Edit a remote file and stage it: `contree -S <key> file edit /etc/app/config.ini`.
 
-- **Piped stdin** — non-TTY stdin is forwarded to the sandbox:
-  `echo 'uname -a' | contree run /bin/sh`
+Prefer `--file` for files needed by one command. Use `file cp` only when the staged file should be injected into multiple future runs. Pending files are included in the next run, including disposable runs; they are cleared only after a successful non-disposable run commits them into the next image. Explicit `--file` mappings win over pending files at the same destination.
 
-When to use shell mode vs direct:
-- Direct: `run -- make test` — clearer, no shell escaping issues
-- Shell: `run -s -- 'cd /app && make test'` — when you need shell features
-- Prefer `sh -lc` in direct mode for login shell: `run -- sh -lc 'command'`
+Directory attachments recurse and exclude common junk such as `.git`, hidden files, `__pycache__`, `.venv`, `node_modules`, `dist`, and `build`. Add patterns with `--file-excludes`.
 
-## Interactive shell behavior
+## Sessions And Rollback
 
-Inside `contree shell`:
+- Reuse sessions deliberately: `contree session list --filter <hint>`, then `contree -S <key> session show`.
+- Branch before risky work: `contree -S <key> session branch experiment` then `contree -S <key> session checkout experiment`.
+- `session rollback N` with positive `N` is an absolute history id, not "back N steps". Use: `session rollback` for one step back, `session rollback -- -3` for three steps back, `session rollback +1` for one step forward. Inspect with `session show` before rollback.
 
-- Bare commands are implicit sandbox `run` commands.
-- Bare `ls` and `cat` are special: they map to fast API inspection commands instead of spawning a VM.
-- If pending files exist, or if `ls`/`cat` arguments use flags or globs, the shell falls back to running them inside the sandbox.
-- Bare editor names like `vim`, `vi`, `nvim`, and `nano` map to `contree file edit`.
-- Flags like `-D`, `-e`, `-d`, or `--file` require the explicit `contree run` prefix.
+## Detached Work
 
-This means shell transcripts are convenient, but agent instructions should still be precise about whether a command is expected to use API inspection or remote execution.
+- Start detached: `contree -S <key> -f json run -d --disposable -- pytest tests/a`.
+- Capture UUIDs with global `-f json` before `run`; default detached output is not reliable for `jq`.
+- `op wait UUID...` is a pure observer. It polls, prints one row per completion with the server-reported `status` (`SUCCESS`/`FAILED`/`CANCELLED`) and a separate `exit_code` column for the sandbox process. It does not advance session state. The CLI's own exit code is 1 when any op finished non-`SUCCESS`, or the sandbox `exit_code` when a `SUCCESS` op exited non-zero, so `op wait && next` still composes naturally.
+- `op wait --all` is project-wide. Prefer explicit UUIDs when multiple agents or shells may share the project.
+- `session wait` with no UUIDs drains detached operations spawned from this session's local cache. Successful non-disposable runs advance the active branch; disposable runs are recorded as disposable branches.
+- `session wait UUID...` currently only polls; it does not load pending metadata and does not advance the branch. For explicit UUID workflows, extract the result image from `op show` or from the wait output (`.image` / `result_image_uuid`), then run `contree -S <key> use "$IMG"` or tag it.
 
-## Files and staged changes
+## Images, Imports, And Build
 
-- Inline injection: `contree run --file ./app.py:/app/app.py -- python /app/app.py`
-- Stage for next run: `contree file cp ./config.yaml /etc/app/config.yaml`
-- Edit an existing remote file: `contree file edit /etc/app/config.ini`
-- Pending files are merged automatically into the next non-disposable `run`.
-- Explicit `--file` mappings win over pending files on the same destination path.
-- Directory attachments recurse and exclude common junk by default: `.*`, `.git`, `*.pyc`, `__pycache__`, `.venv`, `.mypy_cache`, `.pytest_cache`, `node_modules`, `dist`, `build`.
-- Add more directory exclusion patterns with `--file-excludes`.
-- The CLI keeps a local upload cache keyed by path, inode, mtime, and size, so repeated attachments often avoid re-uploading.
+- Search before rebuilding: `contree images --prefix <prefix>`.
+- Prefer importing ready-made registry images for common toolchains: `contree images import rust:1-slim`, `contree images import node:20-slim`, `contree images import golang:1.22-alpine`. Use `--timeout <seconds>` for long imports.
+- Private registries use `--username`; password is prompted.
+- If the repo has a Dockerfile, prefer `contree build` over replaying each step by hand. `build` owns its own `build:<hash>` session; `-S` is harmless but does not bind it to your agent session. Verify from a normal session after the build.
+- Toolchain images often install binaries outside the default `PATH` (e.g. `golang:1.22-alpine` puts `go` at `/usr/local/go/bin/go`). After `use tag:<toolchain>`, probe `PATH` with `contree -S <key> run -- printenv PATH` and either set it for the session (`contree -S <key> env PATH=/usr/local/go/bin:/usr/local/bin:/usr/bin:/bin`) or per-run (`run -e PATH=...`). Pair with `--preserve-env` if you want the change baked into the image.
 
-Use staged files when several edits should land together on the next run. Use `--file` when the file is only needed for a single execution.
+## Output And Automation
 
-## Sessions, branching, and rollback
+- Global flags go before the subcommand: `contree -f json images --prefix python`.
+- Prefer structured output in automation: `json`, `json-pretty`, `csv`, or `tsv`. `toml` is available only on Python 3.11+.
+- `json` is line-delimited for streaming and multi-row commands.
+- Default `run` output prints raw stdout/stderr, not a structured row.
+- `cat` and `cp` are content-oriented; do not parse them as table/json listings.
 
-- Sessions are durable and backed by local SQLite state.
-- Use the same session key to resume task memory later.
-- Create branches before risky work:
-  `contree -S <key> session branch experiment`
-  `contree -S <key> session checkout experiment`
-- Roll back small units:
-  `contree -S <key> session rollback 1`
-- `session rollback` supports absolute IDs and relative navigation; inspect with `session show` before destructive movement.
-- Use `contree session show` to inspect the history DAG.
-- `session show` defaults to the last 20 entries unless asked for the full history.
+## Operation References
 
-## Detached operations
+Wherever `--help` shows a positional argument named `UUID_OR_REF` (`op show`, `op cancel`, `op wait`, top-level `show`/`kill`, `session wait`), the CLI accepts both real operation UUIDs and current-session history refs. Refs require `-S <key>` and a history entry whose `operation_uuid` is set (`use` entries have none; the error is "has no operation UUID"). Accepted forms:
 
-Use detached runs whenever a step is slow (large image imports, builds,
-test suites). The CLI returns immediately with an operation UUID;
-monitoring is then a polling problem rather than a blocking one.
+| Form | Meaning |
+|---|---|
+| `@`, `:`, `HEAD` | active branch tip |
+| `@N`, `:N`, bare `N` | absolute history id `N` |
+| `@-N`, `:-N`, `HEAD~N` | `N` steps back from the tip |
+| `HEAD~` | shorthand for `HEAD~1` |
+| `@+N`, `:+N` | `N` steps forward (latest child) |
 
-- Start long work detached: `contree -S <key> run -d -- long-job`
-- Fan out several jobs in parallel: each `run -d` returns its own UUID.
+When unsure, use `session show` to find the absolute id and pass that.
 
-Monitoring background operations:
+`contree op show --raw UUID_OR_REF...` (also `contree show --raw ...`) prints each operation's full server payload as JSONL — one compact JSON object per line, no derived columns, no stdout/stderr decoding. Use it when the flat row hides what you need (`metadata`, `resources`, raw `result.state`, …) or pipe it into `jq -c`.
 
-- `contree ps` -- active operations (PENDING, ASSIGNED, EXECUTING).
-- `contree ps -a` -- include completed/failed/cancelled.
-- `contree ps -q` -- UUIDs only, pipe-friendly.
-- `contree op ls` -- alias for `ps`, identical flags.
-- `contree show UUID` -- single-operation detail (status, duration,
-  exit code, stdout/stderr, resulting image).
-- `contree op show UUID1 UUID2 UUID3` -- fetch several operations in
-  one shot. Convenient when fanning out runs and checking the batch.
-- `contree session wait` -- block until all active ops of the current
-  session reach terminal state.
-- `contree session wait UUID1 UUID2` -- block on specific UUIDs.
-
-Cancelling:
-
-- `contree kill UUID` -- single operation.
-- `contree op cancel UUID1 UUID2` -- batch of UUIDs.
-- `contree op cancel --all` -- every active operation (use sparingly).
-
-Common patterns:
-
-```bash
-# Fan out: start three builds, wait for all, inspect each
-A=$(contree run -d -- make -C /work/a build | jq -r .uuid)
-B=$(contree run -d -- make -C /work/b build | jq -r .uuid)
-C=$(contree run -d -- make -C /work/c build | jq -r .uuid)
-contree session wait "$A" "$B" "$C"
-contree op show "$A" "$B" "$C"
-
-# Snapshot what is running right now
-contree -f json op ls | jq '.uuid'
-
-# Find recent failures across the project
-contree -f json ps -a -S FAILED --since=1h
-```
-
-## Output and automation
-
-- Prefer structured output in automation with `-f json`, `-f json-pretty`, `-f csv`, or `-f tsv`.
-- `contree run` propagates the sandbox exit code, so it works naturally in scripts.
-- For executable host scripts that should run inside the sandbox, prefer `contree run -I`.
-- If the environment might drop session-related env vars, keep `-S <key>` on every command instead of relying on exported state.
-- Global flags like `-f json` must go before the subcommand.
-- `run` with the default formatter prints raw stdout/stderr, not structured rows.
-- `cat` and `cp` are content-oriented commands; do not assume they will emit table/json-style records like listing commands do.
-
-## Using contree in subagents
-
-This skill teaches how to wire subagents correctly. It does NOT grant
-permission to spawn them — that requires explicit user authorization
-or a top-level agent policy.
-
-### Wiring a subagent for contree
-
-Subagents do NOT inherit skills automatically. You MUST either:
-
-1. Preload the skill in subagent frontmatter:
-   ```yaml
-   ---
-   name: build-agent
-   tools: Bash, Read, Grep
-   skills:
-     - contree
-   ---
-   ```
-
-2. Or restate the critical rules directly in the subagent prompt:
-   - Always use `-S <key>` on every command
-   - Use `contree agent` for the full built-in manual
-   - Bash must be in the subagent's allowed tools
-
-The subagent's `allowed-tools` MUST include `Bash` — without it,
-contree cannot execute.
-
-### Session isolation (mandatory)
-
-Every subagent MUST use its own unique session key. Sharing sessions
-between parallel subagents corrupts image state.
-
-Convention: `agent_<task>_<concern>`, e.g.:
-- `agent_build_go`, `agent_build_rust`, `agent_build_nim`
-- `agent_solve_approach1`, `agent_solve_approach2`
-
-### Parallel execution pattern
-
-When a task has multiple independent concerns (languages, approaches,
-experiments), launch one subagent per concern with isolated sessions:
-
-1. **One concern per subagent** — one language, one approach, one experiment.
-2. **Search for existing images first** — `contree images --prefix compiler/`.
-   Do NOT assume any tag exists. Pick from the actual list.
-3. **Use `--file` to inject local source** into the sandbox:
-   `contree -S agent_task_go run --file ./src:/work/src -- go build /work/src/...`
-4. **Use `contree cp` to retrieve outputs** back to the host:
-   `contree -S agent_task_go cp /work/output ./results/go/`
-5. **Verify after every run** — check with `contree ls` or content inspection
-   that the expected output actually exists before proceeding.
-6. **Save deterministic output paths** so the parent agent can collect results.
-
-Example — build & test in three languages simultaneously:
-
-```bash
-# Subagent 1 (Go):
-contree -S agent_task_go use tag:compiler/ubuntu:go
-contree -S agent_task_go cd /work
-contree -S agent_task_go run --file ./project:/work/project -- go build ./project/...
-contree -S agent_task_go run -- go test ./project/...
-contree -S agent_task_go cp /work/project/output ./results/go/
-
-# Subagent 2 (Rust):
-contree -S agent_task_rust use tag:compiler/ubuntu:rust
-contree -S agent_task_rust cd /work
-contree -S agent_task_rust run --file ./project:/work/project -- cargo build --manifest-path /work/project/Cargo.toml
-contree -S agent_task_rust run -- cargo test --manifest-path /work/project/Cargo.toml
-contree -S agent_task_rust cp /work/project/target ./results/rust/
-
-# Subagent 3 (Nim):
-contree -S agent_task_nim use tag:compiler/ubuntu:nim
-contree -S agent_task_nim cd /work
-contree -S agent_task_nim run --file ./project:/work/project -- nim compile /work/project/main.nim
-contree -S agent_task_nim cp /work/project/main ./results/nim/
-```
+## Subagents
 
-Each subagent works in complete isolation. The parent agent collects
-`./results/<lang>/` after all subagents finish.
+This skill does not grant permission to spawn subagents. If the host allows subagents and the user/policy authorizes them, give every subagent a unique `-S` key and restate the critical ConTree rules in the subagent prompt. Never share a session across parallel subagents.
 
-## Building from a Dockerfile
+## Fallback
 
-When a repo already has a `Dockerfile`, do not reproduce each step by
-hand. Run `contree build` instead:
+Use the built-in manual instead of carrying all reference material in this skill:
 
 ```bash
-contree build . --tag myapp:dev
-contree build ./app --dockerfile ./app/Dockerfile.prod --tag svc:prod
-contree build . --build-arg VERSION=1.2
-contree build . --no-cache
+contree agent
+contree agent sessions
+contree agent images
+contree agent files
+contree agent execution
+contree agent output
+contree agent profiles
+contree <command> --help
 ```
 
-- Cache is keyed by `abspath(CONTEXT)`. Same context + same Dockerfile
-  + same build args = full layer cache hit on re-runs.
-- Supported directives: `FROM`, `RUN`, `COPY`, `ADD` (local paths
-  only), `WORKDIR`, `ENV`, `ARG`, `USER`. `CMD`/`ENTRYPOINT`/`LABEL`
-  /`EXPOSE`/`VOLUME`/`STOPSIGNAL`/`MAINTAINER`/`HEALTHCHECK`/`ONBUILD`
-  /`SHELL` are parsed but skipped with a warning.
-- Multi-stage (`FROM ... AS x`, `COPY --from=x`) is not yet supported;
-  use a single linear pipeline for now.
-- `<CONTEXT>/.dockerignore` filters `COPY`/`ADD` walks. Globs `*` /
-  `**` / `?` / `[abc]` work; trailing `/` matches a directory and
-  everything below it; lines starting with `!` re-include.
-- Tag the resulting image with `--tag NAME[:TAG]` to make it
-  reusable.
-
-Use `contree build --help` for the full flag list.
-
-## Built-in manual
-
-If something doesn't work or you need more details on a specific topic,
-consult the built-in manual:
-
-  `contree agent`                full manual
-  `contree agent sessions`       session management details
-  `contree agent files`          file attachment syntax and caching
-  `contree agent images`         tagging, importing, conventions
-  `contree agent execution`      run modes, shebang, detach
-  `contree agent output`         JSON/CSV output and jq examples
-  `contree agent profiles`       multi-project setup
-
-Each topic is self-contained with examples and edge cases.
 {fallback}{references}
diff --git a/contree_cli/types.py b/contree_cli/types.py
index c98a4ff..21f940e 100644
--- a/contree_cli/types.py
+++ b/contree_cli/types.py
@@ -42,6 +42,7 @@
         "profile": ("-p", "--profile"),
         "offline": ("-O", "--offline"),
         "status": ("--status",),
+        "raw": ("--raw",),
         # run
         "cwd": ("-C", "--cwd"),
         "detach": ("-d", "--detach", "--no-wait"),
diff --git a/docs/commands/agent.md b/docs/commands/agent.md
index c9087c6..c56f145 100644
--- a/docs/commands/agent.md
+++ b/docs/commands/agent.md
@@ -1,4 +1,4 @@
-# agent
+# agent - Coding-agent manual
 
 Display the coding-agent manual — a guide for AI agents and automated
 workflows using contree-cli.
diff --git a/docs/commands/auth.md b/docs/commands/auth.md
index 4ef71af..ee3d539 100644
--- a/docs/commands/auth.md
+++ b/docs/commands/auth.md
@@ -1,4 +1,4 @@
-# auth
+# auth - Configure credentials and profiles
 
 Configure authentication tokens and manage profiles. Each profile stores
 credentials for a different project or environment.
diff --git a/docs/commands/build.md b/docs/commands/build.md
index 8c4b85a..d63820b 100644
--- a/docs/commands/build.md
+++ b/docs/commands/build.md
@@ -1,5 +1,5 @@
 % build command reference for the Docker-style Dockerfile interpreter
-# build
+# build - Build an image from a Dockerfile
 
 Build an image from a `Dockerfile`. Each directive runs against the
 contree API and produces a new image layer; successful layers are
@@ -79,6 +79,19 @@ contree session list --filter build:
 contree session show
 ```
 
+:::{note}
+`build` is **project-scoped from the user's point of view**: it does
+not bind to the agent's `-S <key>` session. Passing `-S` is harmless
+but does not move that key's image. After a successful build, attach
+the result to your normal agent session by tag:
+
+```bash
+contree build . --tag myapp:dev
+contree -S agent_verify use tag:myapp:dev
+contree -S agent_verify run -D -- myapp --version
+```
+:::
+
 ## `.dockerignore`
 
 `contree build` reads `<CONTEXT>/.dockerignore` and filters every
diff --git a/docs/commands/cat.md b/docs/commands/cat.md
index 9440ad5..5aab885 100644
--- a/docs/commands/cat.md
+++ b/docs/commands/cat.md
@@ -1,4 +1,4 @@
-# cat
+# cat - Show file content from the image
 
 Display the contents of a file from the session image.
 
diff --git a/docs/commands/cd.md b/docs/commands/cd.md
index 0f7cbe2..fe66be4 100644
--- a/docs/commands/cd.md
+++ b/docs/commands/cd.md
@@ -1,4 +1,4 @@
-# cd
+# cd - Change session working directory
 
 Change the working directory for subsequent commands in the current session.
 
diff --git a/docs/commands/cp.md b/docs/commands/cp.md
index 49508dc..e02257e 100644
--- a/docs/commands/cp.md
+++ b/docs/commands/cp.md
@@ -1,4 +1,4 @@
-# cp
+# cp - Download a file from the image
 
 Download a file from the session image to a local path.
 
diff --git a/docs/commands/env.md b/docs/commands/env.md
index f178816..b841ce4 100644
--- a/docs/commands/env.md
+++ b/docs/commands/env.md
@@ -1,4 +1,4 @@
-# env
+# env - Manage session environment variables
 
 Manage session-level environment variables. Variables set with `env` are
 applied to every `contree run` automatically. Per-run `-e` flags override
@@ -17,8 +17,8 @@ contree env DEBUG=1 DB_HOST=localhost
 contree env
 
 # Unset variables
-contree env -d PATH
-contree env -d DEBUG DB_HOST
+contree env -U PATH
+contree env -U DEBUG DB_HOST
 
 # Per-run -e overrides session env
 contree run -e DEBUG=0 -- ./app
diff --git a/docs/commands/file.md b/docs/commands/file.md
index a8e4f48..0b63a88 100644
--- a/docs/commands/file.md
+++ b/docs/commands/file.md
@@ -1,4 +1,4 @@
-# file
+# file - Stage file edits for the next run
 
 Stage file changes for the next `contree run`. Pending files are
 automatically included without needing `--file` flags.
diff --git a/docs/commands/images.md b/docs/commands/images.md
index b251167..b78c882 100644
--- a/docs/commands/images.md
+++ b/docs/commands/images.md
@@ -1,4 +1,4 @@
-# images
+# images - List and import images
 
 List images in the project. Images are the filesystem snapshots that sandboxes
 run from -- every non-disposable `contree run` produces a new one.
diff --git a/docs/commands/index.md b/docs/commands/index.md
index 474312b..4d92285 100644
--- a/docs/commands/index.md
+++ b/docs/commands/index.md
@@ -7,15 +7,16 @@
 :maxdepth: 1
 :caption: Commands
 
+auth
 use
 run
 build
 images
 tag
+operation
 ps
-kill
 show
-operation
+kill
 ls
 cat
 cp
@@ -24,7 +25,6 @@ env
 file
 session
 shell
-auth
 skill
 agent
 ```
diff --git a/docs/commands/kill.md b/docs/commands/kill.md
index 78e7315..e466b35 100644
--- a/docs/commands/kill.md
+++ b/docs/commands/kill.md
@@ -1,6 +1,15 @@
-# kill
+# kill - Cancel operations
 
-Cancel a running operation. Only active operations (PENDING, ASSIGNED,
+:::{note}
+**`contree kill` is a top-level shortcut for {doc}`operation cancel <operation>` (`contree op cancel`).**
+
+Both share one argparse setup and one handler. The top-level `kill`
+accepts the same positional UUIDs and `--all` flag as `op cancel`,
+including multiple UUIDs in a single invocation. See the
+{doc}`operation` page for the full description.
+:::
+
+Cancel running operations. Only active operations (PENDING, ASSIGNED,
 EXECUTING) can be cancelled.
 
 ## Examples
@@ -9,6 +18,9 @@ EXECUTING) can be cancelled.
 # Cancel a specific operation
 contree kill 3f2a7b...
 
+# Cancel multiple operations in one call
+contree kill 3f2a7b... a1b2c3... 9d8e7f...
+
 # Cancel all active operations
 contree kill --all
 ```
@@ -20,14 +32,20 @@ contree kill --all
 
 ## Behavior
 
-The CLI sends a `DELETE` request to the API. The operation transitions to
-`CANCELLED` status. If the sandbox is already running, execution is
-interrupted.
+The CLI sends a `DELETE` request to the API for each UUID. The
+operation transitions to `CANCELLED` status. If the sandbox is already
+running, execution is interrupted.
+
+`--all` finds and cancels every active operation in the project. When
+`--all` is combined with explicit UUIDs, `--all` wins and the explicit
+UUIDs are ignored with a `WARNING`.
 
-`--all` finds and cancels every active operation in the project.
+On per-UUID API errors (e.g. 404 for an unknown UUID), the command
+logs the failure and continues with the remaining UUIDs, exiting with
+status `1` at the end.
 
 ## See also
 
-- {doc}`ps` -- list operations to find UUIDs
-- {doc}`operation` -- multi-UUID variant: `contree op cancel UUID1 UUID2 ...`
-- {doc}`run` -- Ctrl-C during `contree run` also cancels the operation
+- {doc}`operation` — the canonical command (`contree kill` is its shortcut)
+- {doc}`ps` — list operations to find UUIDs
+- {doc}`run` — Ctrl-C during `contree run` also cancels the operation
diff --git a/docs/commands/ls.md b/docs/commands/ls.md
index 6108a2b..0abbcd7 100644
--- a/docs/commands/ls.md
+++ b/docs/commands/ls.md
@@ -1,4 +1,4 @@
-# ls
+# ls - List files in the image
 
 List files and directories in the session image without spawning a sandbox.
 
diff --git a/docs/commands/operation.md b/docs/commands/operation.md
index 0e2a51a..676abf6 100644
--- a/docs/commands/operation.md
+++ b/docs/commands/operation.md
@@ -1,4 +1,4 @@
-# operation (op)
+# operation - Manage operations
 
 Manage operations under a single namespace. Aggregates `ps` (list),
 `show` (inspect), and `kill` (cancel), and adds **multi-UUID support** to
@@ -11,8 +11,9 @@ Manage operations under a single namespace. Aggregates `ps` (list),
 | Subcommand | Aliases | Description |
 |------------|---------|-------------|
 | `list` | `ls` | List operations. Same flags as `contree ps`. |
-| `show UUID [UUID...]` | -- | Show one or more operation results. |
-| `cancel UUID [UUID...]` | -- | Cancel one or more operations (or `--all`). |
+| `show UUID [UUID...]` | `sh` | Show one or more operation results. |
+| `wait UUID [UUID...]` | `w` | Wait for operations to reach a terminal status (or `--all`). |
+| `cancel UUID [UUID...]` | `kill`, `k` | Cancel one or more operations (or `--all`). |
 
 ## Examples
 
@@ -20,7 +21,7 @@ Manage operations under a single namespace. Aggregates `ps` (list),
 # List active operations (same as `contree ps`)
 contree op list
 contree op ls
-contree op ls -a -S FAILED       # all flags from ps are accepted
+contree op ls -a --status FAILED # all flags from ps are accepted
 
 # Inspect a single operation
 contree op show 3f2a7b...
@@ -50,10 +51,12 @@ usage and routes to the three subcommands described below.
 ## `op list` -- dynamic columns
 
 `contree op list` (alias `op ls`) accepts the same filter flags as
-`contree ps` (`-a`, `-S STATUS`, `-K KIND`, `--since`, `--until`,
-`-q`/`--quiet`) and shares its rendering pipeline. Reach for it when
-you want the operations namespace to feel symmetric with the
-multi-UUID `show` and `cancel`; otherwise `contree ps` is just as good.
+`contree ps` (`-a`, `--status STATUS`, `-K KIND`, `--since`,
+`--until`, `-q`/`--quiet`) and shares its rendering pipeline. Reach
+for it when you want the operations namespace to feel symmetric with
+the multi-UUID `show` and `cancel`; otherwise `contree ps` is just
+as good. `-S` is not a status shorthand: it is the global session
+flag and is only accepted BEFORE the subcommand.
 
 ```{terminal-shell} contree op list --help
 ```
@@ -79,10 +82,20 @@ otherwise push the rest of the row out of alignment.
 ## `op show` -- multiple UUIDs
 
 Each UUID is fetched and rendered through the same code path as
-`contree show`, so cached terminal results and `@N` history references
-work uniformly. On API errors (e.g. 404 for an unknown UUID), the
-command logs the failure and continues with the remaining UUIDs, exiting
-with status `1` at the end.
+`contree show`, so cached terminal results and history references work
+uniformly. Accepted reference forms (mirroring `session rollback`
+syntax with a git-style alias):
+
+- `@`, `:`, or `HEAD` -- the operation at the active branch tip.
+- `@N` (or `:N`, bare `N`) -- absolute history id.
+- `@-N`, `:-N`, or `HEAD~N` -- walk N steps back from the tip.
+- `HEAD~` -- shorthand for `HEAD~1`.
+- `@+N` (or `:+N`) -- walk N steps forward from the tip, picking the
+  latest child at each branch point.
+
+On API errors (e.g. 404 for an unknown UUID), the command logs the
+failure and continues with the remaining UUIDs, exiting with status
+`1` at the end.
 
 ```{terminal-shell} contree op show --help
 ```
@@ -93,6 +106,84 @@ currently renders as its own mini-table. Use `default` or `json` for a
 unified stream view across multiple UUIDs.
 :::
 
+## `op wait` -- block until completion
+
+Poll the given operations until each reaches a terminal status
+(`SUCCESS`, `FAILED`, `CANCELLED`) and print one row per completion
+with the columns `uuid`, `status`, `exit_code`, `timed_out`,
+`duration` (and every other scalar field the API returns; `error` is
+pinned to the last column).
+
+`--all` waits for every currently active operation in the project.
+`--timeout SECONDS` (default `60`) caps the wait — when the deadline
+hits, the command emits one extra row per unfinished operation with
+`timed_out=true` and the operation's last observed status (e.g.
+`EXECUTING`), then exits with status `1`.
+
+`status` is the server's word: it reflects orchestration (did the
+API run the job?), not what the sandbox process did with its exit
+code. The exit code is a separate column. The CLI's own exit status
+is `1` whenever any operation finished non-`SUCCESS`, or the actual
+`exit_code` when a `SUCCESS` op exited non-zero — so
+`op wait UUID && next-step` composes correctly with sandbox commands
+like `run -- false`.
+
+:::{important}
+`op wait` is a **pure observer**: it polls operation status and
+prints rows, but it **never updates session state**. In particular,
+the `detached-<op-uuid>` branch created when you ran
+`contree run -d` keeps pointing at the **starting** image — `op
+wait` does not advance it to the result image. The pattern therefore
+fits non-image-producing runs (`--disposable`) most cleanly; for
+non-disposable fan-out, the result image of each leg lives only on
+the server and you must recover it explicitly (see the non-disposable
+example below).
+:::
+
+:::{warning}
+`--all` is **project-scoped**. If multiple agents (or multiple shell
+sessions) share the same project, `op wait --all` will block on every
+active operation across all of them — not just the ones you launched.
+The wait still completes correctly; it just waits for more than you
+might expect. For multi-agent setups, prefer the explicit
+`op wait UUID1 UUID2 ...` form with the UUIDs you actually own.
+:::
+
+```{terminal-shell} contree op wait --help
+```
+
+Preferred — `--disposable` fan-out, with no image to track. The
+global `-f json` must come before `run` so that `jq` receives JSON;
+the default formatter is plain.
+
+```bash
+A=$(contree -f json run -d --disposable -- pytest tests/a | jq -r .uuid)
+B=$(contree -f json run -d --disposable -- pytest tests/b | jq -r .uuid)
+C=$(contree -f json run -d --disposable -- pytest tests/c | jq -r .uuid)
+contree op wait "$A" "$B" "$C"
+contree op show "$A" "$B" "$C"          # stdout/stderr per leg
+```
+
+Non-disposable fan-out — must recover the chosen leg's image yourself:
+
+```bash
+A=$(contree -f json run -d -- apt-get install -y curl | jq -r .uuid)
+B=$(contree -f json run -d -- apt-get install -y wget | jq -r .uuid)
+contree op wait "$A" "$B"
+
+# Pull the result image out and bind it back into the session,
+# or tag it for later reuse.
+IMG_A=$(contree -f json op show "$A" | jq -r .image)
+contree use "$IMG_A"
+contree tag "$IMG_A" feature/curl-tools
+```
+
+Block on the whole project (5 min cap):
+
+```bash
+contree op wait --all --timeout 300
+```
+
 ## `op cancel` -- multiple UUIDs or `--all`
 
 Either pass UUIDs explicitly or use `--all` to cancel every active
@@ -111,22 +202,25 @@ contree op cancel --all ignored-1
 
 ## Comparison with the top-level commands
 
-`contree op` does not replace `ps`/`show`/`kill` -- those keep their
-single-target semantics. The new namespace exists for grouping and for
-multi-UUID workflows:
+`contree ps`, `contree show`, and `contree kill` are top-level
+**shortcuts** that share the same argparse setup and handler as
+`op list`, `op show`, and `op cancel` respectively — there is no
+separate implementation.
 
 | Need | Use |
 |------|-----|
 | List active operations | `contree ps` *or* `contree op ls` |
 | Inspect one operation | `contree show UUID` *or* `contree op show UUID` |
 | Inspect multiple | `contree op show UUID1 UUID2 ...` |
+| Block on multiple | `contree op wait UUID1 UUID2 ...` |
+| Block on everything active | `contree op wait --all` |
 | Cancel one operation | `contree kill UUID` *or* `contree op cancel UUID` |
-| Cancel multiple | `contree op cancel UUID1 UUID2 ...` |
+| Cancel multiple | `contree kill UUID1 UUID2 ...` *or* `contree op cancel UUID1 UUID2 ...` |
 | Cancel everything active | `contree kill --all` *or* `contree op cancel --all` |
 
 ## See also
 
-- {doc}`ps` -- single-purpose list command (delegated to by `op list`)
+- {doc}`ps` -- top-level shortcut for `op list`
 - {doc}`show` -- single-UUID inspect (delegated to by `op show`)
-- {doc}`kill` -- single-UUID cancel
+- {doc}`kill` -- top-level shortcut for `op cancel`
 - {doc}`run` -- the command that creates operations
diff --git a/docs/commands/ps.md b/docs/commands/ps.md
index 9c40539..fd59e07 100644
--- a/docs/commands/ps.md
+++ b/docs/commands/ps.md
@@ -1,4 +1,14 @@
-# ps
+# ps - List activity
+
+:::{note}
+**`contree ps` is a top-level shortcut for {doc}`operation list <operation>` (`contree op ls`).**
+
+Both share one argparse setup and one handler — `ps` exists for the
+Docker-like UX. New flags or columns added to `operation list` apply
+automatically here. See the {doc}`operation` page for the full
+description of dynamic columns, error handling, and multi-UUID
+workflows in the operation namespace.
+:::
 
 List operations and their statuses. By default shows only active operations
 (PENDING, ASSIGNED, EXECUTING).
@@ -15,8 +25,8 @@ contree ps -a
 # UUIDs only (for scripting)
 contree ps -q
 
-# Filter by status
-contree ps -S FAILED
+# Filter by status (note: --status, not -S; -S is the global session flag)
+contree ps --status FAILED
 
 # Filter by kind
 contree ps -K instance
@@ -54,7 +64,7 @@ to the last column. See {doc}`operation` for the full description.
 
 ## See also
 
-- {doc}`show` -- inspect a specific operation
-- {doc}`kill` -- cancel a running operation
-- {doc}`operation` -- group + multi-UUID variants (`contree op ls/show/cancel`)
-- {doc}`/tutorial/workflows` -- monitoring and scripting patterns
+- {doc}`operation` — the canonical command (`contree ps` is its shortcut)
+- {doc}`show` — inspect a specific operation
+- {doc}`kill` — cancel a running operation
+- {doc}`/tutorial/workflows` — monitoring and scripting patterns
diff --git a/docs/commands/run.md b/docs/commands/run.md
index bb0a09a..cd0c7b3 100644
--- a/docs/commands/run.md
+++ b/docs/commands/run.md
@@ -1,4 +1,4 @@
-# run
+# run - Execute a command in the sandbox
 
 Spawn a sandbox instance from the session image and execute a command.
 
diff --git a/docs/commands/session.md b/docs/commands/session.md
index f3a8d1f..c157c71 100644
--- a/docs/commands/session.md
+++ b/docs/commands/session.md
@@ -1,4 +1,4 @@
-# session
+# session - Manage sessions, branches, history
 
 Manage session branches and history. Sessions track the image state as you
 run commands, with support for branching and rollback.
@@ -31,8 +31,14 @@ contree session branch
 # Undo last operation
 contree session rollback
 
-# Undo last 3 operations
-contree session rollback 3
+# Undo last 3 operations (`--` stops argparse from eating `-3` as a flag)
+contree session rollback -- -3
+
+# Forward one entry
+contree session rollback +1
+
+# Absolute jump to a specific history id (use `session show` first)
+contree session rollback 42
 
 # Import image from another session
 contree session use other-session
diff --git a/docs/commands/shell.md b/docs/commands/shell.md
index cc04014..8881ddc 100644
--- a/docs/commands/shell.md
+++ b/docs/commands/shell.md
@@ -1,4 +1,4 @@
-# shell
+# shell - Interactive REPL
 
 Start an interactive REPL for managing sessions and running sandbox commands.
 
diff --git a/docs/commands/show.md b/docs/commands/show.md
index 6ca7fc6..584a40a 100644
--- a/docs/commands/show.md
+++ b/docs/commands/show.md
@@ -1,14 +1,43 @@
-# show
+# show - Inspect an operation
 
-Display the full result of an operation, including stdout and stderr from
-sandbox execution.
+:::{note}
+**`contree show` is a top-level shortcut for {doc}`operation show <operation>` (`contree op show`).**
+
+Both share one argparse setup and one handler. The top-level `show`
+accepts one or more UUIDs and history references — each entry renders
+as its own row. Accepted reference forms:
+
+- `@`, `:`, or `HEAD` — the operation at the active branch tip.
+- `@N`, `:N`, bare `N` — absolute history id.
+- `@-N`, `:-N`, `HEAD~N` — N steps back from the tip.
+- `HEAD~` — shorthand for `HEAD~1`.
+- `@+N`, `:+N` — N steps forward from the tip (latest child).
+
+See the {doc}`operation` page for the full description.
+:::
+
+Display the full result of one or more operations, including stdout
+and stderr from sandbox execution.
 
 ## Examples
 
 ```bash
-# Show operation details
+# Show a single operation
 contree show 3f2a7b...
 
+# Show multiple operations in one call
+contree show 3f2a7b... a1b2c3... 9d8e7f...
+
+# History references (resolved against the active session)
+contree show @5 @4 @3
+# Relative to the active branch tip (like `session rollback`)
+contree show @-1          # the operation one step back from the tip
+contree show @+1          # the next operation forward (latest child)
+# Git-style HEAD notation, equivalent to @ and @-N
+contree show HEAD         # current tip operation
+contree show HEAD~        # one step back (shorthand for HEAD~1)
+contree show HEAD~3       # three steps back from the tip
+
 # JSON output for scripting
 contree -f json show 3f2a7b...
 
@@ -24,15 +53,35 @@ contree show UUID
 
 ## Output
 
-The command displays:
+The command renders every scalar top-level field the API returns
+(typically: **uuid**, **kind**, **status**, **created_at**,
+**started_at**, **finished_at**, **duration**, **session_key**, …) and
+adds these derived fields:
+
+- **exit_code** -- the sandbox process exit code (extracted from
+  `metadata.result.state.exit_code`)
+- **image** -- resulting image UUID from `result.image`
+- **tag** -- image tag from `result.tag`
+- **stdout / stderr** -- sandbox output, decoded (for `default`,
+  `json`, and `json-pretty` formats)
+
+`status` is the server's word: it reflects whether the API ran the
+operation to completion, not whether the sandbox process exited with
+zero. A `SUCCESS` row with `exit_code=1` means "the API completed the
+job; your command returned 1". `error` is pinned to the last column.
+Nested objects (`metadata`, `result`) are dropped from the flat row
+— use `--raw` for the full server payload, or `-f json` to keep the
+flat structured row.
+
+Pass `--raw` to skip all of the above and print each operation's
+full server JSON payload as JSONL (one object per line) to stdout,
+verbatim. Streams cleanly into `jq -c`. Useful for debugging or
+pulling fields the table view omits (resources, full metadata, etc.).
 
-- **uuid**, **kind**, **status**, **duration** -- operation metadata
-- **exit_code** -- the sandbox process exit code (if completed)
-- **error** -- error message (if failed)
-- **image** -- resulting image UUID (for non-disposable runs)
-- **tag** -- image tag (if assigned)
-- **stdout / stderr** -- sandbox output (for `default`, `json`, and
-  `json-pretty` formats)
+Timestamps come back from the API in UTC and are converted to the
+**local timezone** for human-readable formatters (`default`, `table`,
+`csv`, `tsv`, `plain`). The JSON formatters preserve the source
+timezone offset.
 
 For `csv`, `tsv`, and `table` formats, stdout/stderr are omitted -- use
 `default` or `json` to see sandbox output.
diff --git a/docs/commands/skill.md b/docs/commands/skill.md
index 821d279..c0e055d 100644
--- a/docs/commands/skill.md
+++ b/docs/commands/skill.md
@@ -1,4 +1,4 @@
-# skill
+# skill - Install agent skills
 
 Install, remove, or upgrade ConTree agent skills for Codex and Claude Code.
 
diff --git a/docs/commands/tag.md b/docs/commands/tag.md
index ac07f48..96d4fb9 100644
--- a/docs/commands/tag.md
+++ b/docs/commands/tag.md
@@ -1,4 +1,4 @@
-# tag
+# tag - Tag or untag an image
 
 Assign or remove a tag from an image. Tags are human-readable names that
 make images easier to reference.
diff --git a/docs/commands/use.md b/docs/commands/use.md
index 7a5456f..9075bca 100644
--- a/docs/commands/use.md
+++ b/docs/commands/use.md
@@ -1,4 +1,4 @@
-# use
+# use - Choose an image for the session
 
 Set the session image or show the current session state.
 
diff --git a/docs/tutorial/configuration.md b/docs/tutorial/configuration.md
index 6e98a32..57cc6de 100644
--- a/docs/tutorial/configuration.md
+++ b/docs/tutorial/configuration.md
@@ -61,10 +61,13 @@ SHA256), active status, and a health check result.
 `auth ls` verifies each profile against the API with a 2-second timeout.
 Possible status values:
 
-- `ok` — token is valid
+- `ok` — token is valid and has the required sandbox permission
 - `timeout` — server did not respond in time
 - `error` — bad token or network error
 - `offline mode` — you passed `-O` / `--offline`
+- `no url` — the profile has no API URL configured (re-run `contree auth`)
+- `inactive` — token authenticates, but the configured project does not
+  grant the sandbox permission this CLI needs
 
 Skip the network check:
 
diff --git a/docs/tutorial/sessions.md b/docs/tutorial/sessions.md
index d6a3697..824d4f3 100644
--- a/docs/tutorial/sessions.md
+++ b/docs/tutorial/sessions.md
@@ -121,27 +121,44 @@ contree session branch
 
 ## Rollback
 
-Undo the last N operations on the current branch:
+Move the branch pointer in the history chain. The argument distinguishes
+absolute jumps from relative navigation:
+
+| Argument | Meaning |
+|---|---|
+| _(none)_ | Back one entry (default) |
+| `-- -N` | Back N entries (the `--` stops argparse from parsing `-N` as a flag) |
+| `+N` | Forward N entries |
+| `N` (bare positive) | **Absolute** jump to history id `N` |
 
 ::::{tab-set}
 :::{tab-item} CLI
 ```bash
-contree session rollback      # undo last 1
-contree session rollback 3    # undo last 3
+contree session rollback           # back one entry
+contree session rollback -- -3     # back three entries
+contree session rollback +1        # forward one entry
+contree session rollback 42        # absolute jump to history id 42
 ```
 :::
 
 :::{tab-item} Shell
 ```text
 contree session rollback
-contree session rollback 3
+contree session rollback -- -3
+contree session rollback +1
+contree session rollback 42
 ```
 :::
 ::::
 
-This moves the branch pointer backwards in the history chain. The history
-entries still exist and can be recovered by creating a branch at a specific
-point.
+:::{warning}
+A bare positive number is an **absolute** history id, not "back N steps".
+Use `--` followed by a negative number for relative back-navigation.
+Inspect with `contree session show` first to avoid surprise jumps.
+:::
+
+The history entries still exist and can be recovered by creating a branch at
+a specific point.
 
 ## Starting a fresh session
 
diff --git a/docs/tutorial/workflows.md b/docs/tutorial/workflows.md
index 86f9909..a596008 100644
--- a/docs/tutorial/workflows.md
+++ b/docs/tutorial/workflows.md
@@ -273,6 +273,102 @@ contree kill --all
 :::
 ::::
 
+### Fan-out + wait
+
+When several independent steps can run at the same time, spawn each
+one detached and join them with `contree op wait` (short for
+`contree operation wait`). The wait command polls the API and prints
+one row per operation as soon as it reaches a terminal status, with columns
+`uuid`, `status`, `exit_code`, `timed_out`, `duration`, and any other
+scalar field the API returns. The `status` column is the server's
+verdict (did the API run the job?) and is reported verbatim; the
+sandbox process's own exit code is in the separate `exit_code` column.
+The CLI exit status is `1` when any op finished non-`SUCCESS`, or the
+actual `exit_code` when a `SUCCESS` op exited non-zero, so the wait
+still composes naturally with `&&`.
+
+:::{important}
+`op wait` is a **pure observer** — it polls completion status but
+**does not touch local session state**. That makes the pattern most
+natural with `--disposable` (no image to track). For non-disposable
+fan-out, the result images live only on the server; the
+`detached-<op-uuid>` branches created at spawn time still point at
+the **starting** image and never get moved. See the non-disposable
+recovery example below.
+:::
+
+The preferred shape — disposable runs, parallel independent checks.
+The global `-f json` must come BEFORE the subcommand so that `jq`
+gets JSON; the default `run -d` formatter is plain.
+
+```bash
+# Three parallel test suites, results discarded after the runs
+A=$(contree -f json run -d --disposable -- pytest tests/a | jq -r .uuid)
+B=$(contree -f json run -d --disposable -- pytest tests/b | jq -r .uuid)
+C=$(contree -f json run -d --disposable -- pytest tests/c | jq -r .uuid)
+
+# Block until each one finishes (or 60 s elapses, whichever comes first)
+contree op wait "$A" "$B" "$C"
+
+# Inspect stdout/stderr per leg
+contree op show "$A" "$B" "$C"
+```
+
+Non-disposable fan-out works too, but you have to recover the result
+images yourself — `op wait` will not bind them into the session:
+
+```bash
+A=$(contree -f json run -d -- apt-get install -y curl | jq -r .uuid)
+B=$(contree -f json run -d -- apt-get install -y wget | jq -r .uuid)
+contree op wait "$A" "$B"
+
+# Pull the winning leg's image out of the operation result and
+# attach it to the active session.
+IMG_A=$(contree -f json op show "$A" | jq -r .image)
+contree use "$IMG_A"
+
+# Or tag it for reuse later.
+contree tag "$IMG_A" feature/curl-tools
+```
+
+After fan-out + wait the session retains a `detached-<op-uuid>`
+branch per spawn. They all point at the image that existed when the
+fan-out started, so they are mostly cosmetic — feel free to delete
+them with `contree session branch --prune` when you no longer need
+them.
+
+Useful flags:
+
+- `--timeout SECONDS` — cap on the wait (default 60). If the deadline
+  hits before every operation reaches a terminal status, `op wait`
+  emits one extra row per unfinished op with `timed_out=true` and the
+  operation's last observed status (e.g. `EXECUTING`), then exits
+  with status `1`.
+- `--all` — wait for every currently active operation in the project,
+  not just the ones you passed.
+
+```bash
+# Block on every active op, up to 5 minutes
+contree op wait --all --timeout 300
+```
+
+:::{warning}
+`--all` is **project-scoped**. If multiple agents or shell sessions
+share the same project, `op wait --all` will block on every active
+operation across all of them — not just the ones you launched. For
+multi-agent or multi-shell setups prefer the explicit
+`op wait UUID1 UUID2 ...` form with the UUIDs you actually own.
+:::
+
+`op wait` exits non-zero whenever any operation finished with a
+non-`SUCCESS` status or a non-zero `exit_code`, even when no `--timeout`
+was hit, so it composes naturally with shell `&&` chains.
+
+```bash
+# Join the fan-out legs and report whether every one succeeded
+contree op wait "$A" "$B" "$C" && echo "all green" || echo "some failed"
+```
+
 ### Scripting patterns
 
 :::{note}
diff --git a/tests/conftest.py b/tests/conftest.py
index 052b871..34828ca 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,20 +1,34 @@
 from __future__ import annotations
 
+import atexit
 import http.client
 import json
 import os
+import shutil
+import tempfile
 from collections.abc import Generator
 from dataclasses import dataclass
 from pathlib import Path
 
-import pytest
-
-import contree_cli.arguments  # noqa: F401  populates COMMAND_REGISTRY
-import contree_cli.config as config_mod
-from contree_cli import CLIENT, PROFILE
-from contree_cli.client import ContreeClient, ContreeIAMClient
-from contree_cli.config import ConfigProfile
-from contree_cli.session import ImageCache, SessionStore
+# Redirect CONTREE_HOME to a throwaway directory BEFORE importing
+# contree_cli.config (which reads the variable at import time). Without
+# this, tests would write SQLite databases under the user's real
+# ~/.config/contree -- which also breaks inside sandboxes that block
+# writes to $HOME.
+CONTREE_HOME_TMP = Path(tempfile.mkdtemp(prefix="contree-pytest-"))
+os.environ["CONTREE_HOME"] = str(CONTREE_HOME_TMP)
+atexit.register(shutil.rmtree, CONTREE_HOME_TMP, ignore_errors=True)
+
+# The CONTREE_HOME override above MUST run before any contree_cli import
+# touches contree_cli.config, hence the deferred import block below.
+import pytest  # noqa: E402
+
+import contree_cli.arguments  # noqa: E402, F401  populates COMMAND_REGISTRY
+import contree_cli.config as config_mod  # noqa: E402
+from contree_cli import CLIENT, PROFILE  # noqa: E402
+from contree_cli.client import ContreeClient, ContreeIAMClient  # noqa: E402
+from contree_cli.config import ConfigProfile  # noqa: E402
+from contree_cli.session import ImageCache, SessionStore  # noqa: E402
 
 for var in (
     "CONTREE_TOKEN",
@@ -29,6 +43,22 @@
     os.environ.pop(var, None)
 
 
+@pytest.fixture(autouse=True)
+def sequential_pagination(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Force PaginatedFetcher to use concurrency=1 in tests.
+
+    The mock client's response queue is FIFO and not thread-safe;
+    parallel fetches would race on it. Sequential keeps tests
+    deterministic without affecting handler logic under test.
+    """
+    for mod in (
+        "contree_cli.cli.operation",
+        "contree_cli.cli.images",
+        "contree_cli.cli.file",
+    ):
+        monkeypatch.setattr(f"{mod}.CONTREE_CONCURRENCY", 1, raising=False)
+
+
 @dataclass
 class FakeResponse:
     """Minimal HTTPResponse-compatible object for tests."""
diff --git a/tests/test_argmap.py b/tests/test_argmap.py
index abb3d26..7dfd6f0 100644
--- a/tests/test_argmap.py
+++ b/tests/test_argmap.py
@@ -90,9 +90,9 @@ def test_op_show_resolves_operation_source(self):
         result = walk(root_parser, ["op", "show"])
         assert lookup(result.command_path, "uuids") == "operation"
 
-    def test_kill_uuid_resolves_operation_source(self):
+    def test_kill_uuids_resolves_operation_source(self):
         result = walk(root_parser, ["kill"])
-        assert lookup(result.command_path, "uuid") == "operation"
+        assert lookup(result.command_path, "uuids") == "operation"
 
     def test_run_use_flag_resolves_image_source(self):
         # `--use` is a flag, walk just records it; positional/flag-value
@@ -107,7 +107,7 @@ class TestRepresentativeMappings:
     @pytest.mark.parametrize(
         "command_path,dest,expected",
         [
-            (("show",), "uuid", "operation"),
+            (("show",), "uuids", "operation"),
             (("use",), "image", "image"),
             (("tag",), "args", "image"),
             (("ls",), "path", "sandbox-path"),
diff --git a/tests/test_client.py b/tests/test_client.py
index 8590fec..aefd871 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import contextlib
 import json
 import logging
 from unittest.mock import patch
@@ -15,6 +16,7 @@
     ContreeClient,
     ContreeJWTClient,
     HeaderFormatter,
+    PaginatedFetcher,
     resolve_image,
 )
 
@@ -181,6 +183,70 @@ def test_retry_recovers_midway(self):
         delays = [call.args[0] for call in mock_sleep.call_args_list]
         assert delays == list(RETRY_DELAYS[:3])
 
+    def test_retry_on_network_error_then_succeeds(self):
+        """A transient gaierror is retried like a 5xx response."""
+        import socket
+
+        c = ContreeTestClient("https://contree.dev", "tok")
+        c.respond(status=200, body=b'{"ok":true}')
+
+        call_count = {"n": 0}
+        real_connect = c._connect
+
+        def flaky_connect():
+            call_count["n"] += 1
+            if call_count["n"] < 3:
+                raise socket.gaierror(8, "nodename nor servname provided")
+            return real_connect()
+
+        c._connect = flaky_connect  # type: ignore[method-assign]
+
+        with patch("contree_cli.client.time.sleep") as mock_sleep:
+            result = c.request("GET", "/v1/images")
+
+        assert result.body == b'{"ok":true}'
+        assert call_count["n"] == 3
+        assert mock_sleep.call_count == 2
+
+    def test_retry_exhausted_raises_network_error(self):
+        """When retries run out, the last network error propagates."""
+        import socket
+
+        c = ContreeTestClient("https://contree.dev", "tok")
+
+        def always_fails():
+            raise socket.gaierror(8, "nodename nor servname provided")
+
+        c._connect = always_fails  # type: ignore[method-assign]
+
+        with (
+            patch("contree_cli.client.time.sleep"),
+            pytest.raises(socket.gaierror),
+        ):
+            c.request("GET", "/v1/images")
+
+    def test_invalid_url_is_not_retried(self):
+        """InvalidURL is a permanent caller-side error — should raise immediately."""
+        import http.client
+
+        c = ContreeTestClient("https://contree.dev", "tok")
+
+        call_count = {"n": 0}
+
+        def fail_with_invalid_url():
+            call_count["n"] += 1
+            raise http.client.InvalidURL("control characters in URL")
+
+        c._connect = fail_with_invalid_url  # type: ignore[method-assign]
+
+        with (
+            patch("contree_cli.client.time.sleep") as mock_sleep,
+            pytest.raises(http.client.InvalidURL),
+        ):
+            c.request("GET", "/v1/images")
+        assert call_count["n"] == 1
+        mock_sleep.assert_not_called()
+
 
 # ---------------------------------------------------------------------------
 # Convenience methods
@@ -550,3 +616,65 @@ def test_raises_without_token(self):
         c = ContreeTestIAMClient("https://example.com", None, "aiproject-x")
         with pytest.raises(ApiError, match="No token"):
             c.request("GET", "/v1/images")
+
+
+class TestPaginatedFetcherLimit:
+    """``limit=`` lives in :class:`PaginatedFetcher` so callers don't repeat
+    the page-size math, and a small budget like ``--limit 5`` doesn't pull a
+    full 1000-row page just to discard 995."""
+
+    def make_fetcher(
+        self,
+        client: ContreeTestClient,
+        *,
+        limit: int | None,
+        page_size: int | None = None,
+    ) -> PaginatedFetcher:
+        return PaginatedFetcher(
+            client,
+            "/v1/things",
+            {},
+            lambda body: json.loads(body)["items"],
+            limit=limit,
+            page_size=page_size,
+            concurrency=1,
+        )
+
+    def test_small_limit_caps_page_size(self, contree_client):
+        f = self.make_fetcher(contree_client, limit=5)
+        # +1 so callers can detect "more exists past the limit".
+        assert f.page_size == 6
+        # Only limit+1 records must be covered; the ceiling round-up plus one
+        # page of safety pad is why max_pages comes out to at least 2.
+        assert f.max_pages >= 2
+
+    def test_large_limit_uses_default_page_size(self, contree_client):
+        f = self.make_fetcher(contree_client, limit=10000)
+        assert f.page_size == PaginatedFetcher.DEFAULT_PAGE_SIZE
+        assert f.max_pages >= 10000 // PaginatedFetcher.DEFAULT_PAGE_SIZE
+
+    def test_no_limit_uses_default_and_safety_cap(self, contree_client):
+        f = self.make_fetcher(contree_client, limit=None)
+        assert f.page_size == PaginatedFetcher.DEFAULT_PAGE_SIZE
+        assert f.max_pages == PaginatedFetcher.UNLIMITED_MAX_PAGES
+
+    def test_explicit_page_size_still_capped_by_limit(self, contree_client):
+        # Caller-supplied page_size is an upper bound; limit can still
+        # squash it smaller.
+        f = self.make_fetcher(contree_client, limit=3, page_size=100)
+        assert f.page_size == 4
+
+    def test_small_limit_uses_capped_page_size_in_request(self, contree_client):
+        # `limit=5` must request `limit=6` (page size capped to limit + 1, the
+        # extra row being the truncation probe), not the default 1000.
+        contree_client.respond_json({"items": [{"i": i} for i in range(6)]})
+        f = self.make_fetcher(contree_client, limit=5)
+        pages_iter = iter(f)
+        first = next(pages_iter)
+        assert len(first) == 6
+        f.stop()  # mirror the real caller, which calls stop() after hitting limit
+        with contextlib.suppress(StopIteration):
+            next(pages_iter)
+        req = contree_client.get_request(0)
+        assert "limit=6" in req.path
+        assert "limit=1000" not in req.path
diff --git a/tests/test_images.py b/tests/test_images.py
index 4972be0..b0ece40 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -201,24 +201,16 @@ def test_all_images_emitted(self, contree_client, capsys):
         out = capsys.readouterr().out
         assert out.count("uuid-") == PAGE_SIZE + 5
 
-    def test_progress_logged_per_full_page(self, contree_client, caplog):
-        """Each completed full page emits a progress line at INFO level."""
-        import logging
-
+    def test_pages_flushed_progressively(self, contree_client, capsys):
+        """Each full page is flushed as it completes (streaming output)."""
         page1 = [_make_image(i) for i in range(PAGE_SIZE)]
         page2 = [_make_image(i) for i in range(PAGE_SIZE, PAGE_SIZE * 2)]
         page3 = [_make_image(i) for i in range(PAGE_SIZE * 2, PAGE_SIZE * 2 + 3)]
-        with caplog.at_level(logging.INFO, logger="contree_cli.cli.images"):
-            _run_cmd_pages(contree_client, [page1, page2, page3])
-        msgs = [r.getMessage() for r in caplog.records]
-        assert any(
-            f"Fetched {PAGE_SIZE} images so far" in m and "Ctrl+C" in m for m in msgs
-        )
-        assert any(
-            f"Fetched {PAGE_SIZE * 2} images so far" in m and "Ctrl+C" in m
-            for m in msgs
-        )
-        assert not any(f"{PAGE_SIZE * 2 + 3}" in m for m in msgs)
+        _run_cmd_pages(contree_client, [page1, page2, page3])
+        out = capsys.readouterr().out
+        assert f"uuid-{PAGE_SIZE - 1}" in out
+        assert f"uuid-{PAGE_SIZE * 2 - 1}" in out
+        assert f"uuid-{PAGE_SIZE * 2 + 2}" in out
 
     def test_default_limit_matches_constant(self):
         assert LIMIT_DEFAULT > 0
@@ -288,18 +280,21 @@ def test_limit_no_warning_when_no_more(self, contree_client, caplog):
         warns = [r for r in caplog.records if r.levelname == "WARNING"]
         assert not any("truncated" in r.getMessage() for r in warns)
 
-    def test_limit_request_uses_capped_page_size(self, contree_client):
-        """When --limit < PAGE_SIZE, the API request asks for limit items only."""
-        contree_client.respond_json({"images": [_make_image(i) for i in range(3)]})
-        contree_client.respond_json({"images": []})  # probe
+    def test_limit_smaller_than_page_size_emits_only_limit_records(
+        self, contree_client, capsys
+    ):
+        """--limit < PAGE_SIZE: caller emits exactly limit records from the page."""
+        contree_client.respond_json(
+            {"images": [_make_image(i) for i in range(PAGE_SIZE)]}
+        )
 
         FORMATTER.set(CSVFormatter())
         ctx = copy_context()
         ctx.run(cmd_images, ImagesArgs(limit=3))
 
-        assert "limit=3" in contree_client.request_paths[0]
-        assert "limit=1" in contree_client.request_paths[1]
-        assert contree_client.request_count == 2
+        out = capsys.readouterr().out
+        # 1 header row + 3 data rows.
+        assert len(out.strip().splitlines()) == 4
 
     def test_progress_not_logged_for_single_short_page(self, contree_client, caplog):
         """Final/only partial page does not emit progress (output covers it)."""
diff --git a/tests/test_kill.py b/tests/test_kill.py
index ac55f18..fe3aec9 100644
--- a/tests/test_kill.py
+++ b/tests/test_kill.py
@@ -2,20 +2,18 @@
 
 from contextvars import copy_context
 
-import pytest
 from conftest import ContreeTestClient
 
 from contree_cli import CLIENT
-from contree_cli.cli.kill import ACTIVE_STATUSES, KillArgs, cmd_kill
-from contree_cli.client import ApiError
+from contree_cli.cli.operation import ACTIVE_STATUSES, CancelArgs, cmd_cancel
 
 
 def _run_cmd(tc: ContreeTestClient, uuid, *, status=202):
     tc.respond(status=status, body=b"")
     ctx = copy_context()
 
-    args = KillArgs(uuid=uuid)
-    ctx.run(cmd_kill, args)
+    args = CancelArgs(uuids=[uuid])
+    ctx.run(cmd_cancel, args)
 
 
 class TestCmdKill:
@@ -30,21 +28,22 @@ def test_logs_cancellation(self, contree_client, caplog):
             _run_cmd(contree_client, "op-456")
         assert "Cancelled operation op-456" in caplog.text
 
-    def test_not_found_raises(self, contree_client):
+    def test_not_found_logs_and_sets_exit(self, contree_client, caplog):
         contree_client.respond(status=404, body=b"nope")
+        CLIENT.set(contree_client)
         ctx = copy_context()
-        args = KillArgs(uuid="bad-uuid")
-        with pytest.raises(ApiError) as exc_info:
-            ctx.run(cmd_kill, args)
-        assert exc_info.value.status == 404
+        with caplog.at_level("ERROR"):
+            rc = ctx.run(cmd_cancel, CancelArgs(uuids=["bad-uuid"]))
+        assert rc == 1
+        assert "Failed to cancel bad-uuid" in caplog.text
 
-    def test_conflict_raises(self, contree_client):
+    def test_conflict_logs_and_sets_exit(self, contree_client, caplog):
         contree_client.respond(status=409, body=b"already done")
+        CLIENT.set(contree_client)
         ctx = copy_context()
-        args = KillArgs(uuid="done-op")
-        with pytest.raises(ApiError) as exc_info:
-            ctx.run(cmd_kill, args)
-        assert exc_info.value.status == 409
+        with caplog.at_level("ERROR"):
+            rc = ctx.run(cmd_cancel, CancelArgs(uuids=["done-op"]))
+        assert rc == 1
 
 
 # ---------------------------------------------------------------------------
@@ -57,11 +56,7 @@ def _ops_for_status(status, count):
 
 
 def _run_kill_all(ops_by_status, *, delete_failures=None):
-    """Run cmd_kill --all with mocked list + delete responses.
-
-    ops_by_status: dict mapping status string to list of op dicts
-    delete_failures: set of UUIDs that should return 409
-    """
+    """Run cmd_cancel --all with mocked list + delete responses."""
     delete_failures = delete_failures or set()
     tc = ContreeTestClient()
 
@@ -80,9 +75,9 @@ def _run_kill_all(ops_by_status, *, delete_failures=None):
 
     CLIENT.set(tc)
     ctx = copy_context()
-    args = KillArgs(all=True)
+    args = CancelArgs(uuids=[], all=True)
 
-    rc = ctx.run(cmd_kill, args)
+    rc = ctx.run(cmd_cancel, args)
     return tc, rc
 
 
diff --git a/tests/test_ls.py b/tests/test_ls.py
index bd3476f..1298a1b 100644
--- a/tests/test_ls.py
+++ b/tests/test_ls.py
@@ -146,6 +146,16 @@ def test_json_output(self, contree_client, session_store, capsys):
         assert parsed["path"] == "/bin/sh"
         assert parsed["size"] == 42
 
+    def test_unknown_field_passes_through(self, contree_client, session_store, capsys):
+        """New server fields reach the row even when not hardcoded."""
+        f = _make_file()
+        f["future_field"] = "anything"
+        f["inode"] = 4242
+        _run_cmd(contree_client, [f], store=session_store, formatter=JSONFormatter())
+        parsed = json.loads(capsys.readouterr().out.strip())
+        assert parsed["future_field"] == "anything"
+        assert parsed["inode"] == 4242
+
     def test_table_output(self, contree_client, session_store, capsys):
         files = [_make_file(), _make_file(path="/etc/passwd")]
         fmt = TableFormatter()
diff --git a/tests/test_operation.py b/tests/test_operation.py
index bb87b72..5ed0526 100644
--- a/tests/test_operation.py
+++ b/tests/test_operation.py
@@ -7,12 +7,14 @@
 
 from contree_cli import CLIENT, FORMATTER, SESSION_STORE
 from contree_cli.arguments import parser
-from contree_cli.cli.kill import ACTIVE_STATUSES
 from contree_cli.cli.operation import (
+    ACTIVE_STATUSES,
     CancelArgs,
     ShowMultiArgs,
+    WaitArgs,
     cmd_cancel,
     cmd_show_multi,
+    cmd_wait,
 )
 from contree_cli.output import CSVFormatter, JSONFormatter
 from contree_cli.session import SessionStore
@@ -108,18 +110,46 @@ def test_cancel_all_flag(self):
         assert ns.all is True
         assert ns.uuids == []
 
-    def test_list_delegates_to_ps_handler(self):
-        from contree_cli.cli.ps import cmd_ps
+    def test_list_delegates_to_cmd_list(self):
+        from contree_cli.cli.operation import cmd_list
 
         ns = parser.parse_args(["op", "list", "-q"])
-        assert ns.handler is cmd_ps
+        assert ns.handler is cmd_list
         assert ns.quiet is True
 
     def test_list_ls_alias(self):
-        from contree_cli.cli.ps import cmd_ps
+        from contree_cli.cli.operation import cmd_list
 
         ns = parser.parse_args(["op", "ls"])
-        assert ns.handler is cmd_ps
+        assert ns.handler is cmd_list
+
+    def test_ps_shares_handler_with_op_list(self):
+        """`contree ps` is a top-level shortcut for `contree op list`."""
+        from contree_cli.cli.operation import cmd_list
+
+        ns = parser.parse_args(["ps"])
+        assert ns.handler is cmd_list
+
+    def test_show_sh_alias(self):
+        from contree_cli.cli.operation import cmd_show_multi
+
+        ns = parser.parse_args(["op", "sh", "uuid-1"])
+        assert ns.handler is cmd_show_multi
+        assert ns.uuids == ["uuid-1"]
+
+    def test_cancel_kill_alias(self):
+        from contree_cli.cli.operation import cmd_cancel
+
+        ns = parser.parse_args(["op", "kill", "uuid-1"])
+        assert ns.handler is cmd_cancel
+        assert ns.uuids == ["uuid-1"]
+
+    def test_cancel_k_alias(self):
+        from contree_cli.cli.operation import cmd_cancel
+
+        ns = parser.parse_args(["op", "k", "uuid-1"])
+        assert ns.handler is cmd_cancel
+        assert ns.uuids == ["uuid-1"]
 
 
 # ----------------------------------------------------------------------
@@ -206,6 +236,29 @@ def test_show_history_reference_uses_session_store(
         assert contree_client.request_count == 1
         assert contree_client.get_request(0).path == "/v1/operations/op-from-history"
 
+    def test_show_raw_multi_uuid_emits_jsonl(
+        self, contree_client, session_store, capsys
+    ):
+        # Multi-UUID `op show --raw` should produce one JSON line per
+        # operation so the output streams cleanly into `jq -c`.
+        import json as _json
+
+        ops = [make_op("op-a"), make_op("op-b"), make_op("op-c")]
+        for op in ops:
+            contree_client.respond_json(op)
+        FORMATTER.set(JSONFormatter())
+        SESSION_STORE.set(session_store)
+        ctx = copy_context()
+        rc = ctx.run(
+            cmd_show_multi,
+            ShowMultiArgs(uuids=[op["uuid"] for op in ops], raw=True),
+        )
+        assert rc is None
+        lines = capsys.readouterr().out.strip().splitlines()
+        assert len(lines) == 3
+        parsed = [_json.loads(line) for line in lines]
+        assert [p["uuid"] for p in parsed] == ["op-a", "op-b", "op-c"]
+
 
 # ----------------------------------------------------------------------
 # op cancel
@@ -290,7 +343,9 @@ def test_cancel_all_with_no_active(self, contree_client, caplog):
 
     def test_cancel_all_overrides_explicit_uuids(self, contree_client, caplog):
         """--all wins; explicit UUIDs are ignored with a WARNING."""
-        list_pages = [[{"uuid": "pending-0"}]] + [[] for _ in ACTIVE_STATUSES[1:]]
+        list_pages = [[{"uuid": "pending-0"}]] + [
+            [] for _ in range(len(ACTIVE_STATUSES) - 1)
+        ]
         with caplog.at_level("WARNING"):
             rc = run_cancel(
                 contree_client,
@@ -305,3 +360,196 @@ def test_cancel_all_overrides_explicit_uuids(self, contree_client, caplog):
         deletes = [r for r in contree_client.fake.requests if r.method == "DELETE"]
         assert len(deletes) == 1
         assert deletes[0].path == "/v1/operations/pending-0"
+
+
+# ----------------------------------------------------------------------
+# op wait
+# ----------------------------------------------------------------------
+
+
+def _wait_op(uuid: str, status: str = "SUCCESS", duration: float = 1.0) -> dict:
+    return {
+        "uuid": uuid,
+        "kind": "instance",
+        "status": status,
+        "duration": duration,
+        "error": None,
+    }
+
+
+class TestOperationWait:
+    def test_argparse_wait_alias(self):
+        ns = parser.parse_args(["op", "w", "op-1"])
+        assert ns.handler is cmd_wait
+        assert ns.uuids == ["op-1"]
+
+    def test_argparse_wait_default_timeout(self):
+        ns = parser.parse_args(["op", "wait", "op-1"])
+        assert ns.timeout == 60
+
+    def test_wait_returns_none_on_terminal_success(self, contree_client, monkeypatch):
+        monkeypatch.setattr("contree_cli.cli.operation.time.sleep", lambda _: None)
+        contree_client.respond_json(_wait_op("op-1", status="SUCCESS"))
+
+        FORMATTER.set(JSONFormatter())
+        CLIENT.set(contree_client)
+        ctx = copy_context()
+        rc = ctx.run(cmd_wait, WaitArgs(uuids=["op-1"], timeout=60))
+        assert rc is None
+        assert contree_client.request_count == 1
+
+    def test_wait_failed_op_returns_exit_code_one(
+        self, contree_client, monkeypatch, capsys
+    ):
+        monkeypatch.setattr("contree_cli.cli.operation.time.sleep", lambda _: None)
+        contree_client.respond_json(_wait_op("op-fail", status="FAILED"))
+
+        FORMATTER.set(JSONFormatter())
+        CLIENT.set(contree_client)
+        ctx = copy_context()
+        rc = ctx.run(cmd_wait, WaitArgs(uuids=["op-fail"], timeout=60))
+        assert rc == 1
+        import json as _json
+
+        data = _json.loads(capsys.readouterr().out)
+        assert data["status"] == "FAILED"
+        assert data["timed_out"] is False
+
+    def test_wait_success_with_nonzero_exit_code_preserves_status(
+        self, contree_client, monkeypatch, capsys
+    ):
+        """Operation status is the server's word; it is NOT promoted to
+        FAILED when the sandbox process exited non-zero. The exit_code
+        is shown separately and propagated to the CLI's exit code so
+        `op wait && next-step` still composes correctly."""
+        monkeypatch.setattr("contree_cli.cli.operation.time.sleep", lambda _: None)
+        op = _wait_op("op-false", status="SUCCESS")
+        op["metadata"] = {"result": {"state": {"exit_code": 1}}}
+        contree_client.respond_json(op)
+
+        FORMATTER.set(JSONFormatter())
+        CLIENT.set(contree_client)
+        ctx = copy_context()
+        rc = ctx.run(cmd_wait, WaitArgs(uuids=["op-false"], timeout=60))
+        assert rc == 1
+        import json as _json
+
+        data = _json.loads(capsys.readouterr().out)
+        assert data["status"] == "SUCCESS"
+        assert data["exit_code"] == 1
+        assert data["timed_out"] is False
+
+    def test_wait_propagates_specific_exit_code(self, contree_client, monkeypatch):
+        """Like `session wait`, propagate the actual process exit code so
+        `op wait foo && next-step` composes correctly with the underlying
+        sandbox command's status."""
+        monkeypatch.setattr("contree_cli.cli.operation.time.sleep", lambda _: None)
+        op = _wait_op("op-42", status="SUCCESS")
+        op["metadata"] = {"result": {"state": {"exit_code": 42}}}
+        contree_client.respond_json(op)
+
+        FORMATTER.set(JSONFormatter())
+        CLIENT.set(contree_client)
+        ctx = copy_context()
+        rc = ctx.run(cmd_wait, WaitArgs(uuids=["op-42"], timeout=60))
+        assert rc == 42
+
+    def test_wait_emits_timed_out_column(
+        self, contree_client, monkeypatch, capsys, caplog
+    ):
+        # The canned `time.monotonic` sequence jumps to 100.0 on its fourth call,
+        # far past `timeout=1`, simulating a real-world timeout without sleeping.
+        clock = iter([0.0, 0.0, 0.5, 100.0, 100.0, 100.0, 100.0])
+        monkeypatch.setattr(
+            "contree_cli.cli.operation.time.monotonic", lambda: next(clock)
+        )
+        monkeypatch.setattr("contree_cli.cli.operation.time.sleep", lambda _: None)
+        # Poll: returns EXECUTING (not terminal). Second fetch (post-deadline)
+        # picks up the same op for the timed-out row.
+        contree_client.respond_json(_wait_op("op-slow", status="EXECUTING"))
+        contree_client.respond_json(_wait_op("op-slow", status="EXECUTING"))
+
+        FORMATTER.set(JSONFormatter())
+        CLIENT.set(contree_client)
+        ctx = copy_context()
+        with caplog.at_level("WARNING"):
+            rc = ctx.run(cmd_wait, WaitArgs(uuids=["op-slow"], timeout=1))
+
+        assert rc == 1
+        import json as _json
+
+        data = _json.loads(capsys.readouterr().out)
+        assert data["uuid"] == "op-slow"
+        assert data["status"] == "EXECUTING"
+        assert data["timed_out"] is True
+        assert "Timeout" in caplog.text
+
+    def test_wait_no_args_no_all_errors(self, contree_client, caplog):
+        CLIENT.set(contree_client)
+        ctx = copy_context()
+        with caplog.at_level("ERROR"):
+            rc = ctx.run(cmd_wait, WaitArgs(uuids=[], all=False, timeout=60))
+        assert rc == 1
+        assert "at least one UUID" in caplog.text
+
+    def test_wait_all_with_no_active(self, contree_client, monkeypatch, caplog):
+        # list_active returns no UUIDs after polling each ACTIVE_STATUS once.
+        for _ in ACTIVE_STATUSES:
+            contree_client.respond_json([])
+
+        FORMATTER.set(JSONFormatter())
+        CLIENT.set(contree_client)
+        ctx = copy_context()
+        with caplog.at_level("INFO"):
+            rc = ctx.run(cmd_wait, WaitArgs(uuids=[], all=True, timeout=60))
+        assert rc is None
+        assert "No active operations to wait for" in caplog.text
+
+
+# ----------------------------------------------------------------------
+# argparse + from_args integration -- the parsing/resolution itself is
+# tested exhaustively in tests/test_refs.py; here we just verify each
+# subcommand's argparse Namespace flows through resolve_operation_uuids() so it
+# accepts whitespace-joined UUID strings (a common agent quoting bug).
+# ----------------------------------------------------------------------
+
+
+UUID_A = "019e3fb6-e2d8-7350-a8f9-8b2b5ebfda7f"
+UUID_B = "019e3fb6-e447-760d-b7ab-62ef51f91b1f"
+UUID_C = "019e3fb6-e5c3-7184-96f1-f7d56453a193"
+
+
+class TestArgsFromNamespace:
+    def test_wait_one_quoted_string_of_uuids(self, session_store):
+        ns = parser.parse_args(["op", "wait", f"{UUID_A} {UUID_B} {UUID_C}"])
+        SESSION_STORE.set(session_store)
+        args = copy_context().run(WaitArgs.from_args, ns)
+        assert args.uuids == [UUID_A, UUID_B, UUID_C]
+
+    def test_cancel_one_quoted_string_of_uuids(self, session_store):
+        ns = parser.parse_args(["op", "cancel", f"{UUID_A} {UUID_B}"])
+        SESSION_STORE.set(session_store)
+        args = copy_context().run(CancelArgs.from_args, ns)
+        assert args.uuids == [UUID_A, UUID_B]
+
+    def test_show_one_quoted_string_of_uuids(self, session_store):
+        ns = parser.parse_args(["op", "show", f"{UUID_A} {UUID_B}"])
+        SESSION_STORE.set(session_store)
+        args = copy_context().run(ShowMultiArgs.from_args, ns)
+        assert args.uuids == [UUID_A, UUID_B]
+
+    def test_show_resolves_history_ref_to_real_uuid(self, session_store):
+        # @N is no longer passed through verbatim -- from_args resolves
+        # it against the active session and returns the real UUID.
+        session_store.set_image("img-1", kind="use")
+        session_store.set_image("img-2", kind="run", operation_uuid=UUID_A)
+        ns = parser.parse_args(["op", "show", "@2"])
+        SESSION_STORE.set(session_store)
+        args = copy_context().run(ShowMultiArgs.from_args, ns)
+        assert args.uuids == [UUID_A]
+
+    def test_wait_with_garbage_uuid_raises(self, session_store):
+        ns = parser.parse_args(["op", "wait", "definitely-not-uuid"])
+        SESSION_STORE.set(session_store)
+        with pytest.raises(ValueError, match="Invalid operation reference"):
+            copy_context().run(WaitArgs.from_args, ns)
diff --git a/tests/test_output.py b/tests/test_output.py
index 7293226..fe3c9ba 100644
--- a/tests/test_output.py
+++ b/tests/test_output.py
@@ -85,7 +85,7 @@ class TestJSONPrettyFormatter:
     def test_single_item_is_list(self, capsys):
         fmt = JSONPrettyFormatter()
         fmt(a=1, b=2)
-        fmt.flush()
+        fmt.close()
         parsed = json.loads(capsys.readouterr().out)
         assert parsed == [{"a": 1, "b": 2}]
 
@@ -93,29 +93,41 @@ def test_multiple_items(self, capsys):
         fmt = JSONPrettyFormatter()
         fmt(x=1)
         fmt(x=2)
-        fmt.flush()
+        fmt.close()
         parsed = json.loads(capsys.readouterr().out)
         assert parsed == [{"x": 1}, {"x": 2}]
 
     def test_indented(self, capsys):
         fmt = JSONPrettyFormatter()
         fmt(key="val")
-        fmt.flush()
+        fmt.close()
         assert "  " in capsys.readouterr().out
 
-    def test_flush_empty(self, capsys):
+    def test_close_empty(self, capsys):
         fmt = JSONPrettyFormatter()
-        fmt.flush()
+        fmt.close()
         assert capsys.readouterr().out == ""
 
-    def test_flush_clears_buffer(self, capsys):
+    def test_close_clears_buffer(self, capsys):
         fmt = JSONPrettyFormatter()
         fmt(k=1)
-        fmt.flush()
+        fmt.close()
         capsys.readouterr()
-        fmt.flush()
+        fmt.close()
         assert capsys.readouterr().out == ""
 
+    def test_streaming_across_flushes(self, capsys):
+        """Multiple flushes accumulate into a single JSON array."""
+        fmt = JSONPrettyFormatter()
+        fmt(x=1)
+        fmt.flush()
+        fmt(x=2)
+        fmt.flush()
+        fmt(x=3)
+        fmt.close()
+        parsed = json.loads(capsys.readouterr().out)
+        assert parsed == [{"x": 1}, {"x": 2}, {"x": 3}]
+
 
 class TestTableFormatter:
     def test_single_row(self, capsys):
@@ -165,12 +177,47 @@ def test_multiple_batches(self, capsys):
         assert "a" in first
         assert "b" in second
 
+    def test_header_only_on_first_flush(self, capsys):
+        """Subsequent flushes don't reprint the header (paginated streaming)."""
+        with patch("contree_cli.output.STDOUT_IS_A_TTY", False):
+            fmt = TableFormatter()
+            fmt(name="alice", val="x")
+            fmt.flush()
+            first = capsys.readouterr().out
+            fmt(name="bob", val="y")
+            fmt.flush()
+            second = capsys.readouterr().out
+        assert "NAME" in first
+        assert "NAME" not in second
+        assert "bob" in second
+
+    def test_column_widths_stable_across_flushes(self, capsys):
+        """Column widths from the first flush apply to later flushes."""
+        with patch("contree_cli.output.STDOUT_IS_A_TTY", False):
+            fmt = TableFormatter()
+            fmt(name="ab", val="x")
+            fmt.flush()
+            first_lines = capsys.readouterr().out.splitlines()
+            # A wider name in the next batch must be truncated to the
+            # already-printed column width so columns don't shift.
+            fmt(name="abcdef", val="y")
+            fmt.flush()
+            second_lines = capsys.readouterr().out.splitlines()
+        assert len(first_lines[0]) == len(second_lines[0])
+
 
 class TestFormatValue:
-    def test_datetime(self):
-        dt = datetime(2025, 1, 15, 10, 30, 45, tzinfo=timezone.utc)
+    def test_datetime_naive(self):
+        # Naive datetimes are printed as-is without TZ conversion.
+        dt = datetime(2025, 1, 15, 10, 30, 45)
         assert _format_value(dt) == "2025-01-15 10:30:45"
 
+    def test_datetime_aware_converts_to_local(self):
+        # Aware UTC datetime is converted to the local timezone for display.
+        dt = datetime(2025, 1, 15, 10, 30, 45, tzinfo=timezone.utc)
+        expected = dt.astimezone().strftime("%Y-%m-%d %H:%M:%S")
+        assert _format_value(dt) == expected
+
     def test_timedelta_seconds(self):
         assert _format_value(timedelta(seconds=42)) == "42s"
 
diff --git a/tests/test_ps.py b/tests/test_ps.py
index 3a206e4..0b8d69a 100644
--- a/tests/test_ps.py
+++ b/tests/test_ps.py
@@ -7,7 +7,16 @@
 from conftest import ContreeTestClient
 
 from contree_cli import FORMATTER
-from contree_cli.cli.ps import PAGE_SIZE, STATUS_CHOICES, PsArgs, cmd_ps
+from contree_cli.cli.operation import (
+    PAGE_SIZE,
+    STATUS_CHOICES,
+)
+from contree_cli.cli.operation import (
+    ListArgs as PsArgs,
+)
+from contree_cli.cli.operation import (
+    cmd_list as cmd_ps,
+)
 from contree_cli.output import CSVFormatter, JSONFormatter, TableFormatter
 from contree_cli.types import parse_interval
 
@@ -210,23 +219,19 @@ def test_offset_increments(self, contree_client):
         assert "offset=0" in paths[0]
         assert f"offset={PAGE_SIZE}" in paths[1]
 
-    def test_progress_logged_per_full_page(self, contree_client, caplog):
-        """Each completed full page emits a progress line at INFO level."""
-        import logging
-
+    def test_pages_flushed_progressively(self, contree_client, capsys):
+        """Each full page is flushed as it completes (streaming output)."""
         page1 = [_make_op(i) for i in range(PAGE_SIZE)]
         page2 = [_make_op(i) for i in range(PAGE_SIZE, PAGE_SIZE + 3)]
-        with caplog.at_level(logging.INFO, logger="contree_cli.cli.ps"):
-            _run_cmd_pages(
-                contree_client,
-                [page1, page2],
-                show_max=None,
-            )
-        msgs = [r.getMessage() for r in caplog.records]
-        assert any(
-            f"Fetched {PAGE_SIZE} operations so far" in m and "Ctrl+C" in m
-            for m in msgs
+        _run_cmd_pages(
+            contree_client,
+            [page1, page2],
+            show_max=None,
         )
+        out = capsys.readouterr().out
+        # All rows from both pages should appear in output.
+        assert f"op-{PAGE_SIZE - 1}" in out
+        assert f"op-{PAGE_SIZE + 2}" in out
 
 
 class TestPsActiveFilter:
@@ -341,16 +346,18 @@ def test_show_max_no_warning_when_under_limit(
         _run_cmd(contree_client, ops, show_max=100, all=True)
         assert "Output truncated" not in caplog.text
 
-    def test_show_max_stops_pagination(self, contree_client, capsys):
-        """show_max stops mid-page; one probe request follows."""
+    def test_show_max_stops_pagination(self, contree_client):
+        """show_max stops mid-page; short first page is detected without a probe."""
         ops = [_make_op(i) for i in range(10)]
         _run_cmd_pages(
             contree_client,
-            [ops, [_make_op(99)]],  # main + probe
+            [ops],
             show_max=3,
             all=True,
         )
-        assert contree_client.request_count == 2
+        # Short page (10 < PAGE_SIZE) is enough to know we've seen all data;
+        # no need for the historical probe request.
+        assert contree_client.request_count == 1
 
     def test_show_max_across_pages(self, contree_client, capsys):
         """show_max truncates across page boundaries."""
@@ -379,19 +386,6 @@ def test_show_max_one_shows_one(self, contree_client, capsys):
         assert "op-0" in out
         assert "op-1" not in out
 
-    def test_show_max_probe_uses_skip_of_one(self, contree_client):
-        """Probe is a single-record request after the cap."""
-        page = [_make_op(i) for i in range(5)]
-        _run_cmd_pages(
-            contree_client,
-            [page, []],
-            show_max=3,
-            all=True,
-        )
-        probe_path = contree_client.request_paths[1]
-        assert "limit=1" in probe_path
-        assert "offset=3" in probe_path
-
     def test_show_max_no_warning_when_probe_empty(self, contree_client, caplog):
         """Empty probe means we hit show_max but there's nothing more."""
         page = [_make_op(i) for i in range(3)]
@@ -413,7 +407,7 @@ def test_show_max_warning_after_table_flush(self, contree_client, caplog, capsys
 
         FORMATTER.set(TableFormatter())
         ctx = copy_context()
-        with caplog.at_level(logging.WARNING, logger="contree_cli.cli.ps"):
+        with caplog.at_level(logging.WARNING, logger="contree_cli.cli.operation"):
             ctx.run(cmd_ps, PsArgs(show_max=3, all=True))
 
         out = capsys.readouterr().out
diff --git a/tests/test_refs.py b/tests/test_refs.py
new file mode 100644
index 0000000..362ada6
--- /dev/null
+++ b/tests/test_refs.py
@@ -0,0 +1,287 @@
+"""Tests for contree_cli.refs — operation-reference parsing and resolution.
+
+``contree_cli.refs`` is the single source of truth for UUID/history-reference
+parsing logic. CLI handlers funnel their positional UUID arguments
+through ``resolve_operation_uuids``, so testing here covers every command that
+accepts operation references (op show, op cancel, op wait, session
+wait, top-level show/kill).
+"""
+
+from __future__ import annotations
+
+from contextvars import copy_context
+
+import pytest
+
+from contree_cli import SESSION_STORE
+from contree_cli.refs import (
+    history_spec_from_ref,
+    looks_like_history_ref,
+    resolve_operation_uuid,
+    resolve_operation_uuids,
+)
+
+UUID_A = "019e3fb6-e2d8-7350-a8f9-8b2b5ebfda7f"
+UUID_B = "019e3fb6-e447-760d-b7ab-62ef51f91b1f"
+UUID_C = "019e3fb6-e5c3-7184-96f1-f7d56453a193"
+
+
+# ----------------------------------------------------------------------
+# history_spec_from_ref
+# ----------------------------------------------------------------------
+
+
+class TestHistorySpecFromRef:
+    def test_head_alone_is_tip(self):
+        assert history_spec_from_ref("HEAD") == ""
+
+    def test_head_tilde_is_one_back(self):
+        assert history_spec_from_ref("HEAD~") == "-1"
+
+    def test_head_tilde_n_is_n_back(self):
+        assert history_spec_from_ref("HEAD~3") == "-3"
+
+    def test_head_tilde_zero_is_a_ref_but_invalid_when_resolved(self):
+        # Lexically a ref: resolving it errors clearly via resolve_history_spec
+        # rather than silently passing through as a bogus UUID.
+        assert history_spec_from_ref("HEAD~0") == "-0"
+
+    def test_head_tilde_garbage_is_invalid(self):
+        assert history_spec_from_ref("HEAD~abc") is None
+
+    def test_bare_at_is_tip(self):
+        assert history_spec_from_ref("@") == ""
+
+    def test_bare_colon_is_tip(self):
+        assert history_spec_from_ref(":") == ""
+
+    def test_at_n_is_absolute(self):
+        assert history_spec_from_ref("@5") == "5"
+
+    def test_colon_n_is_absolute(self):
+        assert history_spec_from_ref(":12") == "12"
+
+    def test_bare_n_is_absolute(self):
+        assert history_spec_from_ref("7") == "7"
+
+    def test_at_minus_n_is_relative_back(self):
+        assert history_spec_from_ref("@-2") == "-2"
+
+    def test_at_plus_n_is_relative_forward(self):
+        assert history_spec_from_ref("@+1") == "+1"
+
+    def test_uuid_is_not_a_ref(self):
+        assert history_spec_from_ref(UUID_A) is None
+
+    def test_garbage_is_not_a_ref(self):
+        assert history_spec_from_ref("not-a-ref") is None
+
+
+class TestLooksLikeHistoryRef:
+    @pytest.mark.parametrize(
+        "value",
+        ["HEAD", "HEAD~", "HEAD~5", "@", ":", "@2", ":12", "7", "@-1", "@+3"],
+    )
+    def test_history_refs_are_recognised(self, value):
+        assert looks_like_history_ref(value)
+
+    @pytest.mark.parametrize("value", [UUID_A, "not-a-ref", "HEAD~abc"])
+    def test_non_refs_are_rejected(self, value):
+        assert not looks_like_history_ref(value)
+
+
+# ----------------------------------------------------------------------
+# resolve_operation_uuid -- single token; needs session_store
+# ----------------------------------------------------------------------
+
+
+class TestResolveOperationUuid:
+    def test_uuid_passes_through(self, session_store):
+        # Bare UUID -- no history-ref pattern -- returns unchanged.
+        assert resolve_operation_uuid(UUID_A, session_store) == UUID_A
+
+    def test_garbage_passes_through(self, session_store):
+        # Non-ref, non-UUID strings are still passed through; the
+        # caller decides what to do with them (e.g. resolve_operation_uuids
+        # validates as UUID afterwards and rejects).
+        assert resolve_operation_uuid("not-a-ref", session_store) == "not-a-ref"
+
+    def test_at_prefix_resolves(self, session_store):
+        session_store.set_image("img-1", kind="use")
+        session_store.set_image("img-2", kind="run", operation_uuid="op-abc")
+        assert resolve_operation_uuid("@2", session_store) == "op-abc"
+
+    def test_colon_prefix_resolves(self, session_store):
+        session_store.set_image("img-1", kind="use")
+        session_store.set_image("img-2", kind="run", operation_uuid="op-def")
+        assert resolve_operation_uuid(":2", session_store) == "op-def"
+
+    def test_bare_numeric_resolves(self, session_store):
+        session_store.set_image("img-1", kind="use")
+        session_store.set_image("img-2", kind="run", operation_uuid="op-ghi")
+        assert resolve_operation_uuid("2", session_store) == "op-ghi"
+
+    def test_no_session_raises(self, session_store):
+        with pytest.raises(ValueError, match="No active session"):
+            resolve_operation_uuid("@1", session_store)
+
+    def test_no_operation_uuid_raises(self, session_store):
+        session_store.set_image("img-1", kind="use")
+        with pytest.raises(ValueError, match="has no operation UUID"):
+            resolve_operation_uuid("@1", session_store)
+
+    def test_nonexistent_entry_raises(self, session_store):
+        session_store.set_image("img-1", kind="use")
+        with pytest.raises(ValueError, match="not found"):
+            resolve_operation_uuid("@999", session_store)
+
+    def test_at_minus_n_walks_back_from_tip(self, session_store):
+        session_store.set_image("img-1", kind="use")
+        session_store.set_image("img-2", kind="run", operation_uuid="op-prev")
+        session_store.set_image("img-3", kind="run", operation_uuid="op-tip")
+        assert resolve_operation_uuid("@-1", session_store) == "op-prev"
+
+    def test_at_minus_n_exceeds_history(self, session_store):
+        session_store.set_image("img-1", kind="use")
+        with pytest.raises(ValueError, match="Cannot go back"):
+            resolve_operation_uuid("@-5", session_store)
+
+    def test_at_plus_n_walks_forward(self, session_store):
+        session_store.set_image("img-1", kind="use")
+        session_store.set_image("img-2", kind="run", operation_uuid="op-next")
+        session_store.rollback(1)
+        assert resolve_operation_uuid("@+1", session_store) == "op-next"
+
+    def test_at_plus_n_exceeds_children(self, session_store):
+        session_store.set_image("img-1", kind="use")
+        session_store.set_image("img-2", kind="run", operation_uuid="op-x")
+        with pytest.raises(ValueError, match="Cannot go forward"):
+            resolve_operation_uuid("@+5", session_store)
+
+    def test_at_zero_errors_with_clear_message(self, session_store):
+        session_store.set_image("img-1", kind="use")
+        with pytest.raises(ValueError, match="non-zero"):
+            resolve_operation_uuid("@0", session_store)
+
+    def test_colon_minus_n_resolves(self, session_store):
+        session_store.set_image("img-1", kind="use")
+        session_store.set_image("img-2", kind="run", operation_uuid="op-prev")
+        session_store.set_image("img-3", kind="run", operation_uuid="op-tip")
+        assert resolve_operation_uuid(":-1", session_store) == "op-prev"
+
+    def test_bare_at_resolves_to_tip(self, session_store):
+        session_store.set_image("img-1", kind="use")
+        session_store.set_image("img-2", kind="run", operation_uuid="op-tip")
+        assert resolve_operation_uuid("@", session_store) == "op-tip"
+        assert resolve_operation_uuid(":", session_store) == "op-tip"
+
+    def test_bare_at_with_no_op_on_tip_raises(self, session_store):
+        session_store.set_image("img-1", kind="use")
+        with pytest.raises(ValueError, match="has no operation UUID"):
+            resolve_operation_uuid("@", session_store)
+
+    def test_head_resolves_to_tip(self, session_store):
+        session_store.set_image("img-1", kind="use")
+        session_store.set_image("img-2", kind="run", operation_uuid="op-tip")
+        assert resolve_operation_uuid("HEAD", session_store) == "op-tip"
+
+    def test_head_tilde_n_walks_back(self, session_store):
+        session_store.set_image("img-1", kind="use")
+        session_store.set_image("img-2", kind="run", operation_uuid="op-prev")
+        session_store.set_image("img-3", kind="run", operation_uuid="op-tip")
+        assert resolve_operation_uuid("HEAD~1", session_store) == "op-prev"
+
+    def test_head_tilde_alone_means_one_back(self, session_store):
+        session_store.set_image("img-1", kind="use")
+        session_store.set_image("img-2", kind="run", operation_uuid="op-prev")
+        session_store.set_image("img-3", kind="run", operation_uuid="op-tip")
+        assert resolve_operation_uuid("HEAD~", session_store) == "op-prev"
+
+    def test_head_tilde_zero_errors_with_clear_message(self, session_store):
+        # `HEAD~0` is a recognised reference but semantically invalid
+        # ("0 steps back"). resolve_operation_uuid surfaces the same
+        # "non-zero" error users see for @0 instead of pretending it's
+        # a UUID-shaped token.
+        session_store.set_image("img-1", kind="use")
+        with pytest.raises(ValueError, match="non-zero"):
+            resolve_operation_uuid("HEAD~0", session_store)
+
+    def test_head_tilde_garbage_passes_through(self, session_store):
+        assert resolve_operation_uuid("HEAD~abc", session_store) == "HEAD~abc"
+
+
+# ----------------------------------------------------------------------
+# resolve_operation_uuids -- the list/whitespace-aware façade used by from_args
+# ----------------------------------------------------------------------
+
+
+def run_resolve(items, store):
+    """Invoke resolve_operation_uuids with SESSION_STORE bound, like __main__ does."""
+    SESSION_STORE.set(store)
+    return copy_context().run(resolve_operation_uuids, items)
+
+
+class TestResolveUuids:
+    def test_already_split_passes_through(self, session_store):
+        assert run_resolve([UUID_A, UUID_B], session_store) == [UUID_A, UUID_B]
+
+    def test_space_joined_single_arg_is_split(self, session_store):
+        joined = f"{UUID_A} {UUID_B} {UUID_C}"
+        assert run_resolve([joined], session_store) == [UUID_A, UUID_B, UUID_C]
+
+    def test_newline_joined_is_split(self, session_store):
+        joined = f"{UUID_A}\n      {UUID_B}\n      {UUID_C}"
+        assert run_resolve([joined], session_store) == [UUID_A, UUID_B, UUID_C]
+
+    def test_mixed_args_and_joined(self, session_store):
+        items = [f"{UUID_A} {UUID_B}", UUID_C, f"\t{UUID_A}"]
+        assert run_resolve(items, session_store) == [UUID_A, UUID_B, UUID_C, UUID_A]
+
+    def test_empty_list(self, session_store):
+        assert run_resolve([], session_store) == []
+
+    def test_invalid_token_raises(self, session_store):
+        with pytest.raises(ValueError, match="Invalid operation reference"):
+            run_resolve(["not-a-uuid"], session_store)
+
+    def test_invalid_lists_every_bad_token(self, session_store):
+        with pytest.raises(ValueError) as exc:
+            run_resolve([f"{UUID_A} bogus garbage {UUID_B}"], session_store)
+        msg = str(exc.value)
+        assert "bogus" in msg
+        assert "garbage" in msg
+
+    def test_at_n_is_resolved_to_real_uuid(self, session_store):
+        session_store.set_image("img-1", kind="use")
+        session_store.set_image("img-2", kind="run", operation_uuid=UUID_A)
+        # `@2` should expand to UUID_A, not be passed through.
+        assert run_resolve(["@2"], session_store) == [UUID_A]
+
+    def test_head_is_resolved_to_real_uuid(self, session_store):
+        session_store.set_image("img-1", kind="use")
+        session_store.set_image("img-2", kind="run", operation_uuid=UUID_A)
+        assert run_resolve(["HEAD"], session_store) == [UUID_A]
+
+    def test_mixed_refs_and_uuids_are_resolved(self, session_store):
+        session_store.set_image("img-1", kind="use")
+        session_store.set_image("img-2", kind="run", operation_uuid=UUID_A)
+        session_store.set_image("img-3", kind="run", operation_uuid=UUID_B)
+        # HEAD -> UUID_B, HEAD~1 -> UUID_A, raw UUID_C passes through.
+        result = run_resolve(["HEAD HEAD~1", UUID_C], session_store)
+        assert result == [UUID_B, UUID_A, UUID_C]
+
+    def test_head_with_no_operation_on_tip_raises(self, session_store):
+        # `use` entry has no operation_uuid -- the message is propagated
+        # back so the caller can see what went wrong with which token.
+        session_store.set_image("img-1", kind="use")
+        with pytest.raises(ValueError, match="has no operation UUID"):
+            run_resolve(["HEAD"], session_store)
+
+    def test_resolved_value_must_be_a_uuid(self, session_store):
+        # Defensive: if history points at a non-UUID operation_uuid (e.g.
+        # legacy data), resolve_operation_uuids should flag the token rather than
+        # ship a malformed UUID to the API.
+        session_store.set_image("img-1", kind="use")
+        session_store.set_image("img-2", kind="run", operation_uuid="not-a-uuid")
+        with pytest.raises(ValueError, match="Invalid operation reference"):
+            run_resolve(["HEAD"], session_store)
diff --git a/tests/test_run.py b/tests/test_run.py
index 11d933e..a0b3dbc 100644
--- a/tests/test_run.py
+++ b/tests/test_run.py
@@ -171,6 +171,19 @@ def test_poll_until_success(self, contree_client, session_store, capsys):
         parsed = json.loads(capsys.readouterr().out)
         assert parsed["status"] == "SUCCESS"
 
+    def test_unknown_field_passes_through(self, contree_client, session_store, capsys):
+        """New server fields on the operation reach JSON output as-is."""
+        session_store.set_image(IMG_UUID, kind="test")
+        args = _default_args()
+        op_body = json.loads(_op_response(status="SUCCESS", exit_code=0).body)
+        op_body["session_key"] = "sess-1"
+        op_body["future_field"] = "anything"
+        responses = [_spawn_response(), FakeResponse.json(op_body)]
+        _run_cmd(contree_client, args, responses, store=session_store)
+        parsed = json.loads(capsys.readouterr().out)
+        assert parsed["session_key"] == "sess-1"
+        assert parsed["future_field"] == "anything"
+
     def test_poll_default_shows_stdout(self, contree_client, session_store, capsys):
         session_store.set_image(IMG_UUID, kind="test")
         args = _default_args()
diff --git a/tests/test_session_cmd.py b/tests/test_session_cmd.py
index 2f6cae4..1223f56 100644
--- a/tests/test_session_cmd.py
+++ b/tests/test_session_cmd.py
@@ -612,7 +612,10 @@ def test_wait_exit_code_failure_sets_status_and_rc(
         assert rc == 1
         out = capsys.readouterr().out.strip().splitlines()
         data = json.loads(out[0])
-        assert data["status"] == "FAILED"
+        # status is the server's word; exit_code is reported separately
+        # and propagated to the CLI rc. Branch is not advanced because
+        # the sandbox process failed (non-zero exit).
+        assert data["status"] == "SUCCESS"
         assert data["exit_code"] == 1
         assert session_store.current_image == "img-1"
 
@@ -642,11 +645,36 @@ def test_wait_outputs_title_and_exit_code_from_cached_meta(
         assert rc == 2
         out = capsys.readouterr().out.strip().splitlines()
         data = json.loads(out[0])
-        assert data["status"] == "FAILED"
+        assert data["status"] == "SUCCESS"
         assert data["exit_code"] == 2
         assert data["title"] == "sleep 1"
         assert session_store.current_image == "img-1"
 
+    def test_wait_unknown_field_passes_through(
+        self, contree_client, session_store, capsys
+    ) -> None:
+        """New server fields reach the row even when not hardcoded."""
+        SESSION_STORE.set(session_store)
+        FORMATTER.set(JSONFormatter())
+        session_store.set_image("img-1", kind="use")
+        contree_client.respond_json(
+            {
+                "uuid": "op-x",
+                "status": "SUCCESS",
+                "kind": "instance",
+                "duration": 1,
+                "session_key": "sess-1",
+                "future_field": "anything",
+                "metadata": {"result": {"state": {"exit_code": 0}}},
+                "result": {"image": "img-new"},
+            }
+        )
+        cmd_wait(WaitArgs(op_ids=["op-x"]))
+        out = capsys.readouterr().out.strip().splitlines()
+        data = json.loads(out[0])
+        assert data["session_key"] == "sess-1"
+        assert data["future_field"] == "anything"
+
     def test_show_defaults_to_last_20_and_logs_info(
         self,
         store: SessionStore,
@@ -849,9 +877,16 @@ def test_rollback_args_absolute(self) -> None:
         assert args.forward == 0
 
     def test_wait_args(self) -> None:
-        ns = argparse.Namespace(op_ids=["a", "b"])
+        uuid_a = "019e3fb6-e2d8-7350-a8f9-8b2b5ebfda7f"
+        uuid_b = "019e3fb6-e447-760d-b7ab-62ef51f91b1f"
+        ns = argparse.Namespace(op_ids=[uuid_a, uuid_b])
         args = WaitArgs.from_args(ns)
-        assert args.op_ids == ["a", "b"]
+        assert args.op_ids == [uuid_a, uuid_b]
+
+    def test_wait_args_rejects_invalid_uuid(self) -> None:
+        ns = argparse.Namespace(op_ids=["definitely-not-uuid"])
+        with pytest.raises(ValueError, match="Invalid operation reference"):
+            WaitArgs.from_args(ns)
 
     def test_show_args(self) -> None:
         ns = argparse.Namespace(
diff --git a/tests/test_shell_completer.py b/tests/test_shell_completer.py
index a0ce6ff..5303050 100644
--- a/tests/test_shell_completer.py
+++ b/tests/test_shell_completer.py
@@ -1378,9 +1378,9 @@ def test_invalidate_all_only_drops_active_profile(self, image_cache):
 
 
 class TestEnvKeySource:
-    """`env -d <TAB>` completes session env keys."""
+    """`env -U <TAB>` completes session env keys."""
 
-    def test_env_d_completes_existing_keys(self, image_cache):
+    def test_env_unset_completes_existing_keys(self, image_cache):
         store = MagicMock()
         store.cache = image_cache
         store.get_env.return_value = {"PATH": "/usr/bin", "DEBUG": "1"}
@@ -1388,7 +1388,7 @@ def test_env_d_completes_existing_keys(self, image_cache):
         results = _complete_line(
             completer,
             "",
-            "contree env -d ",
+            "contree env -U ",
             begidx=15,
         )
         assert "PATH " in results
@@ -1399,7 +1399,7 @@ def test_env_no_store_returns_empty(self):
         results = _complete_line(
             completer,
             "",
-            "contree env -d ",
+            "contree env -U ",
             begidx=15,
         )
         assert results == []
diff --git a/tests/test_show.py b/tests/test_show.py
index bf772da..0c0689f 100644
--- a/tests/test_show.py
+++ b/tests/test_show.py
@@ -4,11 +4,10 @@
 import json
 from contextvars import copy_context
 
-import pytest
 from conftest import ContreeTestClient
 
 from contree_cli import FORMATTER, SESSION_STORE
-from contree_cli.cli.show import ShowArgs, _resolve_operation_uuid, cmd_show
+from contree_cli.cli.show import ShowArgs, cmd_show
 from contree_cli.client import decode_stream
 from contree_cli.output import (
     CSVFormatter,
@@ -146,6 +145,16 @@ def test_json_output(self, contree_client, capsys, session_store):
         assert parsed["uuid"] == "op-abc"
         assert parsed["duration"] == 5.0
 
+    def test_unknown_field_passes_through(self, contree_client, capsys, session_store):
+        """New server fields reach the row even when not hardcoded."""
+        op = _make_op()
+        op["session_key"] = "sess-1"
+        op["future_field"] = "anything"
+        _run_cmd(contree_client, op, formatter=JSONFormatter(), store=session_store)
+        parsed = json.loads(capsys.readouterr().out)
+        assert parsed["session_key"] == "sess-1"
+        assert parsed["future_field"] == "anything"
+
     def test_table_output(self, contree_client, capsys, session_store):
         fmt = TableFormatter()
         _run_cmd(contree_client, _make_op(), formatter=fmt, store=session_store)
@@ -292,58 +301,26 @@ def test_non_terminal_op_not_cached(self, contree_client, session_store):
         assert contree_client.request_count == 2  # new request (not cached)
 
 
-class TestResolveOperationUuid:
-    def test_at_prefix_resolves(self, session_store):
-        session_store.set_image("img-1", kind="use")
-        session_store.set_image("img-2", kind="run", operation_uuid="op-abc")
-        result = _resolve_operation_uuid("@2", session_store)
-        assert result == "op-abc"
-
-    def test_colon_prefix_resolves(self, session_store):
-        session_store.set_image("img-1", kind="use")
-        session_store.set_image("img-2", kind="run", operation_uuid="op-def")
-        result = _resolve_operation_uuid(":2", session_store)
-        assert result == "op-def"
-
-    def test_bare_numeric_resolves(self, session_store):
-        session_store.set_image("img-1", kind="use")
-        session_store.set_image("img-2", kind="run", operation_uuid="op-ghi")
-        result = _resolve_operation_uuid("2", session_store)
-        assert result == "op-ghi"
-
-    def test_non_numeric_passthrough(self, session_store):
-        result = _resolve_operation_uuid("abc-def-uuid", session_store)
-        assert result == "abc-def-uuid"
-
-    def test_no_session_raises(self, session_store):
-        with pytest.raises(ValueError, match="No active session"):
-            _resolve_operation_uuid("@1", session_store)
-
-    def test_no_operation_uuid_raises(self, session_store):
-        session_store.set_image("img-1", kind="use")
-        with pytest.raises(ValueError, match="has no operation UUID"):
-            _resolve_operation_uuid("@1", session_store)
-
-    def test_nonexistent_entry_raises(self, session_store):
-        session_store.set_image("img-1", kind="use")
-        with pytest.raises(ValueError, match="not found"):
-            _resolve_operation_uuid("@999", session_store)
-
-
-class TestExitCodeToFailed:
-    def test_success_with_exit_code_shows_failed(
+class TestStatusVerbatim:
+    def test_success_with_nonzero_exit_keeps_status(
         self, contree_client, capsys, session_store
     ):
-        """SUCCESS + exit_code!=0 should show status as FAILED."""
+        """SUCCESS + exit_code!=0 reports SUCCESS; exit_code is its own column.
+
+        Operation status is an orchestration verdict (did the API run the
+        job?). What the sandbox process did with its exit code is a
+        separate concern and lives in the `exit_code` column.
+        """
         op = _make_op(status="SUCCESS", exit_code=1)
         _run_cmd(contree_client, op, store=session_store)
         out = capsys.readouterr().out
-        assert "FAILED" in out
+        assert "SUCCESS" in out
+        assert "FAILED" not in out
+        assert ",1," in out  # exit_code column
 
     def test_success_with_exit_code_zero_shows_success(
         self, contree_client, capsys, session_store
     ):
-        """SUCCESS + exit_code=0 should still show SUCCESS."""
         op = _make_op(status="SUCCESS", exit_code=0)
         _run_cmd(contree_client, op, store=session_store)
         out = capsys.readouterr().out
@@ -352,8 +329,35 @@ def test_success_with_exit_code_zero_shows_success(
     def test_success_with_no_exit_code_shows_success(
         self, contree_client, capsys, session_store
     ):
-        """SUCCESS + exit_code=None should show SUCCESS."""
         op = _make_op(status="SUCCESS", exit_code=None)
         _run_cmd(contree_client, op, store=session_store)
         out = capsys.readouterr().out
         assert "SUCCESS" in out
+
+
+class TestShowRaw:
+    def test_raw_prints_server_payload_as_jsonl(
+        self, contree_client, capsys, session_store
+    ):
+        # --raw skips formatter routing and derived columns and emits JSONL:
+        # one operation per line carrying the full server JSON, so multi-UUID
+        # `op show --raw` streams cleanly into `jq -c` / `awk`.
+        op = _make_op(status="SUCCESS", exit_code=1)
+        op["server_only_field"] = "preserved"
+        contree_client.respond_json(op)
+        FORMATTER.set(JSONFormatter())
+        SESSION_STORE.set(session_store)
+        ctx = copy_context()
+        ctx.run(cmd_show, ShowArgs(uuid="op-abc", raw=True))
+        out = capsys.readouterr().out
+        lines = out.strip().splitlines()
+        assert len(lines) == 1
+        parsed = json.loads(lines[0])
+        # No derived columns (no `exit_code` flattening, no `image`/`tag`
+        # promotion); the entire server payload is what came back.
+        assert parsed["status"] == "SUCCESS"
+        assert parsed["server_only_field"] == "preserved"
+        assert "metadata" in parsed  # full nested structure preserved
+        assert parsed["metadata"]["result"]["state"]["exit_code"] == 1
+        # JSONL contract: exactly one line of compact JSON per op.
+        assert "\n" not in lines[0]
diff --git a/tests/test_types.py b/tests/test_types.py
index 4023529..0fdbc17 100644
--- a/tests/test_types.py
+++ b/tests/test_types.py
@@ -5,6 +5,7 @@
 
 import pytest
 
+from contree_cli.output import ListSorter
 from contree_cli.types import FLAGS, parse_datetime, parse_interval
 
 REF = datetime(2025, 6, 15, 12, 0, 0, tzinfo=timezone.utc)
@@ -269,3 +270,59 @@ def test_no_duplicate_flags_in_registry(self) -> None:
                         f" FLAGS[{name!r}]"
                     )
                 seen[flag] = name
+
+
+class TestListSorter:
+    def test_skips_nested_dict_and_list(self) -> None:
+        out = ListSorter().order({"a": 1, "b": {"x": 1}, "c": [1, 2]})
+        assert dict(out) == {"a": 1}
+
+    def test_unknown_fields_pass_through(self) -> None:
+        out = ListSorter().order({"future": "anything", "n": 42})
+        assert dict(out) == {"future": "anything", "n": 42}
+
+    def test_datetime_fields_parsed(self) -> None:
+        out = ListSorter().order({"created_at": "2025-01-02T03:04:05Z"})
+        assert out["created_at"] == datetime(2025, 1, 2, 3, 4, 5, tzinfo=timezone.utc)
+
+    def test_duration_to_timedelta(self) -> None:
+        assert ListSorter().order({"duration": 5})["duration"] == timedelta(seconds=5)
+
+    def test_null_error_becomes_empty(self) -> None:
+        assert ListSorter().order({"error": None})["error"] == ""
+
+    def test_null_tag_becomes_empty(self) -> None:
+        assert ListSorter().order({"tag": None})["tag"] == ""
+
+    def test_mode_formatted_as_octal(self) -> None:
+        assert ListSorter().order({"mode": 0o755})["mode"] == "755"
+
+    def test_mtime_to_datetime(self) -> None:
+        out = ListSorter().order({"mtime": 1700000000})
+        assert isinstance(out["mtime"], datetime)
+        assert out["mtime"].tzinfo == timezone.utc
+
+    def test_preserves_insertion_order(self) -> None:
+        keys = list(ListSorter().order({"c": 1, "a": 2, "b": 3}))
+        assert keys == ["c", "a", "b"]
+
+    def test_head_pinned_first(self) -> None:
+        keys = list(ListSorter(head=("z",)).order({"a": 1, "b": 2, "z": 3}))
+        assert keys == ["z", "a", "b"]
+
+    def test_tail_pinned_last(self) -> None:
+        keys = list(ListSorter(tail=("error",)).order({"error": "x", "a": 1}))
+        assert keys == ["a", "error"]
+
+    def test_head_tail_absent_keys_skipped(self) -> None:
+        out = ListSorter(head=("nope",), tail=("missing",)).order({"a": 1})
+        assert list(out) == ["a"]
+
+    def test_column_order_memoised_across_calls(self) -> None:
+        """Column order locks in from first occurrence."""
+        sorter = ListSorter(tail=("error",))
+        first = list(sorter.order({"a": 1, "b": 2}))
+        second = list(sorter.order({"b": 3, "a": 4, "c": 5}))
+        assert first == ["a", "b"]
+        # Second call: a/b keep first-seen order, c appended, error skipped.
+        assert second == ["a", "b", "c"]