diff --git a/.agent-os/decision-policy.yaml b/.agent-os/decision-policy.yaml new file mode 100644 index 0000000..a5258cc --- /dev/null +++ b/.agent-os/decision-policy.yaml @@ -0,0 +1,3 @@ +decision_policy: + strictness: warn + downgrade_reason: "" diff --git a/.agent-os/specs.yaml b/.agent-os/specs.yaml index 046cdff..ac8fc6f 100644 --- a/.agent-os/specs.yaml +++ b/.agent-os/specs.yaml @@ -1,7 +1,17 @@ -active_spec: SPEC-20260513-001 +active_spec: SPEC-20260529-001 specs: - id: SPEC-20260513-001 title: "KnowledgeOS Core Product Philosophy" status: active created_at: 2026-05-13T05:59:52.646718+00:00 path: .agent-os/specs/SPEC-20260513-001 + - id: SPEC-20260519-001 + title: "Capability effect verification" + status: active + created_at: 2026-05-19T01:20:48.972160+00:00 + path: .agent-os/specs/SPEC-20260519-001 + - id: SPEC-20260529-001 + title: "Decision Graph Module" + status: active + created_at: 2026-05-29T14:25:59.993564+00:00 + path: .agent-os/specs/SPEC-20260529-001 diff --git a/.agent-os/specs/SPEC-20260529-001/acceptance.md b/.agent-os/specs/SPEC-20260529-001/acceptance.md new file mode 100644 index 0000000..f0f0e82 --- /dev/null +++ b/.agent-os/specs/SPEC-20260529-001/acceptance.md @@ -0,0 +1,10 @@ +# Acceptance + +- `decision-event` records command-generated decision events and returns `DECISION_OK`. +- `decision-query` returns filtered parent/child decision events. +- `verify-decisions` rejects forged or orphaned decision events and emits `DECISION_VERIFY_OK`. +- `decision-policy strictness: warn` does not block linear work without decision events. +- `decision-policy strictness: enforce` blocks completion without valid decision evidence. +- `decision-policy strictness: off` requires `downgrade_reason`. +- `render-html --kind decision-map` creates a self-contained HTML sidecar with source hash metadata. +- Documentation explains Decision Graph as a module, not kernel. 
diff --git a/.agent-os/specs/SPEC-20260529-001/alignment.md b/.agent-os/specs/SPEC-20260529-001/alignment.md new file mode 100644 index 0000000..9eb87cb --- /dev/null +++ b/.agent-os/specs/SPEC-20260529-001/alignment.md @@ -0,0 +1,3 @@ +# Alignment + +No alignment has been run yet. diff --git a/.agent-os/specs/SPEC-20260529-001/change-log.ndjson b/.agent-os/specs/SPEC-20260529-001/change-log.ndjson new file mode 100644 index 0000000..52e3d12 --- /dev/null +++ b/.agent-os/specs/SPEC-20260529-001/change-log.ndjson @@ -0,0 +1,2 @@ +{"event_type": "create-spec", "set_active": true, "timestamp": "2026-05-29T14:25:59.994039+00:00", "title": "Decision Graph Module"} +{"timestamp":"2026-05-29T14:36:16Z","change":"Filled Decision Graph Module spec with public contract, acceptance, and non-goals."} diff --git a/.agent-os/specs/SPEC-20260529-001/non-goals.md b/.agent-os/specs/SPEC-20260529-001/non-goals.md new file mode 100644 index 0000000..e2f8241 --- /dev/null +++ b/.agent-os/specs/SPEC-20260529-001/non-goals.md @@ -0,0 +1,7 @@ +# Non-Goals + +- Do not store hidden chain-of-thought. +- Do not make Decision Graph mandatory for minimal projects. +- Do not replace `trace-step`, `phase-task`, `capability-event`, or `artifact-assert`. +- Do not auto-open HTML in v1. +- Do not build a full visual Workbench UI in this change. diff --git a/.agent-os/specs/SPEC-20260529-001/spec.md b/.agent-os/specs/SPEC-20260529-001/spec.md new file mode 100644 index 0000000..3c7d10e --- /dev/null +++ b/.agent-os/specs/SPEC-20260529-001/spec.md @@ -0,0 +1,26 @@ +# Spec: Decision Graph Module + +## Intent + +Add an optional Decision Graph module that records public, auditable natural-language decision summaries during KnowledgeOS runs. The module captures plan branches, route selections, inserted steps, abandoned branches, rollbacks, superseded paths, deferred work, human decisions, risk tradeoffs, and final decisions. 
+ +The module must preserve KnowledgeOS product philosophy: + +```text +small kernel + pluggable modules + optional apps + project-level strictness + mandatory visible checkpoints +``` + +Decision Graph is not kernel. It is a module that can be observed by default and enforced only by project policy. + +## Public Contract + +- `decision-event` writes `.agent-os/runs/<RUN_ID>/decision-events.ndjson` and emits `DECISION_OK`. +- `decision-query` filters decision events by run, task, kind, status, or parent id. +- `verify-decisions` checks command evidence, orphan parents, duplicate ids, invalid kinds/statuses, and unexplained abandoned/rollback/superseded branches. +- `render-html --kind decision-map` renders a static HTML sidecar from `decision-events.ndjson`. +- `.agent-os/decision-policy.yaml` defaults to `strictness: warn`. +- `complete-task` blocks only when decision verification status is failed, including `strictness: enforce` without valid decision evidence. + +## Source Of Truth + +The source of truth is NDJSON under `.agent-os/runs/<RUN_ID>/decision-events.ndjson`. HTML is presentation only. diff --git a/.agent-os/startup-prompt.md b/.agent-os/startup-prompt.md index fbf1078..66b7cd2 100644 --- a/.agent-os/startup-prompt.md +++ b/.agent-os/startup-prompt.md @@ -9,27 +9,30 @@ This prompt is only the session trigger. Durable rules live in `AGENTS.md`, `.ag Before substantial work: 1. Read `AGENTS.md`. -2. Read `.agent-os/workspace.yaml`, `.agent-os/project.yaml`, `.agent-os/tasks.yaml`, `.agent-os/specs.yaml`, `.agent-os/phase-policy.yaml`, `.agent-os/effect-policy.yaml`, `.agent-os/decisions.yaml`, `.agent-os/evals.yaml`, `.agent-os/fabric-link.yaml`, `.agent-os/read-policy.yaml`, `.agent-os/write-policy.yaml`, `.agent-os/dispatch-policy.yaml`, and `.agent-os/tool-registry.yaml`. +2. 
Read `.agent-os/workspace.yaml`, `.agent-os/project.yaml`, `.agent-os/tasks.yaml`, `.agent-os/specs.yaml`, `.agent-os/phase-policy.yaml`, `.agent-os/decision-policy.yaml`, `.agent-os/effect-policy.yaml`, `.agent-os/decisions.yaml`, `.agent-os/evals.yaml`, `.agent-os/fabric-link.yaml`, `.agent-os/read-policy.yaml`, `.agent-os/write-policy.yaml`, `.agent-os/dispatch-policy.yaml`, and `.agent-os/tool-registry.yaml`. 3. Run `./bin/knowledgeos doctor --project-root . --summary` and do not proceed if it fails. 4. If the user says `create spec`, `align spec`, `创建spec`, `对齐spec`, or equivalent, run `./bin/knowledgeos create-spec --project-root . --title ""` or `./bin/knowledgeos align-spec --project-root . --task-id <task-id>` before execution. -5. Select or confirm one task id from `.agent-os/tasks.yaml`. -6. Run `./bin/knowledgeos route-task --project-root . --task-id <task-id>`. -7. Run `./bin/knowledgeos dispatch-task --project-root . --task-id <task-id>` before invoking subagents, MCP tools, skills, workflows, or scripts. -8. Before planned mutation, run `./bin/knowledgeos check-route-write --project-root . --task-id <task-id> --path <planned-path>`. -9. Create run evidence with `./bin/knowledgeos run-task --project-root . --task-id <task-id>`. -10. Write/update the execution context with `./bin/knowledgeos context-pack --project-root . --task-id <task-id> --run-id <run-id>` and `./bin/knowledgeos plan-task --project-root . --task-id <task-id> --run-id <run-id> --summary "<summary>"`. -11. Pause at consultation checkpoints, state your recommended next move, name the tradeoff, and ask the human whether to proceed. -12. Record run-bound dispatch evidence with `./bin/knowledgeos dispatch-task --project-root . --task-id <task-id> --run-id <run-id>`. -13. Record public operational progress with `./bin/knowledgeos trace-step --project-root . 
--task-id <task-id> --run-id <run-id> --step <step> --note "<public trace>" --evidence "<command/file/user confirmation>"`, and relay the returned `TRACE_OK` marker. -14. Record public phase evidence with `./bin/knowledgeos phase-task --project-root . --task-id <task-id> --run-id <run-id> --phase <route|plan|review|dispatch|execute|report> --status completed --note "<public trace>" --evidence "<command/file/user confirmation>"`, and relay the returned `CHECKPOINT_OK` marker. -15. Record MCP, skill, subagent, orchestrator, or important script use with `./bin/knowledgeos capability-event --project-root . --task-id <task-id> --run-id <run-id> --kind <kind> --id <capability-id> --purpose "<purpose>"`, and relay the returned `CAPABILITY_OK` marker. -16. Verify real side effects with `./bin/knowledgeos artifact-assert --project-root . --task-id <task-id> --run-id <run-id> --kind <kind> --path <artifact>` and relay the returned `EFFECT_OK` marker. -17. Run `./bin/knowledgeos eval-task --project-root . --task-id <task-id> --run-id <run-id>`; do not manually append eval status. -18. Run `./bin/knowledgeos verify-context --project-root . --task-id <task-id> --run-id <run-id>`, `./bin/knowledgeos verify-lifecycle --project-root . --task-id <task-id> --run-id <run-id>`, and `./bin/knowledgeos verify-effects --project-root . --task-id <task-id> --run-id <run-id>`; relay the returned `EFFECT_VERIFY_OK` marker before claiming effect verification success. -19. Use `./bin/knowledgeos complete-task --project-root . --task-id <task-id> --run-id <run-id> --summary "<summary>"`; it enforces spec/context/plan, lifecycle, capability visibility, effect verification, and required postflight. -20. If a shared-fabric postflight hook is configured, report `[SYNC_OK]` only after `complete-task` returns `sync_status: SYNC_OK`. -21. For reset requests, run `./bin/knowledgeos reset-project --project-root . --mode <soft|hard> --dry-run` before destructive action. -22. 
For old-project reorganization requests, run `./bin/knowledgeos migrate-legacy-project --project-root . --write-plan` before moving files. -23. For historical or superseded files that should be stored but not read by default, run `./bin/knowledgeos archive-legacy-project --project-root . --write-plan` before moving files into `archive/`. +5. If the user starts or continues a durable plan/spec conversation, run `./bin/knowledgeos thread-plan current --project-root .` or `./bin/knowledgeos thread-plan start --project-root . --title "<natural language goal>"`; append plain-language progress with `./bin/knowledgeos thread-plan append --project-root . --thread-id <thread-id> --kind <kind> --text "<plain note>"` and relay `THREAD_PLAN_OK`. +6. Select or confirm one task id from `.agent-os/tasks.yaml`. +7. Run `./bin/knowledgeos route-task --project-root . --task-id <task-id>`. +8. Run `./bin/knowledgeos dispatch-task --project-root . --task-id <task-id>` before invoking subagents, MCP tools, skills, workflows, or scripts. +9. Before planned mutation, run `./bin/knowledgeos check-route-write --project-root . --task-id <task-id> --path <planned-path>`. +10. Create run evidence with `./bin/knowledgeos run-task --project-root . --task-id <task-id>`. +11. Write/update the execution context with `./bin/knowledgeos context-pack --project-root . --task-id <task-id> --run-id <run-id>` and `./bin/knowledgeos plan-task --project-root . --task-id <task-id> --run-id <run-id> --summary "<summary>"`. +12. Pause at consultation checkpoints, state your recommended next move, name the tradeoff, and ask the human whether to proceed. +13. Record run-bound dispatch evidence with `./bin/knowledgeos dispatch-task --project-root . --task-id <task-id> --run-id <run-id>`. +14. Record public operational progress with `./bin/knowledgeos trace-step --project-root . 
--task-id <task-id> --run-id <run-id> --step <step> --note "<public trace>" --evidence "<command/file/user confirmation>"`, and relay the returned `TRACE_OK` marker. +15. Record public phase evidence with `./bin/knowledgeos phase-task --project-root . --task-id <task-id> --run-id <run-id> --phase <route|plan|review|dispatch|execute|report> --status completed --note "<public trace>" --evidence "<command/file/user confirmation>"`, and relay the returned `CHECKPOINT_OK` marker. +16. Record MCP, skill, subagent, orchestrator, or important script use with `./bin/knowledgeos capability-event --project-root . --task-id <task-id> --run-id <run-id> --kind <kind> --id <capability-id> --purpose "<purpose>"`, and relay the returned `CAPABILITY_OK` marker. +17. Record public decision changes with `./bin/knowledgeos decision-event --project-root . --task-id <task-id> --run-id <run-id> --kind <kind> --title "<title>" --summary "<summary>" --reason "<reason>" --evidence "<evidence>"`, and relay the returned `DECISION_OK` marker when plans branch, change, roll back, or abandon a route. +18. Verify real side effects with `./bin/knowledgeos artifact-assert --project-root . --task-id <task-id> --run-id <run-id> --kind <kind> --path <artifact>` and relay the returned `EFFECT_OK` marker. +19. Run `./bin/knowledgeos eval-task --project-root . --task-id <task-id> --run-id <run-id>`; do not manually append eval status. +20. Run `./bin/knowledgeos verify-context --project-root . --task-id <task-id> --run-id <run-id>`, `./bin/knowledgeos verify-lifecycle --project-root . --task-id <task-id> --run-id <run-id>`, `./bin/knowledgeos verify-effects --project-root . --task-id <task-id> --run-id <run-id>`, and `./bin/knowledgeos verify-decisions --project-root . --task-id <task-id> --run-id <run-id>`; relay `EFFECT_VERIFY_OK` and `DECISION_VERIFY_OK` before claiming verification success. +21. Use `./bin/knowledgeos complete-task --project-root . 
--task-id <task-id> --run-id <run-id> --summary "<summary>"`; it enforces spec/context/plan, lifecycle, capability visibility, effect verification, decision verification, and required postflight. +22. For medium, high, or complex tasks, include the returned `FLOW_OK` Mermaid Mission Flow in the final answer; if needed, run `./bin/knowledgeos flow-summary --project-root . --run-id <run-id>`. +23. If a shared-fabric postflight hook is configured, report `[SYNC_OK]` only after `complete-task` returns `sync_status: SYNC_OK`. +24. For reset requests, run `./bin/knowledgeos reset-project --project-root . --mode <soft|hard> --dry-run` before destructive action. +25. For old-project reorganization requests, run `./bin/knowledgeos migrate-legacy-project --project-root . --write-plan` before moving files. +26. For historical or superseded files that should be stored but not read by default, run `./bin/knowledgeos archive-legacy-project --project-root . --write-plan` before moving files into `archive/`. -Never claim boot, route, dispatch, write safety, spec alignment, context pack, plan, trace, checkpoint, capability, effect, eval, completion, or sync success without command evidence. +Never claim boot, route, dispatch, write safety, spec alignment, thread plan, context pack, plan, trace, checkpoint, capability, decision, effect, eval, completion, flow, or sync success without command evidence. 
diff --git a/.agent-os/tasks.yaml b/.agent-os/tasks.yaml index ea337dd..60a5bb0 100644 --- a/.agent-os/tasks.yaml +++ b/.agent-os/tasks.yaml @@ -1195,3 +1195,288 @@ tasks: - "harness-audit detects dispatch-task entries that lack --run-id as lifecycle drift" - "harness-audit apply upgrades all stale workflow profiles and doctor passes" - "Project5.5 dry-run audit reports every stale router profile consistently" + - id: KOS-T065 + title: "Add composable HTML sidecar reports" + type: "route_bound_execution_guard" + status: completed + complexity: "medium" + risk: "html_report_surface" + outputs: + - "knowledgeos/cli.py" + - "tests/test_knowledgeos_cli.py" + - "docs/executable-control-plane.md" + - "docs/LIVE-REPORT.md" + acceptance: + - "render-html creates receipt and handoff HTML sidecars with source hash metadata" + - "render-html creates self-contained rich reports without remote assets" + - "render-html can compose fragment manifests into one HTML page" + - "doctor continues to trust canonical Markdown YAML and NDJSON evidence" + - id: KOS-T066 + title: "Add capability effect verification gates" + type: "route_bound_execution_guard" + status: completed + complexity: "medium" + risk: "verifier_gate_regression" + outputs: + - "knowledgeos/cli.py" + - "tests/test_knowledgeos_cli.py" + - "docs/executable-control-plane.md" + - "docs/LIVE-REPORT.md" + - "docs/route-bound-execution-guard.md" + - "docs/public-release-checklist.md" + - "docs/agent-guide.md" + - ".agent-os/effect-policy.yaml" + - ".agent-os/write-policy.yaml" + - ".agent-os/startup-prompt.md" + - "templates/project-control-plane/.agent-os/effect-policy.yaml" + - "templates/project-control-plane/.agent-os/write-policy.yaml" + - "templates/project-control-plane/.agent-os/startup-prompt.md" + - "templates/project-control-plane/AGENTS.md" + - ".agent-os/specs.yaml" + - ".agent-os/specs/SPEC-20260519-001/" + - ".agent-os/workflows/router.yaml" + - "docs/agentos-architecture.md" + - 
"docs/knowledgeos-operating-spec.md" + - "docs/quickstart.md" + - "docs/spec-context-plan.md" + - "docs/workflow-router.md" + - "templates/project-control-plane/.agent-os/workflows/router.yaml" + acceptance: + - "effect-policy controls observe warn enforce and off strictness modes" + - "artifact-assert records EFFECT_OK only after touching the real artifact" + - "verify-effects detects missing or forged effect assertions" + - "complete-task enforces or reports effect verification according to policy" + - "doctor and guardrail tests cover the new effect verifier contract" + - id: KOS-T067 + title: "Create HTML summary for capability effect verification changes" + type: "report_task" + status: cancelled + complexity: "low" + risk: "low" + outputs: + - "reports/drafts/capability-effect-verification-summary.md" + - "reports/drafts/capability-effect-verification-summary.html" + - "reports/drafts/capability-effect-verification-summary.manifest.json" + acceptance: + - "summary explains the change set, bug roots, verification evidence, and current status" + - "HTML is generated as a sidecar from Markdown and includes source hash metadata" + - id: KOS-T068 + title: "Create HTML summary for capability effect verification changes" + type: "route_bound_execution_guard" + status: cancelled + complexity: "low" + risk: "low" + outputs: + - ".agent-os/tasks.yaml" + - "reports/drafts/capability-effect-verification-summary.md" + - "reports/drafts/capability-effect-verification-summary.html" + - "reports/drafts/capability-effect-verification-summary.manifest.json" + acceptance: + - "supersede the unroutable report_task intake entry without deleting evidence" + - "summary explains the change set, bug roots, verification evidence, and current status" + - "HTML is generated as a sidecar from Markdown and includes source hash metadata" + - id: KOS-T069 + title: "Create docs HTML summary for capability effect verification changes" + type: "route_bound_execution_guard" + status: completed 
+ complexity: "low" + risk: "low" + outputs: + - "docs/capability-effect-verification-summary.md" + - "docs/capability-effect-verification-summary.html" + - "docs/capability-effect-verification-summary.manifest.json" + acceptance: + - "summary explains the change set, bug roots, verification evidence, and current status" + - "HTML is generated as a sidecar from Markdown and includes source hash metadata" + - id: KOS-T070 + title: "Restore report_task route profile after HTML summary dry run" + type: "route_bound_execution_guard" + status: completed + complexity: "low" + risk: "router_config_regression" + outputs: + - ".agent-os/workflows/router.yaml" + acceptance: + - "doctor passes after cancelled report_task intake entries remain in task history" + - "report_task routes reports docs outputs and knowledge without weakening lifecycle gates" + - id: KOS-T071 + title: "Add explicit effect verification marker" + type: "route_bound_execution_guard" + status: completed + complexity: "low" + risk: "verifier_reporting_regression" + outputs: + - "knowledgeos/cli.py" + - "tests/test_knowledgeos_cli.py" + - "docs/executable-control-plane.md" + - "docs/agent-guide.md" + - "docs/route-bound-execution-guard.md" + - "docs/public-release-checklist.md" + - "docs/LIVE-REPORT.md" + - ".agent-os/startup-prompt.md" + - "templates/project-control-plane/.agent-os/startup-prompt.md" + - "templates/project-control-plane/AGENTS.md" + acceptance: + - "verify-effects plain output includes EFFECT_VERIFY_OK with status strictness assertion warning and error counts" + - "verify-effects --json includes effect_verify_marker and marker fields" + - "agent guides require relaying EFFECT_VERIFY_OK before claiming effect verification success" + - "tests and doctor pass" + - id: KOS-T072 + title: "Audit staged diff and robustness before GitHub sync" + type: "route_bound_execution_guard" + status: completed + complexity: "medium" + risk: "guardrail_regression" + outputs: + - "knowledgeos/cli.py" + - 
"tests/test_knowledgeos_cli.py" + - "docs/LIVE-REPORT.md" + - "docs/public-release-checklist.md" + acceptance: + - "staged and unstaged diff boundaries are reviewed before commit" + - "full tests and guardrail scenarios pass after any bug fix" + - "bug audit reports root cause reproduction fix and verification" + - id: KOS-T073 + title: "Update changelog and publish staged KnowledgeOS release" + type: "route_bound_execution_guard" + status: completed + complexity: "low" + risk: "release_sync" + outputs: + - "CHANGELOG.md" + acceptance: + - "CHANGELOG documents capability effect verification and HTML sidecar release notes" + - "staged release commit is pushed to GitHub" + - id: KOS-T074 + title: "Add local external-write policy overlay for Antigravity runtime migration" + type: "route_bound_execution_guard" + status: completed + complexity: "medium" + risk: "normal" + outputs: + - "knowledgeos/cli.py" + acceptance: + - "KnowledgeOS can classify explicitly whitelisted local runtime paths outside the project root without opening broad external write access; local-only policy stays under .knowledgeos-local/; doctor and tests pass." + - id: KOS-T075 + title: "Migrate local Gemini CLI workflow to Antigravity CLI with KnowledgeOS compatibility" + type: "migration_task" + status: completed + complexity: "medium" + risk: "normal" + outputs: + - ".knowledgeos-local/migrations/antigravity-cli-migration-20260527.md" + acceptance: + - "Antigravity CLI installs successfully, existing workflow extensions are reconciled or replaced, and KnowledgeOS boot/sync workflow remains usable." 
+ - id: KOS-T076 + title: "Cleanly uninstall Antigravity CLI and Gemini CLI from local machine" + type: "migration_task" + status: completed + complexity: "medium" + risk: "normal" + outputs: + - ".knowledgeos-local/migrations/cli-uninstall-20260529.md" + acceptance: + - "gemini, agy, and agy-knowledgeos commands are absent; CLI-owned config/cache wrappers are removed or intentionally retained with rationale; verification evidence is recorded" + - id: KOS-T077 + title: "Add Decision Graph module" + type: "route_bound_execution_guard" + status: completed + complexity: "medium" + risk: "decision_trace_regression" + outputs: + - "knowledgeos/cli.py" + - "tests/test_knowledgeos_cli.py" + - "docs/decision-graph-module.md" + - "docs/executable-control-plane.md" + - "docs/knowledgeos-operating-spec.md" + - "docs/agent-guide.md" + - "templates/project-control-plane/AGENTS.md" + - "templates/project-control-plane/.agent-os/decision-policy.yaml" + - ".agent-os/decision-policy.yaml" + - ".agent-os/specs.yaml" + - ".agent-os/specs/" + - ".agent-os/tasks.yaml" + acceptance: + - "decision-event records DECISION_OK decision ledger entries with command evidence" + - "decision-query filters decision graph events by run status kind and parent" + - "verify-decisions validates command evidence orphan nodes and unresolved decision branches" + - "render-html decision-map generates self-contained HTML sidecar from decision ledger" + - "complete-task enforces decision verification only when decision-policy strictness is enforce" + - id: KOS-T078 + title: "Add readable completion mission flow" + type: "route_bound_execution_guard" + status: completed + complexity: "medium" + risk: "normal" + outputs: + - "knowledgeos/cli.py" + - "tests/test_knowledgeos_cli.py" + - "docs/executable-control-plane.md" + - "docs/knowledgeos-operating-spec.md" + - "docs/agent-guide.md" + - "templates/project-control-plane/AGENTS.md" + - "templates/project-control-plane/.agent-os/startup-prompt.md" + - 
".agent-os/startup-prompt.md" + - "CHANGELOG.md" + - "docs/LIVE-REPORT.md" + - ".agent-os/tasks.yaml" + acceptance: + - "flow-summary emits a readable layered Mermaid mission flow with FLOW_OK" + - "render-html --kind mission-flow writes a self-contained HTML sidecar" + - "the feature remains presentation/reporting layer, not kernel" + - "doctor, targeted tests, full tests, scenarios, and smoke pass" + - id: KOS-T079 + title: "Add chat-level Thread Plan Ledger" + type: "route_bound_execution_guard" + status: completed + complexity: "medium" + risk: "normal" + outputs: + - "knowledgeos/cli.py" + - "tests/test_knowledgeos_cli.py" + - "docs/executable-control-plane.md" + - "docs/knowledgeos-operating-spec.md" + - "docs/agent-guide.md" + - "docs/thread-plan-ledger.md" + - "docs/public-release-checklist.md" + - "templates/project-control-plane/AGENTS.md" + - "templates/project-control-plane/.agent-os/startup-prompt.md" + - "templates/project-control-plane/.agent-os/write-policy.yaml" + - ".agent-os/startup-prompt.md" + - ".agent-os/write-policy.yaml" + - "CHANGELOG.md" + - "docs/LIVE-REPORT.md" + - ".agent-os/tasks.yaml" + acceptance: + - "thread-plan start creates a chat-level append-only ledger and current thread pointer" + - "thread-plan append, current, link-run, and render work with readable natural language output" + - "thread-plan does not block complete-task or lifecycle verification" + - "markdown, mermaid, and self-contained HTML outputs are generated from source ledger" + - "doctor, tests, guardrail scenarios, smoke, and diff check pass" + - id: KOS-T080 + title: "Audit and harden Thread Plan Ledger before GitHub publish" + type: "route_bound_execution_guard" + status: completed + complexity: "medium" + risk: "guardrail_regression" + outputs: + - "knowledgeos/cli.py" + - "tests/test_knowledgeos_cli.py" + - "docs/thread-plan-ledger.md" + - "docs/executable-control-plane.md" + - "docs/knowledgeos-operating-spec.md" + - "docs/agent-guide.md" + - 
"docs/public-release-checklist.md" + - "templates/project-control-plane/AGENTS.md" + - "templates/project-control-plane/.agent-os/startup-prompt.md" + - "templates/project-control-plane/.agent-os/write-policy.yaml" + - ".agent-os/startup-prompt.md" + - ".agent-os/write-policy.yaml" + - "CHANGELOG.md" + - "docs/LIVE-REPORT.md" + - ".agent-os/tasks.yaml" + acceptance: + - "bug audit reproduces any issue before fixing" + - "thread-plan and lifecycle tests pass" + - "public diff leak scan passes before push" + - "changes are pushed to GitHub branch" diff --git a/.agent-os/workflows/router.yaml b/.agent-os/workflows/router.yaml index b55edbc..bf278f3 100644 --- a/.agent-os/workflows/router.yaml +++ b/.agent-os/workflows/router.yaml @@ -47,6 +47,39 @@ workflows: notes: - Inventory local state without copying secrets into public docs. + migration_task: + route_order: + - doctor --project-root . + - route-task --project-root . --task-id <task-id> + - check-route-write --project-root . --task-id <task-id> --path <planned-path> + - run-task --project-root . --task-id <task-id> + - dispatch-task --project-root . --task-id <task-id> --run-id <run-id> + - context-pack --project-root . --task-id <task-id> --run-id <run-id> + - plan-task --project-root . --task-id <task-id> --run-id <run-id> --summary <summary> + - phase-task --project-root . --task-id <task-id> --run-id <run-id> --phase <phase> --status completed --note <public-trace> --evidence <evidence> + - eval-task --project-root . --task-id <task-id> --run-id <run-id> + - verify-context --project-root . --task-id <task-id> --run-id <run-id> + - verify-lifecycle --project-root . --task-id <task-id> --run-id <run-id> + - verify-effects --project-root . --task-id <task-id> --run-id <run-id> + - complete-task --project-root . 
--task-id <task-id> --run-id <run-id> --summary <summary> + eval_profile: migration_task + human_gate: explicit_approval + allow_external_controlled: true + allowed_outputs: + - .knowledgeos-local/ + - docs/ + - knowledgeos/ + - tests/ + - global-agent-fabric/ + - .agent-os/tool-registry.yaml + - .agent-os/dispatch-policy.yaml + - .agent-os/workflows/ + - .agent-os/write-policy.yaml + - README.md + notes: + - External runtime writes stay denied by default and must flow through the local external-write overlay. + - Use this route for reversible local migrations such as CLI install/import work, not for broad public template changes. + template_design: route_order: - doctor --root . --project-root templates/project-control-plane --template @@ -230,6 +263,7 @@ workflows: - .agent-os/workspace.yaml - .agent-os/tool-registry.yaml - .agent-os/startup-prompt.md + - .agent-os/decision-policy.yaml - .agent-os/effect-policy.yaml - .agent-os/phase-policy.yaml - .agent-os/read-policy.yaml diff --git a/.agent-os/write-policy.yaml b/.agent-os/write-policy.yaml index 1820a0d..8b2ad39 100644 --- a/.agent-os/write-policy.yaml +++ b/.agent-os/write-policy.yaml @@ -20,6 +20,7 @@ write_policy: - examples/** - templates/** - .agent-os/** + - .knowledgeos-local/** - knowledge/** - src/** - tests/** @@ -40,6 +41,7 @@ write_policy: - .agent-os/dispatch-policy.yaml - .agent-os/tool-registry.yaml - .agent-os/workflows/router.yaml + - .agent-os/decision-policy.yaml - .agent-os/effect-policy.yaml - .agent-os/phase-policy.yaml - .agent-os/fabric-link.yaml @@ -52,8 +54,15 @@ write_policy: - .agent-os/runs/**/command-events.ndjson - .agent-os/runs/**/step-events.ndjson - .agent-os/runs/**/capability-events.ndjson + - .agent-os/runs/**/decision-events.ndjson - .agent-os/runs/**/effect-assertions.ndjson - .agent-os/runs/**/postflight.md + - .agent-os/threads/current.json + - .agent-os/threads/**/thread.json + - .agent-os/threads/**/thread-plan.ndjson + - .agent-os/threads/**/thread-plan.md + - 
.agent-os/threads/**/thread-map.html + - .agent-os/threads/**/command-events.ndjson - templates/governance-core/** - templates/capability-layer/** @@ -70,3 +79,4 @@ write_policy: - examples/** - templates/** - .agent-os/** + - .knowledgeos-local/** diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a3d7bb..f159e20 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,16 @@ KnowledgeOS is still a working prototype, so versions below describe capability ### Added +- Decision Graph module for public, auditable decision summaries without expanding the kernel. +- `decision-event` records plan branches, route selections, inserted steps, abandoned branches, rollbacks, deferred work, human decisions, risk tradeoffs, and final decisions into `decision-events.ndjson`, returning `DECISION_OK`. +- `decision-query` and `verify-decisions` make decision trees queryable and command-evidence checked. +- `render-html --kind decision-map` generates a presentation sidecar from the decision ledger while keeping NDJSON as the source of truth. +- `.agent-os/decision-policy.yaml` lets projects choose decision-graph strictness: `warn`, `enforce`, or `off` with a downgrade reason. +- `flow-summary` emits a readable, layered Mermaid Mission Flow with `FLOW_OK` for medium-or-larger task completion reports. +- `render-html --kind mission-flow` generates a self-contained Mission Flow HTML sidecar. +- `complete-task` now prepares `mission-flow.md` and returns `flow_mermaid` for medium, high, or complex tasks so agents can include a clear end-of-task flow diagram. +- Thread Plan Ledger module for chat-window-level, append-only natural-language planning across multiple tasks and runs. +- `thread-plan start/current/append/link-run/render` creates readable `Plan A / Plan B` and `Phase A / Phase B` planning maps, returning `THREAD_PLAN_OK`. - `render-html` for HTML presentation sidecars without replacing Markdown, YAML, or NDJSON as canonical evidence. 
- Composable HTML report manifests and fragments for receipt, handoff, rich-report, and stitched report outputs. - `.agent-os/effect-policy.yaml` for project-level capability effect verification strictness. @@ -25,6 +35,8 @@ KnowledgeOS is still a working prototype, so versions below describe capability ### Fixed +- `thread-plan render --format html` no longer duplicates the Thread Plan heading inside the report body; the HTML shell keeps the page title and the Markdown fragment keeps the plan title. +- `complete-task` now runs decision verification and blocks forged or structurally invalid decision evidence when project policy is enforced. - `artifact-assert` now rejects nonexistent `capability_event_id` links instead of allowing forged capability-to-effect evidence. - `verify-effects` now rejects existing effect assertions that reference missing capability events. - `run-task` now allocates suffixed run ids when the same task is run more than once within the same second. diff --git a/docs/LIVE-REPORT.md b/docs/LIVE-REPORT.md index 643e237..ce1629c 100644 --- a/docs/LIVE-REPORT.md +++ b/docs/LIVE-REPORT.md @@ -658,3 +658,98 @@ Validation: - Reproduced the bogus capability link acceptance in a temporary project, then verified the same command fails with `capability event not found`. - Added targeted regression coverage for bogus capability links, forged effect ledgers, and same-second run id collisions. - Re-ran harness audit, guardrail scenarios, unit tests, smoke, and diff checks after the fix. + +## 2026-05-29 - Module Note: Decision Graph + +Status: implemented locally as an optional module, not a kernel expansion. + +Bug / product gap reproduced: + +- Long research and agent sessions could record what happened through trace steps and lifecycle checkpoints, but they had no first-class public record of why a plan branched, why a route was abandoned, or why a rollback happened. 
+- Without a decision ledger, the human-readable plan could drift from the executed path, especially when a task inserted new checks or changed route midway. +- Completion could not distinguish between ordinary linear progress and important decision changes that should be auditable. + +Fix: + +- Added `decision-event` to write `.agent-os/runs/<RUN_ID>/decision-events.ndjson` and return `DECISION_OK`. +- Added `decision-query` for filtering decision events by run, task, kind, status, or parent id. +- Added `verify-decisions` to detect forged events, orphan nodes, invalid kinds/statuses, and unexplained abandoned, rollback, or superseded branches. +- Added `.agent-os/decision-policy.yaml` with default `strictness: warn`; `strictness: enforce` makes `complete-task` block on decision verification failure. +- Added `render-html --kind decision-map` to generate a readable HTML sidecar from the decision ledger while keeping NDJSON as source of truth. +- Updated agent guide, startup prompts, operating spec, executable control-plane docs, release checklist, project template, and tests. + +Validation: + +- Reproduced missing CLI behavior before implementation: `decision-event` was an invalid command and completion did not enforce decision verification. +- Added targeted tests for event creation, query, orphan detection, strict policy completion blocking, default warn policy, startup prompt contract, doctor policy validation, and decision-map HTML output. +- Verified `python3 -B -m py_compile knowledgeos/cli.py`. +- Verified `python3 -B -m unittest discover -s tests -v`: 87 tests passed. +- Verified `./examples/scenarios/run_guardrail_scenarios.sh`: 30 checkpoints passed. +- Verified `./bin/knowledgeos doctor --root . --project-root . --summary`: 1789 checks passed. +- Verified `git diff --check`. + +## 2026-05-29 - Reporting Note: Mission Flow + +Status: implemented locally as a presentation/reporting layer, not kernel evidence. 
+ +Product gap reproduced: + +- Medium and complex tasks could finish with strong command evidence but still require the user to mentally assemble what happened from raw markers such as `CHECKPOINT_OK`, `CAPABILITY_OK`, `EFFECT_OK`, `DECISION_OK`, and `[SYNC_OK]`. +- The user-facing closeout needed a readable flow diagram with plain labels, not a dense dump of internal lifecycle terms. + +Fix: + +- Added `flow-summary`, which writes `.agent-os/runs/<RUN_ID>/mission-flow.md` and emits `FLOW_OK`. +- Added `render-html --kind mission-flow`, which creates a self-contained HTML sidecar with colored cards and source metadata. +- Updated `complete-task` so medium, high, and complex tasks return `flow_marker`, `flow_summary_marker`, `flow_mermaid`, and `flow_source`. +- Kept Mission Flow out of kernel enforcement. It summarizes existing evidence lanes for humans and does not replace Markdown, YAML, or NDJSON evidence. +- Updated executable docs, operating spec, agent guide, startup prompts, templates, changelog, and tests. + +Validation: + +- Added targeted tests for `flow-summary`, mission-flow HTML sidecars, and `complete-task` flow output. +- Verified the diagram uses readable labels such as `Goal`, `Health Check`, `Task & Plan`, `Safe Writes`, `Work Done`, `Tools Used`, `Proof`, `Decisions`, and `Finish`. + +## 2026-05-29 - Module Note: Thread Plan Ledger + +Status: implemented locally as a chat-level planning module, not a lifecycle checkpoint. + +Product gap reproduced: + +- Run-level Mission Flow summarizes one completed task but does not preserve the whole chat-window plan as the conversation grows. +- Long research/product conversations need a natural-language version map: Plan A / Plan B, Phase A / Phase B, current route, inserted steps, linked runs, and deferred branches. +- The desired view should be readable by humans and should not add another completion gate. 
+ +Fix: + +- Added `thread-plan start`, `thread-plan current`, `thread-plan append`, `thread-plan link-run`, and `thread-plan render`. +- Added `.agent-os/threads/<THREAD_ID>/thread-plan.ndjson` as the append-only source of truth, with Markdown and HTML sidecars for review. +- Added `THREAD_PLAN_OK` command output for successful thread-plan operations. +- Kept Thread Plan Ledger out of `complete-task`, `verify-lifecycle`, and checkpoint enforcement. +- Updated executable docs, operating spec, agent guide, startup prompts, templates, release checklist, changelog, and tests. + +Validation: + +- Added targeted tests for start/current/append/link-run/render and append-only ledger behavior. +- Verified Markdown includes `Plan A / Plan B`, `Phase A / Phase B`, current working line, and linked run evidence. +- Verified HTML sidecar is self-contained and marks HTML as presentation, not source of truth. + +## 2026-05-30 - Bugfix Note: Thread Plan HTML Heading + +Status: fixed and regression-tested. + +Bug reproduced: + +- `thread-plan render --format html` produced three `<h1>` headings for one thread-plan page. +- Root cause: the HTML document shell already renders a page title, the Thread Plan renderer manually prepended another `<h1>`, and the Markdown fragment converted its own top-level title into a third `<h1>`. + +Fix: + +- Removed the extra hand-written Thread Plan body heading from `render_thread_plan_html`. +- Kept the canonical Markdown title and the outer HTML shell title intact. + +Validation: + +- Reproduced the issue in a temporary project before the fix: `h1_count=3`. +- Re-ran the same reproduction after the fix: `h1_count=2`. +- Added a regression assertion to the Thread Plan Ledger test. diff --git a/docs/agent-guide.md b/docs/agent-guide.md index e91a0fb..7f418db 100644 --- a/docs/agent-guide.md +++ b/docs/agent-guide.md @@ -11,6 +11,7 @@ Use this checklist before substantial work in a KnowledgeOS-controlled project. 
- `.agent-os/tasks.yaml` - `.agent-os/specs.yaml` - `.agent-os/phase-policy.yaml` + - `.agent-os/decision-policy.yaml` - `.agent-os/effect-policy.yaml` - `.agent-os/decisions.yaml` - `.agent-os/evals.yaml` @@ -25,6 +26,8 @@ Use this checklist before substantial work in a KnowledgeOS-controlled project. - `knowledgeos check-write --project-root . --path <planned-path>` - `knowledgeos create-spec --project-root . --title "<title>"` when the user asks to create a spec. - `knowledgeos align-spec --project-root . --task-id <task-id>` when the user asks to align with a spec. + - `knowledgeos thread-plan current --project-root .` when continuing a long-lived planning conversation. + - `knowledgeos thread-plan start --project-root . --title "<natural language goal>"` when the user starts a durable plan/spec conversation. 4. Start work through a run envelope. - `knowledgeos route-task --project-root . --task-id <task-id>` @@ -48,14 +51,22 @@ Use this checklist before substantial work in a KnowledgeOS-controlled project. - Relay the returned `CHECKPOINT_OK` marker to the user. - Use `knowledgeos capability-event --project-root . --task-id <task-id> --run-id <run-id> --kind <kind> --id <capability-id> --purpose "<purpose>"` for MCP, skill, subagent, orchestrator, or important script calls. - Relay the returned `CAPABILITY_OK` marker to the user. + - Use `knowledgeos decision-event --project-root . --task-id <task-id> --run-id <run-id> --kind <kind> --title "<title>" --summary "<summary>" --reason "<reason>" --evidence "<evidence>"` when a plan branches, changes, rolls back, abandons a route, or records a major human decision. + - Relay the returned `DECISION_OK` marker to the user. + - Use `knowledgeos thread-plan append --project-root . --thread-id <thread-id> --kind <plan|phase|branch|decision|progress|change|summary> --text "<plain note>"` when the chat-level plan changes or advances. + - Relay the returned `THREAD_PLAN_OK` marker when you update the long-lived plan. 
+ - Use `knowledgeos thread-plan link-run --project-root . --thread-id <thread-id> --task-id <task-id> --run-id <run-id>` to connect a run to the chat-level plan. - Use `knowledgeos artifact-assert --project-root . --task-id <task-id> --run-id <run-id> --kind <kind> --path <artifact>` to prove real artifact side effects. - Relay the returned `EFFECT_OK` marker to the user. - Use `knowledgeos eval-task --project-root . --task-id <task-id> --run-id <run-id>`; do not hand-write `Status: passed`. - Use `knowledgeos verify-context --project-root . --task-id <task-id> --run-id <run-id>`. - Use `knowledgeos verify-lifecycle --project-root . --task-id <task-id> --run-id <run-id>`. - Use `knowledgeos verify-effects --project-root . --task-id <task-id> --run-id <run-id>` and relay the returned `EFFECT_VERIFY_OK` marker before claiming effect verification success. + - Use `knowledgeos verify-decisions --project-root . --task-id <task-id> --run-id <run-id>` and relay the returned `DECISION_VERIFY_OK` marker before claiming decision verification success. - Use `knowledgeos complete-task --project-root . --task-id <task-id> --run-id <run-id> --summary "<summary>"`. - - Do not directly edit `.agent-os/specs.yaml`, `.agent-os/specs/**`, `.agent-os/runs/RUN-*/context-pack.md`, `.agent-os/runs/RUN-*/spec-snapshot.md`, `.agent-os/runs/RUN-*/plan.md`, `.agent-os/runs/RUN-*/eval.md`, `.agent-os/runs/RUN-*/phases.ndjson`, `.agent-os/runs/RUN-*/step-events.ndjson`, `.agent-os/runs/RUN-*/capability-events.ndjson`, `.agent-os/runs/RUN-*/effect-assertions.ndjson`, `.agent-os/runs/RUN-*/command-events.ndjson`, or `.agent-os/runs/RUN-*/receipt.md`. + - For medium, high, or complex tasks, include the `FLOW_OK` Mermaid Mission Flow returned by `complete-task`; if needed, run `knowledgeos flow-summary --project-root . --run-id <run-id>` and relay `FLOW_OK`. 
+ - Keep the Mission Flow user-facing and readable: prefer labels like `Goal`, `Health Check`, `Task & Plan`, `Safe Writes`, `Work Done`, `Tools Used`, `Proof`, `Decisions`, and `Finish`. + - Do not directly edit `.agent-os/specs.yaml`, `.agent-os/specs/**`, `.agent-os/threads/**`, `.agent-os/runs/RUN-*/context-pack.md`, `.agent-os/runs/RUN-*/spec-snapshot.md`, `.agent-os/runs/RUN-*/plan.md`, `.agent-os/runs/RUN-*/eval.md`, `.agent-os/runs/RUN-*/phases.ndjson`, `.agent-os/runs/RUN-*/step-events.ndjson`, `.agent-os/runs/RUN-*/capability-events.ndjson`, `.agent-os/runs/RUN-*/decision-events.ndjson`, `.agent-os/runs/RUN-*/effect-assertions.ndjson`, `.agent-os/runs/RUN-*/command-events.ndjson`, or `.agent-os/runs/RUN-*/receipt.md`. 7. Finish with the configured sync contract when a shared-fabric kernel module is active. - Report `SYNC_OK` only after the postflight command succeeds. diff --git a/docs/decision-graph-module.md b/docs/decision-graph-module.md new file mode 100644 index 0000000..0105537 --- /dev/null +++ b/docs/decision-graph-module.md @@ -0,0 +1,98 @@ +# KnowledgeOS Decision Graph Module + +Decision Graph is an optional KnowledgeOS module for public, auditable decision summaries. It records how a task plan branches, changes, rolls back, abandons a route, or reaches a final decision. + +It is not a hidden chain-of-thought store. It saves concise natural-language decisions that a human can read later. + +## Why It Exists + +Research and long-running agent work are rarely pure automation. A task may start with one plan, insert a new check, abandon a later branch, or roll back after evidence changes. Linear traces say what happened; Decision Graph records why the path changed. 
+ +## Evidence Lane + +```text +File: .agent-os/runs/<RUN_ID>/decision-events.ndjson +Command: decision-event +Marker: DECISION_OK +Verifier: verify-decisions +Verify Marker: DECISION_VERIFY_OK +HTML: render-html --kind decision-map +``` + +Decision Graph complements the existing lanes: + +```text +trace-step what happened +phase-task which lifecycle checkpoint passed +capability-event which tool or agent capability was used +artifact-assert which real side effect landed +decision-event why the route changed or was selected +``` + +## Commands + +Record a public decision: + +```bash +knowledgeos decision-event \ + --project-root . \ + --task-id T001 \ + --run-id RUN-... \ + --kind branch_selected \ + --status selected \ + --title "Use targeted rerun" \ + --summary "Select the smaller validation path." \ + --reason "It proves the changed artifact without repeating expensive work." \ + --evidence "plan review" +``` + +Query the graph: + +```bash +knowledgeos decision-query --project-root . --run-id RUN-... --parent-id DEC-... +``` + +Verify command-generated decision evidence: + +```bash +knowledgeos verify-decisions --project-root . --task-id T001 --run-id RUN-... +``` + +Render a human-readable sidecar: + +```bash +knowledgeos render-html --project-root . --run-id RUN-... --kind decision-map +``` + +## Policy + +Project policy lives at `.agent-os/decision-policy.yaml`: + +```yaml +decision_policy: + strictness: warn + downgrade_reason: "" +``` + +Supported strictness values: + +- `warn`: default; missing decision events warn but do not block completion. +- `enforce`: completion fails when decision evidence is missing or invalid. +- `off`: allowed only with a `downgrade_reason`. 
+ +## When To Use + +Use `decision-event` when one of these happens: + +- a plan branches; +- a route is selected among alternatives; +- a new step is inserted mid-task; +- a branch is abandoned, deferred, superseded, or rolled back; +- the human makes a meaningful decision; +- a risk tradeoff changes execution. + +Do not use it for ordinary linear progress. Use `trace-step` for that. + +## Module Boundary + +Decision Graph is a module, not kernel. The kernel remains small: route, write guard, lifecycle, eval, effect verification, and completion. Decision Graph can be enforced by project policy when a research, strict, or release project needs stronger auditability. diff --git a/docs/executable-control-plane.md b/docs/executable-control-plane.md index 4e3af06..2412d54 100644 --- a/docs/executable-control-plane.md +++ b/docs/executable-control-plane.md @@ -64,7 +64,8 @@ Checks include: - write-policy coverage; - capability guardrails; - workflow route coverage; -- lifecycle consistency for `run-task -> context-pack -> plan-task -> phase-task -> eval-task -> verify-context -> verify-lifecycle -> verify-effects -> complete-task`. +- lifecycle consistency for `run-task -> context-pack -> plan-task -> phase-task -> eval-task -> verify-context -> verify-lifecycle -> verify-effects -> complete-task`; +- decision policy validation when `.agent-os/decision-policy.yaml` is present. ### `init-project` @@ -148,6 +149,48 @@ Align the active or selected spec with the current task before execution. The command writes `alignment.md` and returns `aligned` or `needs_review`. Agents should stop for human triage when alignment needs review. +### `thread-plan` + +Manage a chat-level, append-only natural-language plan. Use this when a conversation starts a durable plan/spec or when later rounds should build on an earlier planning path. + +```bash +./bin/knowledgeos thread-plan start \ + --project-root /path/to/project \ + --title "长期维护鸟类声景基金申请计划" \ + --spec-id SPEC-... 
+``` + +Append progress, branch, phase, decision, change, or summary notes: + +```bash +./bin/knowledgeos thread-plan append \ + --project-root /path/to/project \ + --thread-id THREAD-... \ + --kind phase \ + --text "Phase A:把聊天级计划记录清楚;Phase B:再把多个任务串起来。" +``` + +Link a run back to the long-lived conversation: + +```bash +./bin/knowledgeos thread-plan link-run \ + --project-root /path/to/project \ + --thread-id THREAD-... \ + --task-id T001 \ + --run-id RUN-... +``` + +Render the plan as Markdown, Mermaid, or HTML: + +```bash +./bin/knowledgeos thread-plan render \ + --project-root /path/to/project \ + --thread-id THREAD-... \ + --format html +``` + +Successful output begins with `THREAD_PLAN_OK`. This module does not gate completion. It is a readable planning map across a chat window, while `plan-task` remains the run-level execution plan. + ### `check-write` Classify a planned write against `.agent-os/write-policy.yaml`. @@ -331,6 +374,42 @@ CAPABILITY_OK kind=<kind> id=<capability-id> purpose=<short purpose> JSON output also includes a stable `capability_event_id`. Use that id when an effect assertion proves the real side effect produced by the capability. If `artifact-assert` is called with `--capability-event-id`, the id must already exist in the run's `capability-events.ndjson`; bogus links are rejected. +### `decision-event` + +Record a public Decision Graph event when a plan branches, a route is selected, a step is inserted, or a branch is abandoned, rolled back, superseded, deferred, or finalized. + +```bash +./bin/knowledgeos decision-event \ + --project-root /path/to/project \ + --task-id T001 \ + --run-id RUN-... \ + --kind branch_selected \ + --status selected \ + --title "Use targeted rerun" \ + --summary "Select the smaller validation path." \ + --reason "It proves the changed artifact without repeating expensive work." 
\ + --evidence "plan review" +``` + +The command writes `.agent-os/runs/<RUN_ID>/decision-events.ndjson` and matching command evidence. Plain-text output begins with: + +```text +DECISION_OK kind=<kind> status=<status> title=<short title> +``` + +Use `decision-event` for public decision summaries, not hidden chain-of-thought. Ordinary linear progress should stay in `trace-step`. + +### `decision-query` + +Query Decision Graph events by run, task, kind, status, or parent id. + +```bash +./bin/knowledgeos decision-query \ + --project-root /path/to/project \ + --run-id RUN-... \ + --parent-id DEC-... +``` + ### `artifact-assert` Verify a real artifact side effect before recording `EFFECT_OK`. @@ -404,9 +483,74 @@ EFFECT_VERIFY_OK status=<passed|warning|failed|disabled> strictness=<level> asse JSON output includes `effect_verify_marker: EFFECT_VERIFY_OK` and the full `marker` string. Agents must relay this marker before claiming effect verification success. +### `verify-decisions` + +Verify that Decision Graph evidence is command-generated and structurally valid. + +```bash +./bin/knowledgeos verify-decisions \ + --project-root /path/to/project \ + --task-id T001 \ + --run-id RUN-... +``` + +The project policy lives in `.agent-os/decision-policy.yaml`. `strictness: warn` is the default and does not block linear work with no decision events. `strictness: enforce` blocks completion when decision evidence is missing, forged, orphaned, or invalid. `strictness: off` requires a `downgrade_reason`. + +Plain output includes: + +```text +DECISION_VERIFY_OK status=<passed|warning|failed|disabled> strictness=<level> decisions=<n> warnings=<n> errors=<n> +``` + +### `render-html --kind decision-map` + +Render `.agent-os/runs/<RUN_ID>/decision-events.ndjson` into a human-readable static decision map. + +```bash +./bin/knowledgeos render-html \ + --project-root /path/to/project \ + --run-id RUN-... 
\ + --kind decision-map +``` + +The HTML includes source path, source SHA-256, run id, generated time, and the notice `HTML is presentation, not source of truth.` + +### `flow-summary` + +Print a friendly, layered Mermaid mission flow for a run. This is designed for human end-of-task reporting, not machine enforcement. + +```bash +./bin/knowledgeos flow-summary \ + --project-root /path/to/project \ + --run-id RUN-... +``` + +Plain output begins with: + +```text +FLOW_OK run=<run-id> stages=<n> synced=<yes|no> +``` + +The diagram uses simple labels: `Goal`, `Health Check`, `Task & Plan`, `Safe Writes`, `Work Done`, `Tools Used`, `Proof`, `Decisions`, and `Finish`. It intentionally avoids exposing internal jargon as the main user-facing surface. + +For medium, high, or complex tasks, `complete-task` also writes `.agent-os/runs/<RUN_ID>/mission-flow.md` and returns `flow_marker`, `flow_summary_marker`, and `flow_mermaid` so the agent can include the readable flow in its final answer. + +### `render-html --kind mission-flow` + +Render `.agent-os/runs/<RUN_ID>/mission-flow.md` into a self-contained HTML sidecar with colored cards and source metadata. + +```bash +./bin/knowledgeos render-html \ + --project-root /path/to/project \ + --run-id RUN-... \ + --kind mission-flow +``` + +The HTML is presentation only. Markdown, YAML, and NDJSON remain the source of truth. + ### `complete-task` -Close a task only after `eval-task` passed, declared outputs exist, context verification passes, lifecycle verification passes, effect verification passes or explicitly downgrades, and required postflight succeeds or records an explicit pending reason. +Close a task only after `eval-task` passed, declared outputs exist, context verification passes, lifecycle verification passes, effect verification passes or explicitly downgrades, decision verification passes or warns according to policy, and required postflight succeeds or records an explicit pending reason. 
```bash ./bin/knowledgeos complete-task \ @@ -418,7 +562,9 @@ Close a task only after `eval-task` passed, declared outputs exist, context veri Manual `Status: passed` text is not enough unless an explicit override is used. -Before postflight, `complete-task` runs `verify-effects`. It blocks on failed effect verification and records effect status, the `EFFECT_VERIFY_OK` marker, warnings, or explicit strictness downgrades in the receipt. +Before postflight, `complete-task` runs `verify-effects` and `verify-decisions`. It blocks on failed verification and records status, visible markers, warnings, or explicit strictness downgrades in the receipt. + +For medium, high, or complex tasks, `complete-task` also prepares a readable Mission Flow summary and returns `FLOW_OK` fields. Agents should include that Mermaid flow in the final user-facing answer unless the user explicitly asks for a terse response. If `.agent-os/fabric-link.yaml` sets `postflight_required: true`, `complete-task` runs the configured shared-fabric `after-task.sh` and only reports `sync_status: SYNC_OK` when the hook emits `[SYNC_OK]`. Use `--allow-pending-postflight "<reason>"` only as an explicit, receipt-recorded escape hatch. @@ -442,6 +588,16 @@ Render canonical Markdown evidence into static, composable HTML sidecars for hum --input reports/drafts/x.md \ --kind rich-report \ --output reports/drafts/x.html + +./bin/knowledgeos render-html \ + --project-root /path/to/project \ + --run-id RUN-... \ + --kind decision-map + +./bin/knowledgeos render-html \ + --project-root /path/to/project \ + --run-id RUN-... \ + --kind mission-flow ``` HTML is presentation only. Markdown, YAML, and NDJSON remain the source of truth. 
Generated pages include source path, source SHA-256, generated time, run id when available, and the notice `HTML is presentation, not source of truth.` diff --git a/docs/knowledgeos-operating-spec.md b/docs/knowledgeos-operating-spec.md index b816f34..61ce7ff 100644 --- a/docs/knowledgeos-operating-spec.md +++ b/docs/knowledgeos-operating-spec.md @@ -44,7 +44,10 @@ Modules are invoked by task intent, route policy, or project profile. Current and planned modules include: - `spec`: durable user intent, context pack, plan, and drift checks; +- `thread-plan`: chat-level natural-language plan history across multiple tasks and runs; - `capability`: dispatch policy, tool registry, and capability events; +- `decision-graph`: public decision summaries, plan branches, abandoned routes, rollbacks, and decision-map HTML; +- `mission-flow`: readable completion flow summaries and HTML sidecars for medium or larger tasks; - `archive`: cold storage for old or superseded project content; - `migration`: old-project reorganization plans; - `harness`: cross-project audit and repair; @@ -61,6 +64,7 @@ The Workbench should read: - `.agent-os/tasks.yaml`; - `.agent-os/runs/`; - `.agent-os/specs/`; +- `.agent-os/threads/`; - receipts and handoffs; - evidence lanes. @@ -83,14 +87,14 @@ Recommended profiles: - `minimal`: kernel gates only; - `standard`: kernel plus context, plan, checkpoint, and capability visibility; - `strict`: standard plus stronger spec and release checks; -- `research`: strict spec/context tracking and artifact discipline; +- `research`: strict spec/context tracking, artifact discipline, and decision graph visibility; - `release`: strict verification, security review, and postflight requirements. Checkpoint reporting should remain mandatory for substantial managed work even when optional modules are disabled. ## Evidence Lanes -KnowledgeOS separates evidence into four lanes. +KnowledgeOS separates evidence into auditable lanes. 
### Public Operational Trace @@ -136,6 +140,33 @@ This lane records visible use of: Required dispatch stages must be recorded or explicitly skipped with a public reason. +### Decision Graph + +```text +File: .agent-os/runs/<RUN_ID>/decision-events.ndjson +Command: decision-event +Marker: DECISION_OK +Verifier: verify-decisions +Verify Marker: DECISION_VERIFY_OK +``` + +This module lane records public, human-readable decision summaries: plan branches, selected routes, inserted steps, abandoned branches, rollbacks, superseded paths, deferred work, human decisions, risk tradeoffs, and final decisions. It is not hidden chain-of-thought. + +Project policy lives at `.agent-os/decision-policy.yaml`. Default strictness is `warn`; `enforce` blocks completion on invalid or missing decision evidence; `off` requires a downgrade reason. + +### Thread Plan Ledger + +```text +File: .agent-os/threads/<THREAD_ID>/thread-plan.ndjson +Command: thread-plan +Marker: THREAD_PLAN_OK +HTML: thread-plan render --format html +``` + +This module lane records a long-lived chat-window plan in natural language. It can show `Plan A / Plan B`, `Phase A / Phase B`, current progress, abandoned or delayed routes, and linked task runs. It is append-only and human-readable. + +Thread Plan Ledger is not a checkpoint and does not block completion. It complements `plan-task`: `plan-task` is the run-level execution plan; `thread-plan` is the multi-turn conversation map. + ### Completion And Sync ```text @@ -146,6 +177,19 @@ Marker: SYNC_OK This lane proves the task closed through the completion gate and postflight contract. +### Completion Mission Flow + +```text +File: .agent-os/runs/<RUN_ID>/mission-flow.md +Command: flow-summary +Marker: FLOW_OK +HTML: render-html --kind mission-flow +``` + +This presentation lane gives humans a readable, layered flow at the end of medium, high, or complex tasks. 
It uses plain labels such as `Goal`, `Health Check`, `Task & Plan`, `Safe Writes`, `Work Done`, `Tools Used`, `Proof`, `Decisions`, and `Finish`. + +Mission Flow is not kernel evidence. It summarizes existing evidence lanes and should remain visually clear instead of exposing raw internal terminology. + ## Full Task Chain A substantial managed task should follow this chain: @@ -156,6 +200,7 @@ User Intent -> Doctor Gate -> Task Intake -> Spec Alignment +-> Thread Plan -> Route Guard -> Dispatch Plan -> Write Guard @@ -164,14 +209,16 @@ User Intent -> Plan Task -> Execution -> Capability Visibility +-> Decision Graph -> Phase Checkpoints -> Eval -> Verify -> Complete -> Sync +-> Mission Flow ``` -`trace-step` explains the operational path. `phase-task` proves the lifecycle checkpoints. `capability-event` proves tool and agent visibility. `complete-task` proves closure and sync. +`trace-step` explains the operational path. `phase-task` proves the lifecycle checkpoints. `capability-event` proves tool and agent visibility. `decision-event` explains public decision changes. `thread-plan` preserves the long-lived natural-language plan across the chat window. `complete-task` proves closure and sync. `flow-summary` turns one run into a readable end-of-task map for humans. ## Doctor And Repair Tools diff --git a/docs/migration-boundary.md b/docs/migration-boundary.md index 78fdc2c..ca9d101 100644 --- a/docs/migration-boundary.md +++ b/docs/migration-boundary.md @@ -20,6 +20,20 @@ The local track lives under `.knowledgeos-local/` and can reference the current This track is for migration planning only. It should not be published as part of the clean KnowledgeOS release. +For runtime migrations that must touch exact machine-local targets outside the repo, keep the allowlist in `.knowledgeos-local/write-policy.local.yaml`. + +That file is a local overlay, not part of the public control plane. 
It may contain sections such as: + +- `external_controlled` +- `external_forbidden_without_human_gate` +- `external_require_receipt_for` + +External writes remain denied by default. They become legal only when all three conditions are true: + +1. the path is listed in the local overlay; +2. the active task route explicitly sets `allow_external_controlled: true`; +3. the normal KnowledgeOS route, run, eval, and lifecycle evidence is still complete. + ## Reorganization Boundary `migrate-legacy-project` is the safe entry point for old project folders. It creates a reviewable plan first and only moves confidently classified top-level entries with `--apply`. diff --git a/docs/public-release-checklist.md b/docs/public-release-checklist.md index c9e7fdc..493cc08 100644 --- a/docs/public-release-checklist.md +++ b/docs/public-release-checklist.md @@ -12,17 +12,25 @@ Before publishing KnowledgeOS, verify: - Any future app is described as a workbench that consumes receipts/wiki/graph outputs; - `archive/**` is documented as cold storage, not default context.
- `create-spec` and `align-spec` work for durable user intent; +- `thread-plan start/current/append/link-run/render` work for chat-level natural-language planning; +- `thread-plan render --format html` creates a self-contained sidecar without becoming a completion gate; - `context-pack`, `plan-task`, and `verify-context` work; - `complete-task` refuses missing plan/context evidence and spec drift; - `create-task` works and new work does not require abusing `reopen-task`; - `phase-task` and `verify-lifecycle` work; - `phase-task` returns a visible `CHECKPOINT_OK` marker; - `capability-event` records capability visibility and returns `CAPABILITY_OK`; +- `decision-event` records public decision graph events and returns `DECISION_OK`; +- `decision-query` can filter run decisions by parent, kind, and status; +- `verify-decisions` returns a visible `DECISION_VERIFY_OK` marker and rejects forged or orphaned decisions; +- `flow-summary` returns a visible `FLOW_OK` marker and a readable Mermaid Mission Flow; +- `render-html --kind mission-flow` creates a self-contained HTML sidecar from `mission-flow.md`; - `artifact-assert` verifies real side effects and returns `EFFECT_OK`; - `artifact-assert` rejects bogus `--capability-event-id` links; - `verify-effects` returns a visible `EFFECT_VERIFY_OK` marker with status, strictness, assertion count, warning count, and error count; - `verify-effects` refuses missing or forged effect evidence when policy is `enforce`; - `verify-lifecycle` refuses missing dispatch or required capability evidence; - `complete-task` refuses missing phases; -- `complete-task` runs effect verification and records warnings or downgrade reasons; +- `complete-task` runs effect and decision verification and records warnings or downgrade reasons; +- `complete-task` returns Mission Flow fields for medium, high, or complex tasks; - `complete-task` runs required postflight or records an explicit pending reason. 
diff --git a/docs/thread-plan-ledger.md b/docs/thread-plan-ledger.md new file mode 100644 index 0000000..f0e27a0 --- /dev/null +++ b/docs/thread-plan-ledger.md @@ -0,0 +1,80 @@ +# KnowledgeOS Thread Plan Ledger + +Thread Plan Ledger is a chat-level planning module. It keeps a natural-language, append-only map of a long conversation: the original plan, later branches, route changes, inserted phases, linked runs, and current working line. + +It is not a checkpoint gate. It does not replace `plan-task`, `decision-event`, lifecycle evidence, or completion receipts. + +## Why It Exists + +Some research and product conversations last across many tasks. A run-level Mission Flow shows one completed task, but it does not show how the whole conversation evolved. Thread Plan Ledger gives that longer story a durable, readable home. + +Use it when the user says they want to create, align, or follow a durable plan/spec, or when a conversation starts to branch over multiple rounds. + +## Evidence Files + +```text +.agent-os/threads/<THREAD_ID>/thread-plan.ndjson +.agent-os/threads/<THREAD_ID>/thread-plan.md +.agent-os/threads/<THREAD_ID>/thread-map.html +.agent-os/threads/current.json +``` + +The NDJSON ledger is the source of truth. Markdown and HTML are sidecars for human review. + +## Commands + +Start a chat-level plan: + +```bash +knowledgeos thread-plan start \ + --project-root . \ + --title "长期维护鸟类声景基金申请计划" \ + --spec-id SPEC-... +``` + +Append a natural-language note: + +```bash +knowledgeos thread-plan append \ + --project-root . \ + --thread-id THREAD-... \ + --kind phase \ + --text "Phase A:先稳定计划记录;Phase B:再串联多个任务。" +``` + +Link a run to the conversation: + +```bash +knowledgeos thread-plan link-run \ + --project-root . \ + --thread-id THREAD-... \ + --task-id T001 \ + --run-id RUN-... +``` + +Render the plan: + +```bash +knowledgeos thread-plan render --project-root . --thread-id THREAD-... --format markdown +knowledgeos thread-plan render --project-root . 
--thread-id THREAD-... --format mermaid +knowledgeos thread-plan render --project-root . --thread-id THREAD-... --format html +``` + +All successful commands return `THREAD_PLAN_OK`. + +## Writing Style + +Keep entries readable. Prefer: + +- `Plan A / Plan B`; +- `Phase A / Phase B`; +- `当前选择`; +- `为什么改路`; +- `已经完成到哪里`; +- `下一步是什么`. + +Do not paste hidden chain-of-thought. Record concise public planning notes that another human can read later. + +## Module Boundary + +Thread Plan Ledger is a module, not kernel. It should not block `complete-task`, `verify-lifecycle`, or `SYNC_OK`. It can be rendered in a Workbench or HTML page as a versioned planning map, similar to a lightweight visual worktree for the conversation. diff --git a/knowledgeos/cli.py b/knowledgeos/cli.py index 140ede3..045c650 100644 --- a/knowledgeos/cli.py +++ b/knowledgeos/cli.py @@ -45,6 +45,8 @@ "docs/spec-context-plan.md", "docs/reset-and-migration.md", "docs/archive-policy.md", + "docs/decision-graph-module.md", + "docs/thread-plan-ledger.md", "templates/governance-core/README.md", "templates/governance-core/STRUCTURE-CHECK.md", "templates/governance-core/rules/global.md", @@ -78,6 +80,7 @@ "templates/project-control-plane/.agent-os/tasks.yaml", "templates/project-control-plane/.agent-os/specs.yaml", "templates/project-control-plane/.agent-os/phase-policy.yaml", + "templates/project-control-plane/.agent-os/decision-policy.yaml", "templates/project-control-plane/.agent-os/effect-policy.yaml", "templates/project-control-plane/.agent-os/read-policy.yaml", "templates/project-control-plane/.agent-os/write-policy.yaml", @@ -136,6 +139,11 @@ "forbidden_without_human_gate", "require_receipt_for", } +LOCAL_EXTERNAL_WRITE_POLICY_SECTIONS = { + "external_controlled", + "external_forbidden_without_human_gate", + "external_require_receipt_for", +} READ_POLICY_SECTIONS = { "default_context", "cold_storage", @@ -147,6 +155,35 @@ PHASE_STATUSES = {"completed", "skipped"} CAPABILITY_EVENT_KINDS = {"mcp", 
"skill", "subagent", "orchestrator", "script", "shell", "file_read"} EFFECT_STRICTNESS_LEVELS = {"observe", "warn", "enforce", "off"} +DECISION_STRICTNESS_LEVELS = {"warn", "enforce", "off"} +DECISION_EVENT_KINDS = { + "plan_node", + "branch_opened", + "branch_selected", + "step_inserted", + "branch_abandoned", + "rollback", + "superseded", + "deferred", + "human_decision", + "risk_tradeoff", + "final_decision", +} +DECISION_EVENT_STATUSES = { + "planned", + "active", + "selected", + "executed", + "skipped", + "abandoned", + "rolled_back", + "superseded", + "deferred", + "blocked", +} +DECISION_EXPLANATION_REQUIRED_KINDS = {"branch_abandoned", "rollback", "superseded"} +THREAD_PLAN_EVENT_KINDS = {"plan", "phase", "branch", "decision", "progress", "change", "summary"} +THREAD_PLAN_MARKER = "THREAD_PLAN_OK" EFFECT_ASSERTION_KINDS = { "file_exists", "file_nonempty", @@ -520,6 +557,41 @@ def validate_effect_policy(project_root: Path) -> list[CheckResult]: return results +def parse_decision_policy(project_root: Path) -> dict[str, Any]: + policy_path = project_root / ".agent-os" / "decision-policy.yaml" + if not policy_path.exists(): + return { + "exists": False, + "strictness": "warn", + "downgrade_reason": "", + } + scalars = parse_scalar_values(policy_path, {"strictness", "downgrade_reason"}) + return { + "exists": True, + "strictness": (scalars.get("strictness") or "warn").lower(), + "downgrade_reason": scalars.get("downgrade_reason", ""), + } + + +def validate_decision_policy(project_root: Path) -> list[CheckResult]: + policy = parse_decision_policy(project_root) + strictness = str(policy.get("strictness", "warn")) + results = [ + CheckResult( + strictness in DECISION_STRICTNESS_LEVELS, + "decision_policy", + f"strictness={strictness}" if strictness in DECISION_STRICTNESS_LEVELS else f"invalid strictness={strictness}", + ) + ] + if not policy.get("exists"): + results.append(CheckResult(True, "decision_policy", "missing decision-policy defaults to strictness=warn")) 
+ return results + if strictness == "off": + reason = str(policy.get("downgrade_reason", "")).strip() + results.append(CheckResult(bool(reason), "decision_policy", "strictness=off requires downgrade_reason")) + return results + + def parse_named_blocks(path: Path) -> list[dict[str, str]]: blocks: list[dict[str, str]] = [] current: dict[str, str] | None = None @@ -657,6 +729,13 @@ def load_write_policy(project_root: Path) -> dict[str, list[str]]: return parse_simple_list_sections(policy_path, WRITE_POLICY_SECTIONS) +def load_local_external_write_policy(project_root: Path) -> dict[str, list[str]]: + policy_path = project_root / ".knowledgeos-local" / "write-policy.local.yaml" + if not policy_path.exists(): + return {section: [] for section in LOCAL_EXTERNAL_WRITE_POLICY_SECTIONS} + return parse_simple_list_sections(policy_path, LOCAL_EXTERNAL_WRITE_POLICY_SECTIONS) + + def load_read_policy(project_root: Path) -> dict[str, list[str]]: policy_path = project_root / ".agent-os" / "read-policy.yaml" if not policy_path.exists(): @@ -945,6 +1024,7 @@ def route_output_match(relative: str, allowed_outputs: list[str]) -> str | None: def classify_write(project_root: Path, target: str) -> dict[str, Any]: policy = load_write_policy(project_root) + local_external_policy = load_local_external_write_policy(project_root) absolute, relative, inside = path_for_policy(project_root, target) absolute_value = absolute.as_posix() @@ -969,6 +1049,35 @@ def classify_write(project_root: Path, target: str) -> dict[str, Any]: } if not inside: + external_forbidden_match = matches_any( + absolute_value, + local_external_policy["external_forbidden_without_human_gate"], + ) + if external_forbidden_match: + return { + "decision": "human_gate_required", + "reason": f"matches external_forbidden_without_human_gate pattern {external_forbidden_match}", + "path": absolute_value, + "inside_project": False, + } + + external_controlled_match = matches_any( + absolute_value, + 
local_external_policy["external_controlled"], + ) + if external_controlled_match: + receipt_match = matches_any( + absolute_value, + local_external_policy["external_require_receipt_for"], + ) + return { + "decision": "allow", + "reason": f"matches external_controlled pattern {external_controlled_match}", + "path": absolute_value, + "inside_project": False, + "receipt_required": bool(receipt_match), + "receipt_pattern": receipt_match, + } return { "decision": "deny", "reason": "path is outside the project root and no explicit allow policy matched", @@ -1015,6 +1124,25 @@ def classify_route_write(project_root: Path, task_id: str, target: str) -> dict[ "route": route, } + if not write_decision.get("inside_project", True): + if str(route.get("allow_external_controlled", "")).lower() == "true": + return { + **write_decision, + "task_id": task_id, + "route_status": "allowed_by_external_route", + "route_output_match": "external_controlled", + "route": route, + } + return { + "decision": "route_output_denied", + "reason": f"path is allowed by local external write policy but task {task_id} route does not allow external controlled writes", + "path": write_decision["path"], + "inside_project": False, + "task_id": task_id, + "allowed_outputs": route.get("allowed_outputs", []), + "route": route, + } + allowed_outputs = route.get("allowed_outputs", []) if not allowed_outputs: return { @@ -1535,6 +1663,7 @@ def write_task_plan(project_root: Path, task_id: str, run_id: str, *, summary: s "- Record public lifecycle checkpoints with phase-task.", "- Record MCP, skill, subagent, orchestrator, or important script use with capability-event.", "- Complete only after eval-task, verify-lifecycle, and postflight pass.", + "- For medium, high, or complex tasks, include a readable FLOW_OK Mission Flow in the final answer.", "", ] write_text(run_dir / "plan.md", "\n".join(lines).rstrip() + "\n") @@ -1690,6 +1819,10 @@ def effect_assertions_path(run_dir: Path) -> Path: return run_dir / 
"effect-assertions.ndjson" +def decision_events_path(run_dir: Path) -> Path: + return run_dir / "decision-events.ndjson" + + def step_events_path(run_dir: Path) -> Path: return run_dir / "step-events.ndjson" @@ -1768,6 +1901,423 @@ def load_effect_assertions(run_dir: Path) -> list[dict[str, Any]]: return assertions +def load_decision_events(run_dir: Path) -> list[dict[str, Any]]: + path = decision_events_path(run_dir) + if not path.exists(): + return [] + events: list[dict[str, Any]] = [] + for line_number, raw in enumerate(read_text(path).splitlines(), start=1): + if not raw.strip(): + continue + try: + item = json.loads(raw) + except json.JSONDecodeError: + item = {"_invalid_json": raw, "_line": line_number} + events.append(item) + return events + + +def threads_root(project_root: Path) -> Path: + return project_root / ".agent-os" / "threads" + + +def current_thread_path(project_root: Path) -> Path: + return threads_root(project_root) / "current.json" + + +def thread_dir(project_root: Path, thread_id: str) -> Path: + return threads_root(project_root) / thread_id + + +def thread_plan_path(project_root: Path, thread_id: str) -> Path: + return thread_dir(project_root, thread_id) / "thread-plan.ndjson" + + +def thread_command_events_path(project_root: Path, thread_id: str) -> Path: + return thread_dir(project_root, thread_id) / "command-events.ndjson" + + +def thread_meta_path(project_root: Path, thread_id: str) -> Path: + return thread_dir(project_root, thread_id) / "thread.json" + + +def utc_event_stamp() -> str: + return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S%fZ") + + +def make_thread_id(project_root: Path, title: str) -> str: + base = f"THREAD-{datetime.now(timezone.utc).strftime('%Y%m%d-%H%M%S')}-{safe_slug(title)[:36]}" + candidate = base.rstrip("-") + suffix = 2 + while thread_dir(project_root, candidate).exists(): + candidate = f"{base}-{suffix}".rstrip("-") + suffix += 1 + return candidate + + +def append_thread_command_event(project_root: Path, 
thread_id: str, event_type: str, **extra: Any) -> None: + record = { + "event_type": event_type, + "thread_id": thread_id, + "generated_by": "knowledgeos", + "timestamp": datetime.now(timezone.utc).isoformat(), + **extra, + } + path = thread_command_events_path(project_root, thread_id) + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("a", encoding="utf-8") as handle: + handle.write(json.dumps(record, ensure_ascii=False, sort_keys=True) + "\n") + + +def load_thread_meta(project_root: Path, thread_id: str) -> dict[str, Any]: + path = thread_meta_path(project_root, thread_id) + if not path.exists(): + raise FileNotFoundError(f"missing thread metadata: {path}") + return json.loads(read_text(path)) + + +def load_current_thread(project_root: Path) -> dict[str, Any]: + path = current_thread_path(project_root) + if not path.exists(): + raise FileNotFoundError(f"missing current thread pointer: {path}") + current = json.loads(read_text(path)) + thread_id = str(current.get("thread_id", "")) + if not thread_id: + raise ValueError("current thread pointer is missing thread_id") + if not thread_dir(project_root, thread_id).exists(): + raise FileNotFoundError(f"current thread directory missing: {thread_id}") + return current + + +def load_thread_events(project_root: Path, thread_id: str) -> list[dict[str, Any]]: + path = thread_plan_path(project_root, thread_id) + if not path.exists(): + return [] + events: list[dict[str, Any]] = [] + for line_number, raw in enumerate(read_text(path).splitlines(), start=1): + if not raw.strip(): + continue + try: + item = json.loads(raw) + except json.JSONDecodeError: + item = {"_invalid_json": raw, "_line": line_number} + events.append(item) + return events + + +def write_thread_current(project_root: Path, meta: dict[str, Any]) -> None: + current = { + "thread_id": meta.get("thread_id", ""), + "title": meta.get("title", ""), + "spec_id": meta.get("spec_id", ""), + "created_at": meta.get("created_at", ""), + "last_updated_at": 
meta.get("last_updated_at", ""), + } + write_text(current_thread_path(project_root), json.dumps(current, indent=2, ensure_ascii=False) + "\n") + + +def update_thread_meta(project_root: Path, thread_id: str, **updates: Any) -> dict[str, Any]: + meta = load_thread_meta(project_root, thread_id) + meta.update(updates) + meta["last_updated_at"] = datetime.now(timezone.utc).isoformat() + write_text(thread_meta_path(project_root, thread_id), json.dumps(meta, indent=2, ensure_ascii=False) + "\n") + write_thread_current(project_root, meta) + return meta + + +def thread_event_label(text: str, limit: int = 52) -> str: + cleaned = " ".join(str(text).split()) + if len(cleaned) > limit: + cleaned = cleaned[: max(0, limit - 3)].rstrip() + "..." + return cleaned.replace('"', "'").replace("[", "(").replace("]", ")") + + +def append_thread_plan_event( + project_root: Path, + *, + thread_id: str, + kind: str, + text: str, + linked_spec_id: str = "", + linked_task_id: str = "", + linked_run_id: str = "", + parent_event_id: str = "", + event_type: str = "thread-plan append", +) -> dict[str, Any]: + if kind not in THREAD_PLAN_EVENT_KINDS: + raise ValueError(f"invalid thread-plan kind: {kind}") + if not text.strip(): + raise ValueError("thread-plan text is required") + if not thread_dir(project_root, thread_id).exists(): + raise FileNotFoundError(f"thread not found: {thread_id}") + event_id = f"TPE-{utc_event_stamp()}-{safe_slug(kind)}" + record = { + "event_id": event_id, + "thread_id": thread_id, + "kind": kind, + "text": text.strip(), + "linked_spec_id": linked_spec_id.strip(), + "linked_task_id": linked_task_id.strip(), + "linked_run_id": linked_run_id.strip(), + "parent_event_id": parent_event_id.strip(), + "timestamp": datetime.now(timezone.utc).isoformat(), + "append_only": True, + } + path = thread_plan_path(project_root, thread_id) + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("a", encoding="utf-8") as handle: + handle.write(json.dumps(record, 
ensure_ascii=False, sort_keys=True) + "\n") + update_thread_meta(project_root, thread_id) + append_thread_command_event( + project_root, + thread_id, + event_type, + event_id=event_id, + kind=kind, + linked_task_id=linked_task_id.strip(), + linked_run_id=linked_run_id.strip(), + linked_spec_id=linked_spec_id.strip(), + ) + render_thread_plan_markdown(project_root, thread_id) + marker = f"{THREAD_PLAN_MARKER} action=append thread={thread_id} kind={kind}" + return { + "status": "recorded", + "thread_plan_marker": THREAD_PLAN_MARKER, + "marker": marker, + "thread_id": thread_id, + "event_id": event_id, + "ledger": str(path), + "record": record, + } + + +def start_thread_plan(project_root: Path, *, title: str, spec_id: str = "") -> dict[str, Any]: + if not title.strip(): + raise ValueError("thread-plan title is required") + thread_id = make_thread_id(project_root, title) + created_at = datetime.now(timezone.utc).isoformat() + meta = { + "thread_id": thread_id, + "title": title.strip(), + "spec_id": spec_id.strip(), + "created_at": created_at, + "last_updated_at": created_at, + } + thread_dir(project_root, thread_id).mkdir(parents=True, exist_ok=True) + write_text(thread_meta_path(project_root, thread_id), json.dumps(meta, indent=2, ensure_ascii=False) + "\n") + write_thread_current(project_root, meta) + result = append_thread_plan_event( + project_root, + thread_id=thread_id, + kind="plan", + text=f"总体计划:{title.strip()}", + linked_spec_id=spec_id, + event_type="thread-plan start", + ) + result.update({"status": "started", "title": title.strip(), "spec_id": spec_id.strip()}) + result["marker"] = f"{THREAD_PLAN_MARKER} action=start thread={thread_id}" + return result + + +def render_thread_plan_mermaid(project_root: Path, thread_id: str) -> str: + meta = load_thread_meta(project_root, thread_id) + events = [event for event in load_thread_events(project_root, thread_id) if "_invalid_json" not in event] + lines = ["flowchart LR"] + lines.append(f' S["Start: 
{thread_event_label(str(meta.get("title", thread_id)))}"]') + previous = "S" + branch_anchor = "S" + if not events: + lines.append(' E["No plan notes yet"]') + lines.append(" S --> E") + for index, event in enumerate(events, start=1): + node = f"N{index}" + kind = str(event.get("kind", "summary")) + label = thread_event_label(str(event.get("text", ""))) + lines.append(f' {node}["{label}"]') + if kind == "branch": + lines.append(f" {branch_anchor} -.-> {node}") + else: + lines.append(f" {previous} --> {node}") + previous = node + branch_anchor = node + lines.extend( + [ + "", + " classDef start fill:#E8F3FF,stroke:#2B6CB0,color:#102A43;", + " classDef plan fill:#ECFEFF,stroke:#0891B2,color:#083344;", + " classDef phase fill:#E9FBEF,stroke:#2F855A,color:#123524;", + " classDef branch fill:#FFF7ED,stroke:#EA580C,color:#431407;", + " classDef decision fill:#F3E8FF,stroke:#6B46C1,color:#2D174D;", + " classDef progress fill:#DCFCE7,stroke:#15803D,color:#052E16;", + " class S start;", + ] + ) + classes: dict[str, list[str]] = {"plan": [], "phase": [], "branch": [], "decision": [], "progress": []} + for index, event in enumerate(events, start=1): + kind = str(event.get("kind", "summary")) + css_kind = kind if kind in classes else ("decision" if kind in {"change", "summary"} else "plan") + classes.setdefault(css_kind, []).append(f"N{index}") + for kind, nodes in classes.items(): + if nodes: + lines.append(f" class {','.join(nodes)} {kind};") + return "\n".join(lines) + + +def render_thread_plan_markdown(project_root: Path, thread_id: str) -> dict[str, Any]: + meta = load_thread_meta(project_root, thread_id) + events = [event for event in load_thread_events(project_root, thread_id) if "_invalid_json" not in event] + by_kind: dict[str, list[dict[str, Any]]] = {kind: [] for kind in THREAD_PLAN_EVENT_KINDS} + for event in events: + by_kind.setdefault(str(event.get("kind", "")), []).append(event) + current_line = next((event for event in reversed(events) if 
str(event.get("kind")) in {"progress", "phase", "decision", "change", "summary"}), None) + lines = [ + f"# Thread Plan {thread_id}", + "", + f"目标:{meta.get('title', '')}", + "", + f"当前工作线:{current_line.get('text', '还没有记录进展。') if current_line else '还没有记录进展。'}", + "", + "## 总体计划", + "", + ] + for event in by_kind.get("plan", []): + lines.append(f"- {event.get('text', '')}") + if not by_kind.get("plan"): + lines.append("- 尚未记录总体计划。") + lines.extend(["", "## Plan A / Plan B", ""]) + branch_events = by_kind.get("branch", []) + if branch_events: + for index, event in enumerate(branch_events, start=1): + label = chr(ord("A") + min(index - 1, 25)) + lines.append(f"- Plan {label}: {event.get('text', '')}") + else: + lines.append("- 当前没有分支计划。") + lines.extend(["", "## Phase A / Phase B", ""]) + phase_events = by_kind.get("phase", []) + if phase_events: + for index, event in enumerate(phase_events, start=1): + label = chr(ord("A") + min(index - 1, 25)) + lines.append(f"- Phase {label}: {event.get('text', '')}") + else: + lines.append("- 当前没有阶段拆分。") + lines.extend(["", "## 当前选择", ""]) + choice_events = by_kind.get("decision", []) + by_kind.get("change", []) + if choice_events: + for event in choice_events: + lines.append(f"- {event.get('text', '')}") + else: + lines.append("- 尚未记录改路或选择。") + lines.extend(["", "## 已完成", ""]) + progress_events = by_kind.get("progress", []) + if progress_events: + for event in progress_events: + lines.append(f"- {event.get('text', '')}") + else: + lines.append("- 尚未记录完成项。") + lines.extend(["", "## 下一步", ""]) + summary_events = by_kind.get("summary", []) + if summary_events: + lines.append(f"- {summary_events[-1].get('text', '')}") + else: + lines.append("- 等待下一轮计划推进。") + lines.extend(["", "## 关联证据", ""]) + linked = [ + event + for event in events + if event.get("linked_spec_id") or event.get("linked_task_id") or event.get("linked_run_id") + ] + if linked: + for event in linked: + parts = [str(event.get(key, "")) for key in ["linked_spec_id", 
"linked_task_id", "linked_run_id"] if event.get(key)] + lines.append(f"- {event.get('event_id', '')}: {' / '.join(parts)}") + else: + lines.append("- 尚未关联 spec/task/run。") + lines.extend(["", "## Mermaid", "", "```mermaid", render_thread_plan_mermaid(project_root, thread_id), "```", ""]) + source_path = thread_dir(project_root, thread_id) / "thread-plan.md" + markdown = "\n".join(lines).rstrip() + "\n" + write_text(source_path, markdown) + return { + "status": "rendered", + "thread_plan_marker": THREAD_PLAN_MARKER, + "marker": f"{THREAD_PLAN_MARKER} action=render thread={thread_id} format=markdown", + "thread_id": thread_id, + "format": "markdown", + "output": str(source_path), + "markdown": markdown, + "event_count": len(events), + } + + +def render_thread_plan_html(project_root: Path, thread_id: str) -> dict[str, Any]: + markdown_result = render_thread_plan_markdown(project_root, thread_id) + source_path = thread_plan_path(project_root, thread_id) + if not source_path.exists(): + raise FileNotFoundError(source_path) + source_sha = sha256_file(source_path) + mermaid = render_thread_plan_mermaid(project_root, thread_id) + body_html, _title, sections = markdown_to_html_fragment(markdown_result["markdown"], anchor_prefix=safe_slug(thread_id.lower())) + body_html = ( + "<details><summary>Visual plan map</summary>" + f"<pre><code>{html_escape(mermaid)}</code></pre>" + "</details>" + + body_html + ) + output_path = thread_dir(project_root, thread_id) / "thread-map.html" + fragment_path = output_path.with_suffix(".fragment.html") + manifest_path = output_path.with_suffix(".manifest.json") + generated_at = datetime.now(timezone.utc).isoformat() + source_rel = project_relative(project_root, source_path) + fragment_html = build_html_fragment( + kind="thread-plan", + title=f"Thread Plan {thread_id}", + source_rel=source_rel, + source_sha=source_sha, + run_id="not run-bound", + body_html=body_html, + sections=sections, + generated_at=generated_at, + ) + 
write_text(fragment_path, fragment_html) + full_html = build_html_document( + title=f"Thread Plan {thread_id}", + kind="thread-plan", + body=fragment_html, + source_rel=source_rel, + source_sha=source_sha, + run_id="not run-bound", + generated_at=generated_at, + theme=HTML_DEFAULT_THEME, + ) + write_text(output_path, full_html) + manifest = { + "schema_version": "knowledgeos.html-report.v1", + "kind": "thread-plan", + "title": f"Thread Plan {thread_id}", + "thread_id": thread_id, + "source": source_rel, + "source_sha256": source_sha, + "output": project_relative(project_root, output_path), + "fragment": project_relative(project_root, fragment_path), + "sections": sections, + "generated_at": generated_at, + "html_source_of_truth": False, + "notice": HTML_SOURCE_TRUTH_NOTICE, + } + write_text(manifest_path, json.dumps(manifest, indent=2, ensure_ascii=False) + "\n") + return { + "status": "rendered", + "thread_plan_marker": THREAD_PLAN_MARKER, + "marker": f"{THREAD_PLAN_MARKER} action=render thread={thread_id} format=html", + "thread_id": thread_id, + "format": "html", + "output": str(output_path), + "manifest": str(manifest_path), + "source": str(source_path), + "source_sha256": source_sha, + "html_source_of_truth": False, + } + + def short_marker_value(value: str, limit: int = 96) -> str: cleaned = " ".join(value.strip().split()) if len(cleaned) <= limit: @@ -2447,6 +2997,7 @@ def deep_validate_project(project_root: Path, *, allow_placeholders: bool = Fals phases = parse_simple_list_sections(agent_os / "fabric-link.yaml", {"phase_keys"}).get("phase_keys", []) results.append(CheckResult(phases == EXPECTED_PHASE_KEYS, "phase_keys", f"{phases}")) results.extend(validate_phase_policy(project_root)) + results.extend(validate_decision_policy(project_root)) results.extend(validate_effect_policy(project_root)) policy = load_write_policy(project_root) @@ -2649,6 +3200,7 @@ def build_agent_guide(project_root: Path) -> str: " - .agent-os/tasks.yaml", " - .agent-os/specs.yaml", 
" - .agent-os/phase-policy.yaml", + " - .agent-os/decision-policy.yaml", " - .agent-os/effect-policy.yaml", " - .agent-os/decisions.yaml", " - .agent-os/evals.yaml", @@ -2661,6 +3213,8 @@ def build_agent_guide(project_root: Path) -> str: f" - {bin_path} tool-registry --project-root {project_root}", f" - {bin_path} create-spec --project-root {project_root} --title <title> # when the user asks to create/align spec", f" - {bin_path} align-spec --project-root {project_root} --task-id <task-id>", + f" - {bin_path} thread-plan current --project-root {project_root} # restore the chat-level plan when one exists; relay THREAD_PLAN_OK", + f" - {bin_path} thread-plan start --project-root {project_root} --title <natural-language-goal> # when a long-lived plan/spec starts; relay THREAD_PLAN_OK", f" - {bin_path} create-task --project-root {project_root} --title <title> --type <type> --output <path> --acceptance <check>", f" - {bin_path} route-task --project-root {project_root} --task-id <task-id>", f" - {bin_path} dispatch-task --project-root {project_root} --task-id <task-id>", @@ -2682,12 +3236,17 @@ def build_agent_guide(project_root: Path) -> str: f" - {bin_path} trace-step --project-root {project_root} --task-id <task-id> --run-id <run-id> --step <step> --note <public-trace> --evidence <evidence>; echo or relay TRACE_OK;", f" - {bin_path} phase-task --project-root {project_root} --task-id <task-id> --run-id <run-id> --phase <phase> --status completed --note <public-trace> --evidence <evidence>; echo or relay CHECKPOINT_OK;", f" - {bin_path} capability-event --project-root {project_root} --task-id <task-id> --run-id <run-id> --kind <kind> --id <capability-id> --purpose <purpose> before/after MCP, skill, subagent, orchestrator, or important script use; echo or relay CAPABILITY_OK;", + f" - {bin_path} decision-event --project-root {project_root} --task-id <task-id> --run-id <run-id> --kind <kind> --title <title> --summary <summary> --reason <reason> --evidence <evidence>; echo 
or relay DECISION_OK when the plan branches, changes, rolls back, or abandons a route;", + f" - {bin_path} thread-plan append --project-root {project_root} --thread-id <thread-id> --kind <plan|phase|branch|decision|progress|change|summary> --text <natural-language-note> when the chat-level plan changes or advances; relay THREAD_PLAN_OK;", + f" - {bin_path} thread-plan link-run --project-root {project_root} --thread-id <thread-id> --task-id <task-id> --run-id <run-id> to connect a run to the long-lived plan;", f" - {bin_path} artifact-assert --project-root {project_root} --task-id <task-id> --run-id <run-id> --kind <kind> --path <artifact>; echo or relay EFFECT_OK;", f" - {bin_path} eval-task --project-root {project_root} --task-id <task-id> --run-id <run-id>;", f" - {bin_path} verify-context --project-root {project_root} --task-id <task-id> --run-id <run-id>;", f" - {bin_path} verify-lifecycle --project-root {project_root} --task-id <task-id> --run-id <run-id>;", f" - {bin_path} verify-effects --project-root {project_root} --task-id <task-id> --run-id <run-id>; echo or relay EFFECT_VERIFY_OK;", + f" - {bin_path} verify-decisions --project-root {project_root} --task-id <task-id> --run-id <run-id>; echo or relay DECISION_VERIFY_OK;", f" - {bin_path} complete-task --project-root {project_root} --task-id <task-id> --run-id <run-id> --summary <summary>", + f" - for medium, high, or complex tasks, include the returned FLOW_OK Mermaid Mission Flow; if needed run {bin_path} flow-summary --project-root {project_root} --run-id <run-id>.", "", "7. For shared-fabric hosts, finish with canonical postflight.", " - report [SYNC_OK] only after postflight succeeds.", @@ -2717,30 +3276,33 @@ def build_startup_prompt(project_root: Path) -> str: "Before substantial work:", "", "1. Read `AGENTS.md`.", - "2. 
Read `.agent-os/workspace.yaml`, `.agent-os/project.yaml`, `.agent-os/tasks.yaml`, `.agent-os/specs.yaml`, `.agent-os/phase-policy.yaml`, `.agent-os/effect-policy.yaml`, `.agent-os/decisions.yaml`, `.agent-os/evals.yaml`, `.agent-os/fabric-link.yaml`, `.agent-os/read-policy.yaml`, `.agent-os/write-policy.yaml`, `.agent-os/dispatch-policy.yaml`, and `.agent-os/tool-registry.yaml`.", + "2. Read `.agent-os/workspace.yaml`, `.agent-os/project.yaml`, `.agent-os/tasks.yaml`, `.agent-os/specs.yaml`, `.agent-os/phase-policy.yaml`, `.agent-os/decision-policy.yaml`, `.agent-os/effect-policy.yaml`, `.agent-os/decisions.yaml`, `.agent-os/evals.yaml`, `.agent-os/fabric-link.yaml`, `.agent-os/read-policy.yaml`, `.agent-os/write-policy.yaml`, `.agent-os/dispatch-policy.yaml`, and `.agent-os/tool-registry.yaml`.", f"3. Run `{bin_path} doctor --project-root {project_root} --summary` and do not proceed if it fails.", f"4. If the user says `create spec`, `align spec`, `对齐spec`, or equivalent, run `{bin_path} create-spec --project-root {project_root} --title \"<title>\"` or `{bin_path} align-spec --project-root {project_root} --task-id <task-id>` before execution.", - f"5. Select or confirm one task id from `.agent-os/tasks.yaml`; if the user asks for new work and no ready task fits, run `{bin_path} create-task --project-root {project_root} --title \"<title>\" --type <type> --output <path> --acceptance \"<check>\"`.", - f"6. Run `{bin_path} route-task --project-root {project_root} --task-id <task-id>`.", - f"7. Run `{bin_path} dispatch-task --project-root {project_root} --task-id <task-id>` before invoking subagents, MCP tools, skills, workflows, or scripts.", - f"8. Before planned mutation, run `{bin_path} check-route-write --project-root {project_root} --task-id <task-id> --path <planned-path>`.", - f"9. Create run evidence with `{bin_path} run-task --project-root {project_root} --task-id <task-id>`; this writes `spec-snapshot.md` and `context-pack.md`.", - f"10. 
Write/update the execution context with `{bin_path} context-pack --project-root {project_root} --task-id <task-id> --run-id <run-id>` and `{bin_path} plan-task --project-root {project_root} --task-id <task-id> --run-id <run-id>`.", - "11. Pause at consultation checkpoints, state your recommended next move, name the tradeoff, and ask the human whether to proceed.", - f"12. Record dispatch evidence with `{bin_path} dispatch-task --project-root {project_root} --task-id <task-id> --run-id <run-id>` after the run exists.", - f"13. Record public operational progress with `{bin_path} trace-step --project-root {project_root} --task-id <task-id> --run-id <run-id> --step <step> --note \"<public trace>\" --evidence \"<command/file/user confirmation>\"`; relay the returned `TRACE_OK` marker.", - f"14. Record public phase evidence with `{bin_path} phase-task --project-root {project_root} --task-id <task-id> --run-id <run-id> --phase <route|plan|review|dispatch|execute|report> --status completed --note \"<public trace>\" --evidence \"<command/file/user confirmation>\"`; relay the returned `CHECKPOINT_OK` marker.", - f"15. Record MCP, skill, subagent, orchestrator, or important script use with `{bin_path} capability-event --project-root {project_root} --task-id <task-id> --run-id <run-id> --kind <kind> --id <capability-id> --purpose \"<purpose>\"`; relay the returned `CAPABILITY_OK` marker.", - f"16. Verify real side effects with `{bin_path} artifact-assert --project-root {project_root} --task-id <task-id> --run-id <run-id> --kind <kind> --path <artifact>`; relay the returned `EFFECT_OK` marker.", - f"17. Run `{bin_path} eval-task --project-root {project_root} --task-id <task-id> --run-id <run-id>`; do not manually append eval status.", - f"18. 
Run `{bin_path} verify-context --project-root {project_root} --task-id <task-id> --run-id <run-id>`, `{bin_path} verify-lifecycle --project-root {project_root} --task-id <task-id> --run-id <run-id>`, and `{bin_path} verify-effects --project-root {project_root} --task-id <task-id> --run-id <run-id>`; relay the returned `EFFECT_VERIFY_OK` marker before claiming effect verification success.", - f"19. Use `{bin_path} complete-task --project-root {project_root} --task-id <task-id> --run-id <run-id> --summary \"<summary>\"`; it must enforce spec/context/plan, lifecycle, capability visibility, effect verification, and required postflight.", - "20. If a shared-fabric postflight hook is configured, report `[SYNC_OK]` only after `complete-task` returns `sync_status: SYNC_OK`.", - f"21. For reset requests, run `{bin_path} reset-project --project-root {project_root} --mode <soft|hard> --dry-run` before destructive action.", - f"22. For old-project reorganization requests, run `{bin_path} migrate-legacy-project --project-root {project_root} --write-plan` before moving files.", - f"23. For historical/superseded files that should be stored but not read by default, run `{bin_path} archive-legacy-project --project-root {project_root} --write-plan` before moving files into `archive/`.", + f"5. If the user starts a durable plan/spec conversation, run `{bin_path} thread-plan current --project-root {project_root}` or `{bin_path} thread-plan start --project-root {project_root} --title \"<natural language goal>\"`; append natural-language progress with `{bin_path} thread-plan append --project-root {project_root} --thread-id <thread-id> --kind <kind> --text \"<plain note>\"` when the plan changes or advances, and relay `THREAD_PLAN_OK`.", + f"6. 
Select or confirm one task id from `.agent-os/tasks.yaml`; if the user asks for new work and no ready task fits, run `{bin_path} create-task --project-root {project_root} --title \"<title>\" --type <type> --output <path> --acceptance \"<check>\"`.", + f"7. Run `{bin_path} route-task --project-root {project_root} --task-id <task-id>`.", + f"8. Run `{bin_path} dispatch-task --project-root {project_root} --task-id <task-id>` before invoking subagents, MCP tools, skills, workflows, or scripts.", + f"9. Before planned mutation, run `{bin_path} check-route-write --project-root {project_root} --task-id <task-id> --path <planned-path>`.", + f"10. Create run evidence with `{bin_path} run-task --project-root {project_root} --task-id <task-id>`; this writes `spec-snapshot.md` and `context-pack.md`.", + f"11. Write/update the execution context with `{bin_path} context-pack --project-root {project_root} --task-id <task-id> --run-id <run-id>` and `{bin_path} plan-task --project-root {project_root} --task-id <task-id> --run-id <run-id>`.", + "12. Pause at consultation checkpoints, state your recommended next move, name the tradeoff, and ask the human whether to proceed.", + f"13. Record dispatch evidence with `{bin_path} dispatch-task --project-root {project_root} --task-id <task-id> --run-id <run-id>` after the run exists.", + f"14. Record public operational progress with `{bin_path} trace-step --project-root {project_root} --task-id <task-id> --run-id <run-id> --step <step> --note \"<public trace>\" --evidence \"<command/file/user confirmation>\"`; relay the returned `TRACE_OK` marker.", + f"15. Record public phase evidence with `{bin_path} phase-task --project-root {project_root} --task-id <task-id> --run-id <run-id> --phase <route|plan|review|dispatch|execute|report> --status completed --note \"<public trace>\" --evidence \"<command/file/user confirmation>\"`; relay the returned `CHECKPOINT_OK` marker.", + f"16. 
Record MCP, skill, subagent, orchestrator, or important script use with `{bin_path} capability-event --project-root {project_root} --task-id <task-id> --run-id <run-id> --kind <kind> --id <capability-id> --purpose \"<purpose>\"`; relay the returned `CAPABILITY_OK` marker.", + f"17. Record public decision changes with `{bin_path} decision-event --project-root {project_root} --task-id <task-id> --run-id <run-id> --kind <kind> --title \"<title>\" --summary \"<summary>\" --reason \"<reason>\" --evidence \"<evidence>\"`; relay the returned `DECISION_OK` marker when plans branch, change, roll back, or abandon a route.", + f"18. Verify real side effects with `{bin_path} artifact-assert --project-root {project_root} --task-id <task-id> --run-id <run-id> --kind <kind> --path <artifact>`; relay the returned `EFFECT_OK` marker.", + f"19. Run `{bin_path} eval-task --project-root {project_root} --task-id <task-id> --run-id <run-id>`; do not manually append eval status.", + f"20. Run `{bin_path} verify-context --project-root {project_root} --task-id <task-id> --run-id <run-id>`, `{bin_path} verify-lifecycle --project-root {project_root} --task-id <task-id> --run-id <run-id>`, `{bin_path} verify-effects --project-root {project_root} --task-id <task-id> --run-id <run-id>`, and `{bin_path} verify-decisions --project-root {project_root} --task-id <task-id> --run-id <run-id>`; relay `EFFECT_VERIFY_OK` and `DECISION_VERIFY_OK` before claiming verification success.", + f"21. Use `{bin_path} complete-task --project-root {project_root} --task-id <task-id> --run-id <run-id> --summary \"<summary>\"`; it must enforce spec/context/plan, lifecycle, capability visibility, effect verification, decision verification, and required postflight.", + f"22. For medium, high, or complex tasks, include the returned `FLOW_OK` Mermaid Mission Flow in the final answer; if needed, run `{bin_path} flow-summary --project-root {project_root} --run-id <run-id>`.", + "23. 
If a shared-fabric postflight hook is configured, report `[SYNC_OK]` only after `complete-task` returns `sync_status: SYNC_OK`.", + f"24. For reset requests, run `{bin_path} reset-project --project-root {project_root} --mode <soft|hard> --dry-run` before destructive action.", + f"25. For old-project reorganization requests, run `{bin_path} migrate-legacy-project --project-root {project_root} --write-plan` before moving files.", + f"26. For historical/superseded files that should be stored but not read by default, run `{bin_path} archive-legacy-project --project-root {project_root} --write-plan` before moving files into `archive/`.", "", - "Never claim boot, route, dispatch, write safety, spec alignment, context pack, plan, trace, checkpoint, capability, effect, eval, completion, or sync success without command evidence.", + "Never claim boot, route, dispatch, write safety, spec alignment, thread plan, context pack, plan, trace, checkpoint, capability, decision, effect, eval, completion, flow, or sync success without command evidence.", "", ] ) @@ -3572,50 +4134,308 @@ def record_trace_step( return {"status": "recorded", "trace_marker": "TRACE_OK", "marker": marker, "ledger": str(event_path), "record": record} -def get_json_key(data: Any, key_path: str) -> Any: - current = data - for part in key_path.split("."): - if not part: - raise ValueError("--json-key must not contain empty path segments") - if isinstance(current, dict) and part in current: - current = current[part] - continue - raise ValueError(f"json key not found: {key_path}") - return current - - -def html_has_remote_dependency(text: str) -> bool: - patterns = [ - r"<script\b[^>]*\bsrc\s*=\s*['\"]?\s*(?:https?:)?//", - r"<link\b[^>]*\bhref\s*=\s*['\"]?\s*(?:https?:)?//", - r"<img\b[^>]*\bsrc\s*=\s*['\"]?\s*(?:https?:)?//", - r"@import\s+(?:url\()?['\"]?\s*(?:https?:)?//", - r"url\(['\"]?\s*(?:https?:)?//", - ] - return any(re.search(pattern, text, flags=re.IGNORECASE) for pattern in patterns) +def 
effect_assertion_id_exists(run_dir: Path, assertion_id: str, task_id: str, run_id: str) -> bool: + return any( + assertion.get("assertion_id") == assertion_id + and assertion.get("task_id") == task_id + and assertion.get("run_id") == run_id + and assertion.get("status") == "passed" + for assertion in load_effect_assertions(run_dir) + if "_invalid_json" not in assertion + ) -def run_artifact_assertion( +def record_decision_event( project_root: Path, task_id: str, run_id: str, *, kind: str, - target_path: str, - expect: str = "", - before_sha: str = "", - json_key: str = "", - capability_event_id: str = "", + status: str, + title: str, + summary: str, + reason: str, + evidence: str, + parent_id: str = "", + options: list[str] | None = None, + chosen: str = "", + linked_step: str = "", + linked_capability_event_id: str = "", + linked_effect_assertion_id: str = "", ) -> dict[str, Any]: run_dir = ensure_run_belongs_to_task(project_root, task_id, run_id) - if kind not in EFFECT_ASSERTION_KINDS: - raise ValueError(f"invalid artifact assertion kind {kind!r}; expected one of {sorted(EFFECT_ASSERTION_KINDS)}") - if not target_path.strip(): - raise ValueError("--path is required") - capability_link = capability_event_id.strip() + if kind not in DECISION_EVENT_KINDS: + raise ValueError(f"invalid decision kind {kind!r}; expected one of {sorted(DECISION_EVENT_KINDS)}") + if status not in DECISION_EVENT_STATUSES: + raise ValueError(f"invalid decision status {status!r}; expected one of {sorted(DECISION_EVENT_STATUSES)}") + for label, value in [("--title", title), ("--summary", summary), ("--reason", reason), ("--evidence", evidence)]: + if not value.strip(): + raise ValueError(f"{label} is required") + if linked_step and linked_step not in OPERATIONAL_TRACE_STEPS: + raise ValueError(f"invalid linked step {linked_step!r}; expected one of {OPERATIONAL_TRACE_STEPS}") + capability_link = linked_capability_event_id.strip() if capability_link and not capability_event_id_exists(run_dir, 
capability_link, task_id, run_id): - raise ValueError(f"capability event not found for assertion: {capability_link}") - target = resolve_project_artifact(project_root, target_path) + raise ValueError(f"capability event not found for decision: {capability_link}") + effect_link = linked_effect_assertion_id.strip() + if effect_link and not effect_assertion_id_exists(run_dir, effect_link, task_id, run_id): + raise ValueError(f"effect assertion not found for decision: {effect_link}") + + decision_id = ( + "DEC-" + + datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S%fZ") + + "-" + + safe_slug(f"{kind}-{title.strip()}")[:48] + ) + clean_options = [item.strip() for item in (options or []) if item.strip()] + record = { + "decision_id": decision_id, + "parent_id": parent_id.strip(), + "task_id": task_id, + "run_id": run_id, + "kind": kind, + "status": status, + "title": title.strip(), + "summary": summary.strip(), + "reason": reason.strip(), + "options": clean_options, + "chosen": chosen.strip(), + "evidence": evidence.strip(), + "linked_step": linked_step.strip(), + "linked_capability_event_id": capability_link, + "linked_effect_assertion_id": effect_link, + "timestamp": datetime.now(timezone.utc).isoformat(), + } + event_path = decision_events_path(run_dir) + with event_path.open("a", encoding="utf-8") as handle: + handle.write(json.dumps(record, ensure_ascii=False, sort_keys=True) + "\n") + append_command_event( + run_dir, + "decision-event", + task_id, + run_id, + decision_id=decision_id, + kind=kind, + status=status, + ) + marker = f"DECISION_OK kind={kind} status={status} title={short_marker_value(title)}" + return { + "status": "recorded", + "decision_marker": "DECISION_OK", + "decision_id": decision_id, + "marker": marker, + "ledger": str(event_path), + "record": record, + } + + +def query_decision_events( + project_root: Path, + *, + run_id: str, + task_id: str = "", + kind: str = "", + status: str = "", + parent_id: str = "", +) -> dict[str, Any]: + run_dir = 
resolve_run_dir(project_root, run_id) + events = [event for event in load_decision_events(run_dir) if "_invalid_json" not in event] + if task_id: + events = [event for event in events if event.get("task_id") == task_id] + if kind: + events = [event for event in events if event.get("kind") == kind] + if status: + events = [event for event in events if event.get("status") == status] + if parent_id: + events = [event for event in events if event.get("parent_id") == parent_id] + return { + "status": "ok", + "run_id": run_id, + "count": len(events), + "events": events, + "ledger": str(decision_events_path(run_dir)), + } + + +def decision_event_has_command_event(run_dir: Path, event: dict[str, Any], task_id: str, run_id: str) -> bool: + return has_command_event( + run_dir, + "decision-event", + task_id, + run_id, + decision_id=str(event.get("decision_id", "")), + kind=str(event.get("kind", "")), + status=str(event.get("status", "")), + ) + + +def build_decision_verify_marker(result: dict[str, Any]) -> str: + return ( + "DECISION_VERIFY_OK " + f"status={result.get('status', '')} " + f"strictness={result.get('strictness', '')} " + f"decisions={len(result.get('events', []))} " + f"warnings={len(result.get('warnings', []))} " + f"errors={len(result.get('errors', []))}" + ) + + +def verify_decisions(project_root: Path, task_id: str, run_id: str) -> dict[str, Any]: + policy = parse_decision_policy(project_root) + strictness = str(policy.get("strictness", "warn")) + if strictness not in DECISION_STRICTNESS_LEVELS: + raise ValueError(f"invalid decision strictness: {strictness}") + run_dir = ensure_run_belongs_to_task(project_root, task_id, run_id) + errors: list[dict[str, Any]] = [] + warnings: list[dict[str, Any]] = [] + + if strictness == "off": + reason = str(policy.get("downgrade_reason", "")).strip() + if not reason: + errors.append({"label": "missing_downgrade_reason", "detail": "strictness=off requires downgrade_reason"}) + result = { + "status": "failed" if errors else 
"disabled", + "task_id": task_id, + "run_id": run_id, + "strictness": strictness, + "downgrade_reason": reason, + "events": [], + "errors": errors, + "warnings": warnings, + "ledger": str(decision_events_path(run_dir)), + } + result["decision_verify_marker"] = "DECISION_VERIFY_OK" + result["marker"] = build_decision_verify_marker(result) + append_command_event( + run_dir, + "verify-decisions", + task_id, + run_id, + status=str(result["status"]), + strictness=strictness, + decision_verify_marker=result["decision_verify_marker"], + marker=result["marker"], + ) + return result + + raw_events = load_decision_events(run_dir) + valid_events: list[dict[str, Any]] = [] + seen_ids: set[str] = set() + duplicate_ids: set[str] = set() + parent_ids: set[str] = set() + for event in raw_events: + if "_invalid_json" in event: + errors.append({"label": "invalid_decision_event", "detail": f"line {event.get('_line')} is not JSON"}) + continue + if event.get("task_id") != task_id or event.get("run_id") != run_id: + errors.append({"label": "decision_scope_mismatch", "detail": event}) + continue + decision_id = str(event.get("decision_id", "")).strip() + if not decision_id: + errors.append({"label": "decision_missing_id", "detail": event.get("title", "")}) + continue + if decision_id in seen_ids: + duplicate_ids.add(decision_id) + seen_ids.add(decision_id) + for field in ["title", "summary", "reason", "evidence"]: + if not str(event.get(field, "")).strip(): + errors.append({"label": f"decision_missing_{field}", "detail": decision_id}) + kind = str(event.get("kind", "")) + status = str(event.get("status", "")) + if kind not in DECISION_EVENT_KINDS: + errors.append({"label": "invalid_decision_kind", "detail": kind}) + if status not in DECISION_EVENT_STATUSES: + errors.append({"label": "invalid_decision_status", "detail": status}) + if (kind in DECISION_EXPLANATION_REQUIRED_KINDS or status in {"abandoned", "rolled_back", "superseded"}) and not str(event.get("reason", "")).strip(): + 
errors.append({"label": "decision_missing_explanation", "detail": decision_id}) + if not decision_event_has_command_event(run_dir, event, task_id, run_id): + errors.append({"label": "decision_missing_command_event", "detail": decision_id}) + parent = str(event.get("parent_id", "")).strip() + if parent: + parent_ids.add(parent) + valid_events.append(event) + for decision_id in sorted(duplicate_ids): + errors.append({"label": "decision_duplicate_id", "detail": decision_id}) + for parent in sorted(parent_ids): + if parent not in seen_ids: + errors.append({"label": "decision_orphan_parent", "detail": parent}) + + if not valid_events: + if strictness == "enforce": + errors.append({"label": "no_decision_events", "detail": "decision-policy strictness=enforce requires decision evidence"}) + else: + warnings.append({"label": "no_decision_events", "detail": "no decision graph events were recorded"}) + + status = "failed" if errors else ("warning" if warnings else "passed") + result = { + "status": status, + "task_id": task_id, + "run_id": run_id, + "strictness": strictness, + "downgrade_reason": str(policy.get("downgrade_reason", "")).strip(), + "events": valid_events, + "errors": errors, + "warnings": warnings, + "ledger": str(decision_events_path(run_dir)), + } + result["decision_verify_marker"] = "DECISION_VERIFY_OK" + result["marker"] = build_decision_verify_marker(result) + append_command_event( + run_dir, + "verify-decisions", + task_id, + run_id, + status=status, + strictness=strictness, + decision_verify_marker=result["decision_verify_marker"], + marker=result["marker"], + ) + return result + + +def get_json_key(data: Any, key_path: str) -> Any: + current = data + for part in key_path.split("."): + if not part: + raise ValueError("--json-key must not contain empty path segments") + if isinstance(current, dict) and part in current: + current = current[part] + continue + raise ValueError(f"json key not found: {key_path}") + return current + + +def 
html_has_remote_dependency(text: str) -> bool: + patterns = [ + r"<script\b[^>]*\bsrc\s*=\s*['\"]?\s*(?:https?:)?//", + r"<link\b[^>]*\bhref\s*=\s*['\"]?\s*(?:https?:)?//", + r"<img\b[^>]*\bsrc\s*=\s*['\"]?\s*(?:https?:)?//", + r"@import\s+(?:url\()?['\"]?\s*(?:https?:)?//", + r"url\(['\"]?\s*(?:https?:)?//", + ] + return any(re.search(pattern, text, flags=re.IGNORECASE) for pattern in patterns) + + +def run_artifact_assertion( + project_root: Path, + task_id: str, + run_id: str, + *, + kind: str, + target_path: str, + expect: str = "", + before_sha: str = "", + json_key: str = "", + capability_event_id: str = "", +) -> dict[str, Any]: + run_dir = ensure_run_belongs_to_task(project_root, task_id, run_id) + if kind not in EFFECT_ASSERTION_KINDS: + raise ValueError(f"invalid artifact assertion kind {kind!r}; expected one of {sorted(EFFECT_ASSERTION_KINDS)}") + if not target_path.strip(): + raise ValueError("--path is required") + capability_link = capability_event_id.strip() + if capability_link and not capability_event_id_exists(run_dir, capability_link, task_id, run_id): + raise ValueError(f"capability event not found for assertion: {capability_link}") + target = resolve_project_artifact(project_root, target_path) relative = project_relative(project_root, target) evidence = "" @@ -4397,8 +5217,14 @@ def audit_project_mount( ) current_governance = fabric.get("governance_root", "") current_capability = fabric.get("capability_root") or fabric.get("implementation_root", "") - if "Antigravity_Skills" in current_governance: - issues.append("legacy_antigravity_governance_root") + current_governance_path = Path(current_governance).expanduser() if current_governance else None + if ( + current_governance_path + and project_root != root + and current_governance_path.name == "global-agent-fabric" + and current_governance_path != governance_root + ): + issues.append("legacy_preos_governance_root") if current_governance and Path(current_governance).expanduser() != 
governance_root and project_root != root: issues.append("noncanonical_governance_root") if current_capability and Path(current_capability).expanduser() != capability_root and project_root != root: @@ -4419,7 +5245,7 @@ def audit_project_mount( issues.append("missing_archive_write_guard") should_rewrite_mount = project_root != root and ( - "legacy_antigravity_governance_root" in issues + "legacy_preos_governance_root" in issues or "noncanonical_governance_root" in issues or "noncanonical_capability_root" in issues or "postflight_hook_missing_or_not_executable" in issues @@ -4637,8 +5463,19 @@ def complete_task( effects = verify_effects(project_root, task_id, run_id) if effects.get("status") == "failed": raise ValueError("effect verification failed: " + json.dumps(effects.get("errors", []), ensure_ascii=False)) + decisions = verify_decisions(project_root, task_id, run_id) + if decisions.get("status") == "failed": + raise ValueError("decision verification failed: " + json.dumps(decisions.get("errors", []), ensure_ascii=False)) postflight = run_postflight_gate(project_root, run_dir, summary, allow_pending_postflight) + mission_flow: dict[str, Any] | None = None + if task.get("complexity", "medium") in MISSION_FLOW_COMPLEXITIES: + mission_flow = write_mission_flow_markdown( + project_root, + task_id, + run_id, + sync_status=str(postflight.get("sync_status", "")), + ) completed_at = datetime.now(timezone.utc).isoformat() receipt_lines = [ "# Receipt", @@ -4661,6 +5498,14 @@ def complete_task( "", f"Effect Verify Marker: {effects.get('marker', '')}", "", + f"Decision Verification Status: {decisions.get('status')}", + "", + f"Decision Verify Marker: {decisions.get('marker', '')}", + "", + f"Mission Flow Marker: {mission_flow.get('marker', '') if mission_flow else 'not required'}", + "", + f"Mission Flow: {mission_flow.get('source', '') if mission_flow else 'not required for simple task'}", + "", f"Sync Status: {postflight.get('sync_status')}", "", ] @@ -4682,6 +5527,24 @@ 
def complete_task( "", ] ) + if decisions.get("strictness") == "off": + receipt_lines.extend( + [ + "DECISION_STRICTNESS_DOWNGRADED:", + "", + str(decisions.get("downgrade_reason", "")), + "", + ] + ) + if decisions.get("warnings"): + receipt_lines.extend( + [ + "Decision Warnings:", + "", + json.dumps(decisions.get("warnings", []), ensure_ascii=False), + "", + ] + ) if postflight.get("status_marker"): receipt_lines.extend([f"Status Marker: {postflight['status_marker']}", ""]) if postflight.get("pending_reason"): @@ -4726,6 +5589,12 @@ def complete_task( "context_contract_status": context_contract.get("status"), "effect_status": effects.get("status"), "effect_verify_marker": effects.get("marker", ""), + "decision_status": decisions.get("status"), + "decision_verify_marker": decisions.get("marker", ""), + "flow_marker": mission_flow.get("flow_marker", "") if mission_flow else "", + "flow_summary_marker": mission_flow.get("marker", "") if mission_flow else "", + "flow_mermaid": mission_flow.get("mermaid", "") if mission_flow else "", + "flow_source": mission_flow.get("source", "") if mission_flow else "", "sync_status": postflight.get("sync_status"), "status_marker": postflight.get("status_marker", ""), "postflight": postflight.get("postflight", ""), @@ -4734,9 +5603,10 @@ def complete_task( } -HTML_REPORT_KINDS = {"receipt", "handoff", "rich-report"} +HTML_REPORT_KINDS = {"receipt", "handoff", "rich-report", "decision-map", "mission-flow"} HTML_DEFAULT_THEME = "knowledgeos-default" HTML_SOURCE_TRUTH_NOTICE = "HTML is presentation, not source of truth." 
+MISSION_FLOW_COMPLEXITIES = {"medium", "high", "complex"} def sha256_file(path: Path) -> str: @@ -4915,6 +5785,20 @@ def html_report_css(theme: str) -> str: summary {{ cursor: pointer; font-weight: 800; }} .kos-footer {{ margin-top: 28px; color: var(--kos-muted); font-size: 0.9rem; text-align: center; }} .kos-notice {{ color: var(--kos-warn); font-weight: 800; }} +.kos-flow {{ display: grid; gap: 14px; margin: 20px 0; }} +.kos-flow-card {{ + border: 1px solid var(--kos-line); + border-radius: 18px; + padding: 16px 18px; + background: #f8fbff; +}} +.kos-flow-card h3 {{ margin: 0 0 6px; }} +.kos-flow-card p {{ margin: 0; }} +.kos-flow-intent {{ background: #e8f3ff; border-color: #b8d9ff; }} +.kos-flow-guard {{ background: #fff7df; border-color: #f3d58a; }} +.kos-flow-work {{ background: #eafbf0; border-color: #b8e7c7; }} +.kos-flow-proof {{ background: #f3e8ff; border-color: #d8b4fe; }} +.kos-flow-done {{ background: #dcfce7; border-color: #86efac; }} @media (max-width: 860px) {{ .kos-shell {{ padding: 28px 14px; }} .kos-grid {{ grid-template-columns: 1fr; }} @@ -5103,6 +5987,459 @@ def render_html_sidecar( } +def decision_events_to_markdown(run_id: str, events: list[dict[str, Any]]) -> str: + lines = [ + f"# KnowledgeOS Decision Map {run_id}", + "", + "This sidecar summarizes public Decision Graph events. 
It is not hidden chain-of-thought.", + "", + "## Main Path", + "", + ] + main_statuses = {"planned", "active", "selected", "executed"} + main_events = [event for event in events if str(event.get("status", "")) in main_statuses] + branch_events = [event for event in events if event not in main_events] + if not main_events: + lines.append("- No selected or executed decision path recorded.") + for event in main_events: + lines.append( + f"- {event.get('decision_id', '')}: {event.get('title', '')} " + f"({event.get('kind', '')}, {event.get('status', '')})" + ) + if event.get("summary"): + lines.append(f"- Summary: {event.get('summary')}") + if event.get("reason"): + lines.append(f"- Reason: {event.get('reason')}") + if event.get("chosen"): + lines.append(f"- Chosen: {event.get('chosen')}") + lines.extend(["", "## Abandoned Deferred And Recovery Branches", ""]) + if not branch_events: + lines.append("- No abandoned, deferred, superseded, blocked, or rollback branches recorded.") + for event in branch_events: + lines.append( + f"- {event.get('decision_id', '')}: {event.get('title', '')} " + f"({event.get('kind', '')}, {event.get('status', '')})" + ) + if event.get("parent_id"): + lines.append(f"- Parent: {event.get('parent_id')}") + if event.get("reason"): + lines.append(f"- Reason: {event.get('reason')}") + if event.get("evidence"): + lines.append(f"- Evidence: {event.get('evidence')}") + lines.extend(["", "## Full Decision Ledger", ""]) + for event in events: + options = ", ".join(str(item) for item in event.get("options", []) if str(item).strip()) + lines.append(f"- ID: {event.get('decision_id', '')}") + lines.append(f"- Parent: {event.get('parent_id', '') or 'root'}") + lines.append(f"- Title: {event.get('title', '')}") + lines.append(f"- Kind: {event.get('kind', '')}") + lines.append(f"- Status: {event.get('status', '')}") + if options: + lines.append(f"- Options: {options}") + if event.get("linked_capability_event_id"): + lines.append(f"- Linked Capability: 
{event.get('linked_capability_event_id')}") + if event.get("linked_effect_assertion_id"): + lines.append(f"- Linked Effect: {event.get('linked_effect_assertion_id')}") + lines.append("") + return "\n".join(lines).rstrip() + "\n" + + +def render_decision_map_sidecar( + project_root: Path, + *, + run_id: str, + output_path: Path, + theme: str = HTML_DEFAULT_THEME, +) -> dict[str, Any]: + run_dir = resolve_run_dir(project_root, run_id) + source_path = decision_events_path(run_dir) + if not source_path.exists(): + raise FileNotFoundError(source_path) + events = [event for event in load_decision_events(run_dir) if "_invalid_json" not in event] + source_sha = sha256_file(source_path) + markdown = decision_events_to_markdown(run_id, events) + anchor_prefix = safe_slug(f"decision-map-{source_sha[:12]}") + body_html, title, sections = markdown_to_html_fragment(markdown, anchor_prefix=anchor_prefix) + generated_at = datetime.now(timezone.utc).isoformat() + source_rel = project_relative(project_root, source_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + fragment_path = output_path.with_suffix(".fragment.html") + manifest_path = output_path.with_suffix(".manifest.json") + fragment_html = build_html_fragment( + kind="decision-map", + title=title, + source_rel=source_rel, + source_sha=source_sha, + run_id=run_id, + body_html=body_html, + sections=sections, + generated_at=generated_at, + ) + write_text(fragment_path, fragment_html) + full_html = build_html_document( + title=title, + kind="decision-map", + body=fragment_html, + source_rel=source_rel, + source_sha=source_sha, + run_id=run_id, + generated_at=generated_at, + theme=theme, + ) + write_text(output_path, full_html) + manifest = { + "schema_version": "knowledgeos.html-report.v1", + "kind": "decision-map", + "title": title, + "theme": theme, + "run_id": run_id, + "source": source_rel, + "source_sha256": source_sha, + "output": project_relative(project_root, output_path), + "fragment": 
project_relative(project_root, fragment_path), + "sections": sections, + "decision_count": len(events), + "generated_at": generated_at, + "html_source_of_truth": False, + "notice": HTML_SOURCE_TRUTH_NOTICE, + } + write_text(manifest_path, json.dumps(manifest, indent=2, ensure_ascii=False) + "\n") + return { + "status": "rendered", + "kind": "decision-map", + "title": title, + "output": str(output_path), + "fragment": str(fragment_path), + "manifest": str(manifest_path), + "source": str(source_path), + "source_sha256": source_sha, + "run_id": run_id, + "decision_count": len(events), + "html_source_of_truth": False, + } + + +def mermaid_label(value: str, limit: int = 58) -> str: + cleaned = " ".join(str(value).split()) + if len(cleaned) > limit: + cleaned = cleaned[: max(0, limit - 3)].rstrip() + "..." + return cleaned.replace('"', "'").replace("[", "(").replace("]", ")") + + +def latest_phase_records_by_phase(run_dir: Path) -> dict[str, dict[str, Any]]: + latest: dict[str, dict[str, Any]] = {} + for record in load_phase_records(run_dir): + if "_invalid_json" in record: + continue + phase = str(record.get("phase", "")) + if phase: + latest[phase] = record + return latest + + +def count_valid_records(records: list[dict[str, Any]], status: str = "") -> int: + count = 0 + for record in records: + if "_invalid_json" in record: + continue + if status and str(record.get("status", "")) != status: + continue + count += 1 + return count + + +def mission_flow_stage_summary(project_root: Path, task_id: str, run_id: str, sync_status: str = "") -> dict[str, Any]: + task = find_task(project_root, task_id) + run_dir = ensure_run_belongs_to_task(project_root, task_id, run_id) + run_meta = parse_scalar_values(run_dir / "run.yaml", {"route_status", "status", "task_title", "task_type"}) + phase_latest = latest_phase_records_by_phase(run_dir) + phase_done = [phase for phase in EXPECTED_PHASE_KEYS if phase_latest.get(phase, {}).get("status") in {"completed", "skipped"}] + 
capability_count = count_valid_records(load_capability_events(run_dir)) + effect_count = count_valid_records(load_effect_assertions(run_dir), status="passed") + decision_count = count_valid_records(load_decision_events(run_dir)) + has_plan = (run_dir / "plan.md").exists() and (run_dir / "context-pack.md").exists() + eval_ok = eval_has_passed(run_dir / "eval.md") + postflight_text = read_text(run_dir / "postflight.md") if (run_dir / "postflight.md").exists() else "" + synced = sync_status == "SYNC_OK" or "[SYNC_OK]" in postflight_text + task_title = task.get("title") or run_meta.get("task_title") or task_id + proof_label = f"{effect_count} real check" + ("" if effect_count == 1 else "s") + tool_label = f"{capability_count} tool note" + ("" if capability_count == 1 else "s") + decision_label = f"{decision_count} decision" + ("" if decision_count == 1 else "s") + checkpoint_label = f"{len(phase_done)}/6 checkpoints" + return { + "task": task, + "run_dir": run_dir, + "title": task_title, + "complexity": task.get("complexity", "medium"), + "stages": [ + { + "id": "A", + "name": "Goal", + "kind": "intent", + "label": f"Goal: {task_title}", + "detail": "What the user wanted us to finish.", + "status": "set", + }, + { + "id": "B", + "name": "Health Check", + "kind": "guard", + "label": "Health check: OK", + "detail": "The project control plane was checked before work.", + "status": "ok", + }, + { + "id": "C", + "name": "Task & Plan", + "kind": "intent", + "label": "Task and plan: ready" if has_plan else "Task and plan: missing", + "detail": "The run has a context pack and a short working plan." 
def build_mission_flow_mermaid(summary: dict[str, Any]) -> str:
    """Render a stage summary as Mermaid ``flowchart LR`` source.

    The output contains one node per stage (its id plus its label passed
    through ``mermaid_label``), one arrow between each pair of consecutive
    stages, five ``classDef`` colour rules, and one ``class`` line for every
    stage kind that has at least one stage.

    Args:
        summary: Mapping with a ``"stages"`` list; every stage must provide
            ``"id"``, ``"label"`` and ``"kind"``.

    Returns:
        The Mermaid source joined with newlines, without a trailing newline.
    """
    # NOTE(review): the leading whitespace inside the emitted lines is copied
    # from a whitespace-collapsed diff -- confirm the indent width in the file.
    stage_list = summary["stages"]
    out = ["flowchart LR"]
    out += [f' {node["id"]}["{mermaid_label(node["label"])}"]' for node in stage_list]
    out += [
        f' {src["id"]} --> {dst["id"]}'
        for src, dst in zip(stage_list, stage_list[1:])
    ]
    out += [
        "",
        " classDef intent fill:#E8F3FF,stroke:#2B6CB0,color:#102A43;",
        " classDef guard fill:#FFF4D6,stroke:#B7791F,color:#3D2B00;",
        " classDef work fill:#E9FBEF,stroke:#2F855A,color:#123524;",
        " classDef proof fill:#F3E8FF,stroke:#6B46C1,color:#2D174D;",
        " classDef done fill:#DCFCE7,stroke:#15803D,color:#052E16;",
    ]
    # Pre-seed the known kinds so their class lines keep a fixed order; a kind
    # not listed here is appended after them in first-seen order.
    ids_by_kind: dict[str, list[str]] = {
        known: [] for known in ("intent", "guard", "work", "proof", "done")
    }
    for node in stage_list:
        ids_by_kind.setdefault(node["kind"], []).append(node["id"])
    out += [
        f" class {','.join(members)} {kind};"
        for kind, members in ids_by_kind.items()
        if members
    ]
    return "\n".join(out)


def build_mission_flow_markdown(project_root: Path, task_id: str, run_id: str, sync_status: str = "") -> dict[str, Any]:
    """Assemble the mission-flow Markdown document and its metadata for a run.

    Obtains the stage summary from ``mission_flow_stage_summary``, renders it
    with ``build_mission_flow_mermaid``, and wraps the diagram in a Markdown
    page with a per-stage plain summary and a counts section.

    Args:
        project_root: Project root forwarded to ``mission_flow_stage_summary``.
        task_id: Task identifier, echoed back in the result.
        run_id: Run identifier, used in the heading and the marker.
        sync_status: Forwarded to ``mission_flow_stage_summary`` unchanged.

    Returns:
        A dict with ``status``, ``flow_marker``, ``marker``, ``task_id``,
        ``run_id``, ``complexity``, ``mermaid``, ``markdown`` (always ending
        in exactly one newline), ``counts`` and ``stages``.
    """
    summary = mission_flow_stage_summary(project_root, task_id, run_id, sync_status=sync_status)
    diagram = build_mission_flow_mermaid(summary)
    tallies = summary["counts"]

    header = [
        f"# Mission Flow {run_id}",
        "",
        "FLOW_OK",
        "",
        "A human-readable map of how this task moved from request to finish.",
        "",
        "## Flow",
        "",
        "```mermaid",
        diagram,
        "```",
        "",
        "## Plain Summary",
        "",
    ]
    bullets = [
        f"- {item['name']}: {item['label']}. {item['detail']}"
        for item in summary["stages"]
    ]
    footer = [
        "",
        "## Counts",
        "",
        f"- Tools made visible: {tallies['capability_events']}",
        f"- Artifact checks: {tallies['effect_assertions']}",
        f"- Decision notes: {tallies['decision_events']}",
        f"- Checkpoints recorded: {tallies['phases_recorded']}/6",
        "",
        "HTML is presentation, not source of truth.",
    ]
    document = "\n".join(header + bullets + footer).rstrip() + "\n"
    synced_word = "yes" if summary["synced"] else "no"
    return {
        "status": "generated",
        "flow_marker": "FLOW_OK",
        "marker": f"FLOW_OK run={run_id} stages={len(summary['stages'])} synced={synced_word}",
        "task_id": task_id,
        "run_id": run_id,
        "complexity": summary["complexity"],
        "mermaid": diagram,
        "markdown": document,
        "counts": summary["counts"],
        "stages": summary["stages"],
    }


def write_mission_flow_markdown(project_root: Path, task_id: str, run_id: str, sync_status: str = "") -> dict[str, Any]:
    """Build the mission-flow Markdown and write it to ``mission-flow.md``.

    The file is written into the directory returned by
    ``ensure_run_belongs_to_task``; the written path is added to the result
    under ``"source"`` as a string.

    Returns:
        The dict from ``build_mission_flow_markdown`` plus the ``"source"`` key.
    """
    flow = build_mission_flow_markdown(project_root, task_id, run_id, sync_status=sync_status)
    # The document is built before the run/task check, as in the original order.
    target = ensure_run_belongs_to_task(project_root, task_id, run_id) / "mission-flow.md"
    write_text(target, flow["markdown"])
    flow["source"] = str(target)
    return flow


def mission_flow_cards_html(stages: list[dict[str, Any]]) -> str:
    """Render each stage as an HTML ``<section>`` card inside one wrapper div.

    Every interpolated value (``kind``, ``name``, ``label``, ``detail``) goes
    through ``html_escape`` before it is placed in the markup.

    Returns:
        ``<div class="kos-flow">`` containing the cards joined by newlines.
    """
    rendered = [
        f'<section class="kos-flow-card kos-flow-{html_escape(card["kind"])}">'
        f'<h3>{html_escape(card["name"])}</h3>'
        f'<p><strong>{html_escape(card["label"])}</strong></p>'
        f'<p>{html_escape(card["detail"])}</p>'
        "</section>"
        for card in stages
    ]
    return '<div class="kos-flow">' + "\n".join(rendered) + "</div>"
+ source_sha = sha256_file(source_path) + generated_at = datetime.now(timezone.utc).isoformat() + source_rel = project_relative(project_root, source_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + fragment_path = output_path.with_suffix(".fragment.html") + manifest_path = output_path.with_suffix(".manifest.json") + body_html = "\n".join( + [ + f"<h1>Mission Flow {html_escape(run_id)}</h1>", + "<p>A friendly map of how the task moved from request to finish.</p>", + mission_flow_cards_html(flow["stages"]), + "<details><summary>Mermaid source</summary>", + f"<pre><code>{html_escape(flow['mermaid'])}</code></pre>", + "</details>", + ] + ) + sections = [{"id": "mission-flow", "title": "Mission Flow", "level": 1}] + fragment_html = build_html_fragment( + kind="mission-flow", + title=f"Mission Flow {run_id}", + source_rel=source_rel, + source_sha=source_sha, + run_id=run_id, + body_html=body_html, + sections=sections, + generated_at=generated_at, + ) + write_text(fragment_path, fragment_html) + full_html = build_html_document( + title=f"Mission Flow {run_id}", + kind="mission-flow", + body=fragment_html, + source_rel=source_rel, + source_sha=source_sha, + run_id=run_id, + generated_at=generated_at, + theme=theme, + ) + write_text(output_path, full_html) + manifest = { + "schema_version": "knowledgeos.html-report.v1", + "kind": "mission-flow", + "title": f"Mission Flow {run_id}", + "theme": theme, + "run_id": run_id, + "task_id": task_id, + "source": source_rel, + "source_sha256": source_sha, + "output": project_relative(project_root, output_path), + "fragment": project_relative(project_root, fragment_path), + "sections": sections, + "generated_at": generated_at, + "flow_marker": "FLOW_OK", + "html_source_of_truth": False, + "notice": HTML_SOURCE_TRUTH_NOTICE, + } + write_text(manifest_path, json.dumps(manifest, indent=2, ensure_ascii=False) + "\n") + return { + "status": "rendered", + "kind": "mission-flow", + "title": f"Mission Flow {run_id}", + "output": 
def cmd_decision_event(args: argparse.Namespace) -> int:
    """CLI handler for ``decision-event``: record one decision event for a run.

    Forwards the parsed arguments to ``record_decision_event``; optional
    string arguments that are falsy are passed as ``""`` and a falsy
    ``--option`` list as ``[]``.

    Returns:
        ``0`` on success, ``1`` when ``record_decision_event`` raises
        ``FileNotFoundError``, ``KeyError`` or ``ValueError`` (the message is
        printed to stderr).
    """
    project_root = Path(args.project_root).expanduser().resolve()
    try:
        event_fields = {
            "kind": args.kind,
            "status": args.status,
            "title": args.title,
            "summary": args.summary,
            "reason": args.reason,
            "evidence": args.evidence,
            "parent_id": args.parent_id or "",
            "options": args.option or [],
            "chosen": args.chosen or "",
            "linked_step": args.linked_step or "",
            "linked_capability_event_id": args.linked_capability_event_id or "",
            "linked_effect_assertion_id": args.linked_effect_assertion_id or "",
        }
        outcome = record_decision_event(project_root, args.task_id, args.run_id, **event_fields)
    except (FileNotFoundError, KeyError, ValueError) as exc:
        print(str(exc), file=sys.stderr)
        return 1
    if args.json:
        emit(outcome, True)
        return 0
    print(outcome["marker"])
    print(f"ledger: {outcome['ledger']}")
    return 0


def cmd_decision_query(args: argparse.Namespace) -> int:
    """CLI handler for ``decision-query``: look up decision events for a run.

    Passes the run id and the optional task/kind/status/parent filters
    (falsy values become ``""``) to ``query_decision_events`` and hands the
    result to ``emit`` together with the ``--json`` flag.

    Returns:
        ``0`` on success, ``1`` when the query raises ``FileNotFoundError``,
        ``KeyError`` or ``ValueError`` (the message is printed to stderr).
    """
    project_root = Path(args.project_root).expanduser().resolve()
    try:
        filters = {
            "run_id": args.run_id,
            "task_id": args.task_id or "",
            "kind": args.kind or "",
            "status": args.status or "",
            "parent_id": args.parent_id or "",
        }
        found = query_decision_events(project_root, **filters)
    except (FileNotFoundError, KeyError, ValueError) as exc:
        print(str(exc), file=sys.stderr)
        return 1
    emit(found, args.json)
    return 0


def cmd_verify_decisions(args: argparse.Namespace) -> int:
    """CLI handler for ``verify-decisions``: verify a run's decision events.

    In text mode prints the marker and ledger, followed by warning and error
    counts when the result carries any.

    Returns:
        ``1`` when ``verify_decisions`` raises ``FileNotFoundError``,
        ``KeyError`` or ``ValueError``; otherwise ``2`` when the result status
        is ``"failed"`` and ``0`` for any other status.
    """
    project_root = Path(args.project_root).expanduser().resolve()
    try:
        report = verify_decisions(project_root, args.task_id, args.run_id)
    except (FileNotFoundError, KeyError, ValueError) as exc:
        print(str(exc), file=sys.stderr)
        return 1
    exit_code = 2 if report.get("status") == "failed" else 0
    if args.json:
        emit(report, True)
        return exit_code
    print(report["marker"])
    print(f"ledger: {report['ledger']}")
    # Counts are printed only when the corresponding list is present and non-empty.
    for label in ("warnings", "errors"):
        if report.get(label):
            print(f"{label}: {len(report[label])}")
    return exit_code
def cmd_flow_summary(args: argparse.Namespace) -> int:
    """CLI handler for ``flow-summary``: write and print a run's mission flow.

    Resolves the run directory, takes the task id from ``--task-id`` or, when
    that is empty, from the ``task_id`` scalar in the run's ``run.yaml``,
    writes the Markdown via ``write_mission_flow_markdown`` and records a
    ``flow-summary`` command event.

    Returns:
        ``0`` on success; ``1`` when a ``FileNotFoundError``, ``KeyError`` or
        ``ValueError`` is raised (including an unresolved task id), with the
        message printed to stderr.
    """
    project_root = Path(args.project_root).expanduser().resolve()
    try:
        run_dir = resolve_run_dir(project_root, args.run_id)
        task_id = args.task_id
        if not task_id:
            task_id = parse_scalar_values(run_dir / "run.yaml", {"task_id"}).get("task_id", "")
        if not task_id:
            raise ValueError("could not resolve task id for flow-summary")
        flow = write_mission_flow_markdown(project_root, task_id, args.run_id)
        written_rel = project_relative(project_root, Path(flow["source"]))
        append_command_event(
            run_dir,
            "flow-summary",
            task_id,
            args.run_id,
            status="generated",
            source=written_rel,
        )
    except (FileNotFoundError, KeyError, ValueError) as exc:
        print(str(exc), file=sys.stderr)
        return 1
    if args.json:
        emit(flow, True)
        return 0
    print(flow["marker"])
    if args.format == "markdown":
        print(flow["markdown"])
    else:
        # Any non-markdown format prints the diagram inside a Mermaid fence.
        for text in ("```mermaid", flow["mermaid"], "```"):
            print(text)
    print(f"source: {flow['source']}")
    return 0


def cmd_thread_plan(args: argparse.Namespace) -> int:
    """CLI handler for the ``thread-plan`` sub-actions.

    Dispatches on ``args.thread_action``: ``start``, ``current``, ``append``,
    ``link-run`` and ``render`` (``markdown``, ``html`` or ``mermaid``).
    A successful ``render`` also records a ``thread-plan render`` command event.

    Returns:
        ``0`` on success; ``1`` when an unsupported action/format is given or
        a ``FileNotFoundError``, ``KeyError``, ``ValueError`` or
        ``json.JSONDecodeError`` is raised (message printed to stderr).
    """
    project_root = Path(args.project_root).expanduser().resolve()
    action = args.thread_action
    try:
        if action == "start":
            result = start_thread_plan(project_root, title=args.title, spec_id=args.spec_id or "")
        elif action == "current":
            active = load_current_thread(project_root)
            result = {
                "status": "ok",
                "thread_plan_marker": THREAD_PLAN_MARKER,
                "marker": f"{THREAD_PLAN_MARKER} action=current thread={active['thread_id']}",
                "current": active,
            }
        elif action == "append":
            result = append_thread_plan_event(
                project_root,
                thread_id=args.thread_id,
                kind=args.kind,
                text=args.text,
                linked_spec_id=args.spec_id or "",
                linked_task_id=args.task_id or "",
                linked_run_id=args.run_id or "",
                parent_event_id=args.parent_event_id or "",
            )
        elif action == "link-run":
            ensure_run_belongs_to_task(project_root, args.task_id, args.run_id)
            note = args.text
            if not note:
                # Default note, in Chinese: "Linked execution record: task <id> / run <id>."
                note = f"已关联执行记录:任务 {args.task_id} / 运行 {args.run_id}。"
            result = append_thread_plan_event(
                project_root,
                thread_id=args.thread_id,
                kind="progress",
                text=note,
                linked_task_id=args.task_id,
                linked_run_id=args.run_id,
                event_type="thread-plan link-run",
            )
            # Overwrite the generic append status/marker with link-run specific ones.
            result["status"] = "linked"
            result["marker"] = f"{THREAD_PLAN_MARKER} action=link-run thread={args.thread_id} run={args.run_id}"
        elif action == "render":
            if args.format == "mermaid":
                diagram = render_thread_plan_mermaid(project_root, args.thread_id)
                result = {
                    "status": "rendered",
                    "thread_plan_marker": THREAD_PLAN_MARKER,
                    "marker": f"{THREAD_PLAN_MARKER} action=render thread={args.thread_id} format=mermaid",
                    "thread_id": args.thread_id,
                    "format": "mermaid",
                    "mermaid": diagram,
                }
            elif args.format == "markdown":
                result = render_thread_plan_markdown(project_root, args.thread_id)
            elif args.format == "html":
                result = render_thread_plan_html(project_root, args.thread_id)
            else:
                raise ValueError(f"unsupported thread-plan render format: {args.format}")
            append_thread_command_event(project_root, args.thread_id, "thread-plan render", format=args.format)
        else:
            raise ValueError(f"unsupported thread-plan action: {args.thread_action}")
    except (FileNotFoundError, KeyError, ValueError, json.JSONDecodeError) as exc:
        print(str(exc), file=sys.stderr)
        return 1

    if args.json:
        emit(result, True)
        return 0

    print(result.get("marker", f"{THREAD_PLAN_MARKER} action={args.thread_action}"))
    if action == "current":
        details = result.get("current", {})
        for field in ("thread_id", "title", "spec_id"):
            print(f"{field}: {details.get(field, '')}")
    elif action == "render" and args.format == "mermaid":
        # ``args.format`` is only read for the render action (short-circuit).
        print("```mermaid")
        print(result["mermaid"])
        print("```")
    elif result.get("output"):
        print(f"output: {result['output']}")
    elif result.get("ledger"):
        print(f"ledger: {result['ledger']}")
    return 0
decision_event_parser.add_argument("--chosen", default="") + decision_event_parser.add_argument("--linked-step", default="", choices=["", *OPERATIONAL_TRACE_STEPS]) + decision_event_parser.add_argument("--linked-capability-event-id", default="") + decision_event_parser.add_argument("--linked-effect-assertion-id", default="") + decision_event_parser.add_argument("--json", action="store_true") + decision_event_parser.set_defaults(func=cmd_decision_event) + + decision_query_parser = sub.add_parser("decision-query", help="query public Decision Graph events for a run") + decision_query_parser.add_argument("--project-root", required=True) + decision_query_parser.add_argument("--run-id", required=True) + decision_query_parser.add_argument("--task-id", default="") + decision_query_parser.add_argument("--kind", default="", choices=["", *sorted(DECISION_EVENT_KINDS)]) + decision_query_parser.add_argument("--status", default="", choices=["", *sorted(DECISION_EVENT_STATUSES)]) + decision_query_parser.add_argument("--parent-id", default="") + decision_query_parser.add_argument("--json", action="store_true") + decision_query_parser.set_defaults(func=cmd_decision_query) + artifact_assert_parser = sub.add_parser("artifact-assert", help="verify a real side effect before recording EFFECT_OK evidence") artifact_assert_parser.add_argument("--project-root", required=True) artifact_assert_parser.add_argument("--task-id", required=True) @@ -6268,6 +7843,13 @@ def build_parser() -> argparse.ArgumentParser: verify_effects_parser.add_argument("--json", action="store_true") verify_effects_parser.set_defaults(func=cmd_verify_effects) + verify_decisions_parser = sub.add_parser("verify-decisions", help="verify Decision Graph events and command evidence for a run") + verify_decisions_parser.add_argument("--project-root", required=True) + verify_decisions_parser.add_argument("--task-id", required=True) + verify_decisions_parser.add_argument("--run-id", required=True) + 
verify_decisions_parser.add_argument("--json", action="store_true") + verify_decisions_parser.set_defaults(func=cmd_verify_decisions) + complete = sub.add_parser("complete-task", help="complete a task only after eval, lifecycle, outputs, and required postflight pass") complete.add_argument("--project-root", required=True) complete.add_argument("--task-id", required=True) @@ -6304,10 +7886,11 @@ def build_parser() -> argparse.ArgumentParser: dispatch.add_argument("--json", action="store_true") dispatch.set_defaults(func=cmd_dispatch_task) - render_html = sub.add_parser("render-html", help="render Markdown evidence into composable static HTML sidecars") + render_html = sub.add_parser("render-html", help="render Markdown, flow, and Decision Graph evidence into composable static HTML sidecars") render_html.add_argument("--project-root", required=True) - render_html.add_argument("--kind", choices=sorted(HTML_REPORT_KINDS), help="receipt, handoff, or rich-report") - render_html.add_argument("--run-id", help="run id for receipt or handoff rendering") + render_html.add_argument("--kind", choices=sorted(HTML_REPORT_KINDS), help="receipt, handoff, rich-report, decision-map, or mission-flow") + render_html.add_argument("--run-id", help="run id for receipt, handoff, decision-map, or mission-flow rendering") + render_html.add_argument("--task-id", help="optional task id for mission-flow; defaults from run.yaml") render_html.add_argument("--input", help="Markdown source for rich-report") render_html.add_argument("--output", help="HTML output path; defaults beside the source") render_html.add_argument("--compose", help="compose report manifests into one HTML page") @@ -6315,6 +7898,56 @@ def build_parser() -> argparse.ArgumentParser: render_html.add_argument("--json", action="store_true") render_html.set_defaults(func=cmd_render_html) + flow_summary = sub.add_parser("flow-summary", help="print a friendly layered Mermaid mission flow for a run") + 
flow_summary.add_argument("--project-root", required=True) + flow_summary.add_argument("--run-id", required=True) + flow_summary.add_argument("--task-id", help="optional task id; defaults from run.yaml") + flow_summary.add_argument("--format", choices=["mermaid", "markdown"], default="mermaid") + flow_summary.add_argument("--json", action="store_true") + flow_summary.set_defaults(func=cmd_flow_summary) + + thread_plan = sub.add_parser("thread-plan", help="manage chat-level append-only natural-language plan ledgers") + thread_sub = thread_plan.add_subparsers(dest="thread_action", required=True) + thread_start = thread_sub.add_parser("start", help="start a chat-level Thread Plan Ledger") + thread_start.add_argument("--project-root", required=True) + thread_start.add_argument("--title", required=True) + thread_start.add_argument("--spec-id", default="") + thread_start.add_argument("--json", action="store_true") + thread_start.set_defaults(func=cmd_thread_plan) + + thread_current = thread_sub.add_parser("current", help="show the current active Thread Plan Ledger") + thread_current.add_argument("--project-root", required=True) + thread_current.add_argument("--json", action="store_true") + thread_current.set_defaults(func=cmd_thread_plan) + + thread_append = thread_sub.add_parser("append", help="append a natural-language plan note") + thread_append.add_argument("--project-root", required=True) + thread_append.add_argument("--thread-id", required=True) + thread_append.add_argument("--kind", required=True, choices=sorted(THREAD_PLAN_EVENT_KINDS)) + thread_append.add_argument("--text", required=True) + thread_append.add_argument("--spec-id", default="") + thread_append.add_argument("--task-id", default="") + thread_append.add_argument("--run-id", default="") + thread_append.add_argument("--parent-event-id", default="") + thread_append.add_argument("--json", action="store_true") + thread_append.set_defaults(func=cmd_thread_plan) + + thread_link = 
thread_sub.add_parser("link-run", help="link a task run to the current chat-level plan") + thread_link.add_argument("--project-root", required=True) + thread_link.add_argument("--thread-id", required=True) + thread_link.add_argument("--task-id", required=True) + thread_link.add_argument("--run-id", required=True) + thread_link.add_argument("--text", default="") + thread_link.add_argument("--json", action="store_true") + thread_link.set_defaults(func=cmd_thread_plan) + + thread_render = thread_sub.add_parser("render", help="render a thread plan as Markdown, Mermaid, or HTML") + thread_render.add_argument("--project-root", required=True) + thread_render.add_argument("--thread-id", required=True) + thread_render.add_argument("--format", choices=["markdown", "html", "mermaid"], default="markdown") + thread_render.add_argument("--json", action="store_true") + thread_render.set_defaults(func=cmd_thread_plan) + receipt = sub.add_parser("receipt", help="write a local project receipt") receipt.add_argument("--project-root", required=True) receipt.add_argument("--summary", required=True) diff --git a/templates/project-control-plane/.agent-os/decision-policy.yaml b/templates/project-control-plane/.agent-os/decision-policy.yaml new file mode 100644 index 0000000..a5258cc --- /dev/null +++ b/templates/project-control-plane/.agent-os/decision-policy.yaml @@ -0,0 +1,3 @@ +decision_policy: + strictness: warn + downgrade_reason: "" diff --git a/templates/project-control-plane/.agent-os/startup-prompt.md b/templates/project-control-plane/.agent-os/startup-prompt.md index 2138937..8cbf8db 100644 --- a/templates/project-control-plane/.agent-os/startup-prompt.md +++ b/templates/project-control-plane/.agent-os/startup-prompt.md @@ -9,25 +9,28 @@ This startup prompt is only the session trigger. Durable rules live in `AGENTS.m Before substantial work: 1. Read `AGENTS.md`. -2. 
Read `.agent-os/workspace.yaml`, `.agent-os/project.yaml`, `.agent-os/tasks.yaml`, `.agent-os/specs.yaml`, `.agent-os/phase-policy.yaml`, `.agent-os/effect-policy.yaml`, `.agent-os/decisions.yaml`, `.agent-os/evals.yaml`, `.agent-os/fabric-link.yaml`, `.agent-os/read-policy.yaml`, `.agent-os/write-policy.yaml`, `.agent-os/dispatch-policy.yaml`, and `.agent-os/tool-registry.yaml`. +2. Read `.agent-os/workspace.yaml`, `.agent-os/project.yaml`, `.agent-os/tasks.yaml`, `.agent-os/specs.yaml`, `.agent-os/phase-policy.yaml`, `.agent-os/decision-policy.yaml`, `.agent-os/effect-policy.yaml`, `.agent-os/decisions.yaml`, `.agent-os/evals.yaml`, `.agent-os/fabric-link.yaml`, `.agent-os/read-policy.yaml`, `.agent-os/write-policy.yaml`, `.agent-os/dispatch-policy.yaml`, and `.agent-os/tool-registry.yaml`. 3. Run `CHANGE_ME_KNOWLEDGEOS_BIN doctor --project-root CHANGE_ME_PROJECT_ROOT --summary` and do not proceed if it fails. -4. Select or confirm one task id from `.agent-os/tasks.yaml`; if no ready task fits the user's new request, run `CHANGE_ME_KNOWLEDGEOS_BIN create-task --project-root CHANGE_ME_PROJECT_ROOT --title "<title>" --type <type> --output <path> --acceptance "<check>"`. -5. Run `CHANGE_ME_KNOWLEDGEOS_BIN route-task --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id>`. -6. Run `CHANGE_ME_KNOWLEDGEOS_BIN dispatch-task --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id>` before invoking subagents, MCP tools, skills, workflows, or scripts. -7. Before planned mutation, run `CHANGE_ME_KNOWLEDGEOS_BIN check-route-write --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --path <planned-path>`. -8. Create run evidence with `CHANGE_ME_KNOWLEDGEOS_BIN run-task --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id>`. -9. Pause at consultation checkpoints, state your recommended next move, name the tradeoff, and ask the human whether to proceed. -10. 
Record run-bound dispatch evidence with `CHANGE_ME_KNOWLEDGEOS_BIN dispatch-task --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id>`. -11. Record public operational progress with `CHANGE_ME_KNOWLEDGEOS_BIN trace-step --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id> --step <step> --note "<public trace>" --evidence "<command/file/user confirmation>"`, and relay the returned `TRACE_OK` marker. -12. Record lifecycle evidence with `CHANGE_ME_KNOWLEDGEOS_BIN phase-task --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id> --phase <route|plan|review|dispatch|execute|report> --status completed --note "<public trace>" --evidence "<command/file/user confirmation>"`, and relay the returned `CHECKPOINT_OK` marker. -13. Record MCP, skill, subagent, orchestrator, or important script use with `CHANGE_ME_KNOWLEDGEOS_BIN capability-event --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id> --kind <kind> --id <capability-id> --purpose "<purpose>"`, and relay the returned `CAPABILITY_OK` marker. -14. Verify real side effects with `CHANGE_ME_KNOWLEDGEOS_BIN artifact-assert --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id> --kind <kind> --path <artifact>` and relay the returned `EFFECT_OK` marker. -15. Run `CHANGE_ME_KNOWLEDGEOS_BIN eval-task --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id>`; do not manually append eval status. -16. Run `CHANGE_ME_KNOWLEDGEOS_BIN verify-context --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id>`, `CHANGE_ME_KNOWLEDGEOS_BIN verify-lifecycle --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id>`, and `CHANGE_ME_KNOWLEDGEOS_BIN verify-effects --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id>`; relay the returned `EFFECT_VERIFY_OK` marker before claiming effect verification success. -17. 
Use `CHANGE_ME_KNOWLEDGEOS_BIN complete-task --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id> --summary "<summary>"`; it enforces lifecycle, capability visibility, effect verification, and required postflight. -18. If a shared-fabric postflight hook is configured, report `[SYNC_OK]` only after `complete-task` returns `sync_status: SYNC_OK`. -19. For reset requests, run `CHANGE_ME_KNOWLEDGEOS_BIN reset-project --project-root CHANGE_ME_PROJECT_ROOT --mode <soft|hard> --dry-run` before any destructive action. -20. For old-project reorganization requests, run `CHANGE_ME_KNOWLEDGEOS_BIN migrate-legacy-project --project-root CHANGE_ME_PROJECT_ROOT --write-plan` before moving files. -21. For historical or superseded files that should be stored but not read by default, run `CHANGE_ME_KNOWLEDGEOS_BIN archive-legacy-project --project-root CHANGE_ME_PROJECT_ROOT --write-plan` before moving files into `archive/`. +4. If the user starts or continues a durable plan/spec conversation, run `CHANGE_ME_KNOWLEDGEOS_BIN thread-plan current --project-root CHANGE_ME_PROJECT_ROOT` or `CHANGE_ME_KNOWLEDGEOS_BIN thread-plan start --project-root CHANGE_ME_PROJECT_ROOT --title "<natural language goal>"`; append plain-language progress with `CHANGE_ME_KNOWLEDGEOS_BIN thread-plan append --project-root CHANGE_ME_PROJECT_ROOT --thread-id <thread-id> --kind <kind> --text "<plain note>"` and relay `THREAD_PLAN_OK`. +5. Select or confirm one task id from `.agent-os/tasks.yaml`; if no ready task fits the user's new request, run `CHANGE_ME_KNOWLEDGEOS_BIN create-task --project-root CHANGE_ME_PROJECT_ROOT --title "<title>" --type <type> --output <path> --acceptance "<check>"`. +6. Run `CHANGE_ME_KNOWLEDGEOS_BIN route-task --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id>`. +7. Run `CHANGE_ME_KNOWLEDGEOS_BIN dispatch-task --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id>` before invoking subagents, MCP tools, skills, workflows, or scripts. +8. 
Before planned mutation, run `CHANGE_ME_KNOWLEDGEOS_BIN check-route-write --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --path <planned-path>`. +9. Create run evidence with `CHANGE_ME_KNOWLEDGEOS_BIN run-task --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id>`. +10. Pause at consultation checkpoints, state your recommended next move, name the tradeoff, and ask the human whether to proceed. +11. Record run-bound dispatch evidence with `CHANGE_ME_KNOWLEDGEOS_BIN dispatch-task --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id>`. +12. Record public operational progress with `CHANGE_ME_KNOWLEDGEOS_BIN trace-step --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id> --step <step> --note "<public trace>" --evidence "<command/file/user confirmation>"`, and relay the returned `TRACE_OK` marker. +13. Record lifecycle evidence with `CHANGE_ME_KNOWLEDGEOS_BIN phase-task --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id> --phase <route|plan|review|dispatch|execute|report> --status completed --note "<public trace>" --evidence "<command/file/user confirmation>"`, and relay the returned `CHECKPOINT_OK` marker. +14. Record MCP, skill, subagent, orchestrator, or important script use with `CHANGE_ME_KNOWLEDGEOS_BIN capability-event --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id> --kind <kind> --id <capability-id> --purpose "<purpose>"`, and relay the returned `CAPABILITY_OK` marker. +15. Record public decision changes with `CHANGE_ME_KNOWLEDGEOS_BIN decision-event --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id> --kind <kind> --title "<title>" --summary "<summary>" --reason "<reason>" --evidence "<evidence>"`, and relay the returned `DECISION_OK` marker when plans branch, change, roll back, or abandon a route. +16. 
Verify real side effects with `CHANGE_ME_KNOWLEDGEOS_BIN artifact-assert --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id> --kind <kind> --path <artifact>` and relay the returned `EFFECT_OK` marker. +17. Run `CHANGE_ME_KNOWLEDGEOS_BIN eval-task --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id>`; do not manually append eval status. +18. Run `CHANGE_ME_KNOWLEDGEOS_BIN verify-context --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id>`, `CHANGE_ME_KNOWLEDGEOS_BIN verify-lifecycle --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id>`, `CHANGE_ME_KNOWLEDGEOS_BIN verify-effects --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id>`, and `CHANGE_ME_KNOWLEDGEOS_BIN verify-decisions --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id>`; relay `EFFECT_VERIFY_OK` and `DECISION_VERIFY_OK` before claiming verification success. +19. Use `CHANGE_ME_KNOWLEDGEOS_BIN complete-task --project-root CHANGE_ME_PROJECT_ROOT --task-id <task-id> --run-id <run-id> --summary "<summary>"`; it enforces lifecycle, capability visibility, effect verification, decision verification, and required postflight. +20. For medium, high, or complex tasks, include the returned `FLOW_OK` Mermaid Mission Flow in the final answer; if needed, run `CHANGE_ME_KNOWLEDGEOS_BIN flow-summary --project-root CHANGE_ME_PROJECT_ROOT --run-id <run-id>`. +21. If a shared-fabric postflight hook is configured, report `[SYNC_OK]` only after `complete-task` returns `sync_status: SYNC_OK`. +22. For reset requests, run `CHANGE_ME_KNOWLEDGEOS_BIN reset-project --project-root CHANGE_ME_PROJECT_ROOT --mode <soft|hard> --dry-run` before any destructive action. +23. For old-project reorganization requests, run `CHANGE_ME_KNOWLEDGEOS_BIN migrate-legacy-project --project-root CHANGE_ME_PROJECT_ROOT --write-plan` before moving files. +24. 
For historical or superseded files that should be stored but not read by default, run `CHANGE_ME_KNOWLEDGEOS_BIN archive-legacy-project --project-root CHANGE_ME_PROJECT_ROOT --write-plan` before moving files into `archive/`. -Never claim boot, route, dispatch, write safety, trace, phase, lifecycle, capability, effect, eval, completion, or sync success without command evidence. +Never claim boot, route, dispatch, write safety, thread plan, trace, phase, lifecycle, capability, decision, effect, eval, completion, flow, or sync success without command evidence. diff --git a/templates/project-control-plane/.agent-os/write-policy.yaml b/templates/project-control-plane/.agent-os/write-policy.yaml index b6263fc..b2db2c1 100644 --- a/templates/project-control-plane/.agent-os/write-policy.yaml +++ b/templates/project-control-plane/.agent-os/write-policy.yaml @@ -31,6 +31,7 @@ write_policy: - .agent-os/dispatch-policy.yaml - .agent-os/tool-registry.yaml - .agent-os/workflows/router.yaml + - .agent-os/decision-policy.yaml - .agent-os/effect-policy.yaml - .agent-os/phase-policy.yaml - .agent-os/fabric-link.yaml @@ -41,9 +42,16 @@ write_policy: - .agent-os/runs/**/eval.md - .agent-os/runs/**/phases.ndjson - .agent-os/runs/**/command-events.ndjson + - .agent-os/runs/**/decision-events.ndjson - .agent-os/runs/**/capability-events.ndjson - .agent-os/runs/**/effect-assertions.ndjson - .agent-os/runs/**/postflight.md + - .agent-os/threads/current.json + - .agent-os/threads/**/thread.json + - .agent-os/threads/**/thread-plan.ndjson + - .agent-os/threads/**/thread-plan.md + - .agent-os/threads/**/thread-map.html + - .agent-os/threads/**/command-events.ndjson require_receipt_for: - "*.md" diff --git a/templates/project-control-plane/AGENTS.md b/templates/project-control-plane/AGENTS.md index 6e3a1d7..3a77e25 100644 --- a/templates/project-control-plane/AGENTS.md +++ b/templates/project-control-plane/AGENTS.md @@ -6,7 +6,7 @@ Before substantial work: 1. Read `.agent-os/workspace.yaml`. 
2. Read `.agent-os/project.yaml`. -3. Read `.agent-os/tasks.yaml`, `.agent-os/specs.yaml`, `.agent-os/phase-policy.yaml`, `.agent-os/decisions.yaml`, and `.agent-os/evals.yaml`. +3. Read `.agent-os/tasks.yaml`, `.agent-os/specs.yaml`, `.agent-os/phase-policy.yaml`, `.agent-os/decision-policy.yaml`, `.agent-os/decisions.yaml`, and `.agent-os/evals.yaml`. 4. Run `CHANGE_ME_KNOWLEDGEOS_BIN doctor --project-root . --summary`. 5. Run the configured shared-fabric boot hook from `.agent-os/fabric-link.yaml`. 6. Report `[BOOT_OK]` only after the hook succeeds. @@ -17,6 +17,7 @@ During substantial work: - Check `.agent-os/read-policy.yaml` before using broad project context; `archive/**` is cold storage and is not default context. - Check `.agent-os/write-policy.yaml` before writing. - If the user asks to create, align, or follow a spec, use `CHANGE_ME_KNOWLEDGEOS_BIN create-spec --project-root . --title "<title>"` or `CHANGE_ME_KNOWLEDGEOS_BIN align-spec --project-root . --task-id <task-id>` before execution. +- If the user starts or continues a durable plan/spec conversation, use `CHANGE_ME_KNOWLEDGEOS_BIN thread-plan current --project-root .` or `CHANGE_ME_KNOWLEDGEOS_BIN thread-plan start --project-root . --title "<natural language goal>"`; update it with `CHANGE_ME_KNOWLEDGEOS_BIN thread-plan append --project-root . --thread-id <thread-id> --kind <kind> --text "<plain note>"` when the plan changes or advances, and relay `THREAD_PLAN_OK`. - If no ready task fits the user's new request, use `CHANGE_ME_KNOWLEDGEOS_BIN create-task --project-root . --title "<title>" --type <type> --output <path> --acceptance "<check>"` instead of reopening unrelated prior work. - Use `CHANGE_ME_KNOWLEDGEOS_BIN route-task --project-root . --task-id <task-id>` before dispatching work. - Use `CHANGE_ME_KNOWLEDGEOS_BIN check-route-write --project-root . --task-id <task-id> --path <planned-path>` before planned mutations. 
@@ -32,6 +33,8 @@ During substantial work: - Use `CHANGE_ME_KNOWLEDGEOS_BIN trace-step --project-root . --task-id <task-id> --run-id <run-id> --step <step> --note "<public trace>" --evidence "<command/file/user confirmation>"` to record visible operational progress, and relay the returned `TRACE_OK` marker. - Use `CHANGE_ME_KNOWLEDGEOS_BIN phase-task --project-root . --task-id <task-id> --run-id <run-id> --phase <route|plan|review|dispatch|execute|report> --status completed --note "<public trace>" --evidence "<command/file/user confirmation>"` to record observable phase evidence, and relay the returned `CHECKPOINT_OK` marker. - Use `CHANGE_ME_KNOWLEDGEOS_BIN capability-event --project-root . --task-id <task-id> --run-id <run-id> --kind <kind> --id <capability-id> --purpose "<purpose>"` before or after MCP, skill, subagent, orchestrator, or important script use, and relay the returned `CAPABILITY_OK` marker. +- Use `CHANGE_ME_KNOWLEDGEOS_BIN decision-event --project-root . --task-id <task-id> --run-id <run-id> --kind <kind> --title "<title>" --summary "<summary>" --reason "<reason>" --evidence "<evidence>"` when a plan branches, changes, rolls back, abandons a route, or records a major human decision, and relay the returned `DECISION_OK` marker. +- Use `CHANGE_ME_KNOWLEDGEOS_BIN thread-plan link-run --project-root . --thread-id <thread-id> --task-id <task-id> --run-id <run-id>` to connect a task run to the chat-level plan. - Use `CHANGE_ME_KNOWLEDGEOS_BIN artifact-assert --project-root . --task-id <task-id> --run-id <run-id> --kind <kind> --path <artifact>` to verify real side effects, and relay the returned `EFFECT_OK` marker. - Record run evidence under `.agent-os/runs/`. @@ -41,7 +44,10 @@ After substantial work: - Run `CHANGE_ME_KNOWLEDGEOS_BIN verify-context --project-root . --task-id <task-id> --run-id <run-id>`. - Run `CHANGE_ME_KNOWLEDGEOS_BIN verify-lifecycle --project-root . --task-id <task-id> --run-id <run-id>`. 
- Run `CHANGE_ME_KNOWLEDGEOS_BIN verify-effects --project-root . --task-id <task-id> --run-id <run-id>` and relay the returned `EFFECT_VERIFY_OK` marker before claiming effect verification success. -- Use `CHANGE_ME_KNOWLEDGEOS_BIN complete-task --project-root . --task-id <task-id> --run-id <run-id> --summary "<summary>"` to close task state. It enforces spec/context/plan, lifecycle evidence, effect evidence, and required postflight. +- Run `CHANGE_ME_KNOWLEDGEOS_BIN verify-decisions --project-root . --task-id <task-id> --run-id <run-id>` and relay the returned `DECISION_VERIFY_OK` marker before claiming decision verification success. +- Use `CHANGE_ME_KNOWLEDGEOS_BIN complete-task --project-root . --task-id <task-id> --run-id <run-id> --summary "<summary>"` to close task state. It enforces spec/context/plan, lifecycle evidence, effect evidence, decision evidence according to policy, and required postflight. +- For medium, high, or complex tasks, include the `FLOW_OK` Mermaid Mission Flow returned by `complete-task`; if needed, run `CHANGE_ME_KNOWLEDGEOS_BIN flow-summary --project-root . --run-id <run-id>` and relay `FLOW_OK`. +- Keep Mission Flow readable for humans: use simple labels like Goal, Health Check, Task & Plan, Safe Writes, Work Done, Tools Used, Proof, Decisions, and Finish. - Report `[SYNC_OK]` only after `complete-task` returns `sync_status: SYNC_OK`. Reset and migration: @@ -57,6 +63,8 @@ Never: - write to immutable paths; - bypass human-gated paths; - invoke generic unscoped subagents; -- claim boot, phase, lifecycle, eval, completion, or sync success without command evidence. +- claim boot, phase, lifecycle, decision, eval, completion, or sync success without command evidence. - manually write spec snapshots, context packs, plans, phase ledgers, or eval status instead of using KnowledgeOS commands. - manually write operational trace ledgers instead of using `trace-step`. +- manually write decision event ledgers instead of using `decision-event`. 
+- manually write thread plan ledgers instead of using `thread-plan`. diff --git a/tests/test_knowledgeos_cli.py b/tests/test_knowledgeos_cli.py index 0a526d6..d7ebc9c 100644 --- a/tests/test_knowledgeos_cli.py +++ b/tests/test_knowledgeos_cli.py @@ -163,7 +163,7 @@ def test_harness_audit_repairs_legacy_mount_and_missing_control_files(self): tmp_root = Path(tmp) project = tmp_root / "LegacyProject" project.mkdir() - old_governance = tmp_root / "Antigravity_Skills" / "global-agent-fabric" + old_governance = tmp_root / "Legacy_PreOS_Root" / "global-agent-fabric" desired_governance = tmp_root / "KnowledgeOS" / "global-agent-fabric" desired_capability = tmp_root / "KnowledgeOS" / "capability-layer" self.run_cli( @@ -212,7 +212,7 @@ def test_harness_audit_repairs_legacy_mount_and_missing_control_files(self): issues = dry_payload["projects"][0]["issues"] desired_governance = desired_governance.resolve() desired_capability = desired_capability.resolve() - self.assertIn("legacy_antigravity_governance_root", issues) + self.assertIn("legacy_preos_governance_root", issues) self.assertIn("postflight_hook_missing_or_not_executable", issues) self.assertIn("missing_control_file:.agent-os/specs.yaml", issues) self.assertIn("workflow_router_lifecycle_drift", issues) @@ -817,6 +817,138 @@ def test_check_route_write_enforces_route_allowed_outputs(self): guarded_payload = json.loads(guarded.stdout) self.assertEqual(guarded_payload["decision"], "human_gate_required") + def test_check_route_write_denies_external_paths_by_default(self): + with tempfile.TemporaryDirectory() as tmp: + project = Path(tmp) / "ExampleProject" + project.mkdir() + self.run_cli("init-project", "--root", str(ROOT), "--project-root", str(project), "--name", "Example") + + external_target = str(Path.home() / ".local" / "bin" / "agy") + blocked = self.run_cli( + "check-route-write", + "--project-root", + str(project), + "--task-id", + "T001", + "--path", + external_target, + "--json", + check=False, + ) + 
self.assertEqual(blocked.returncode, 2) + payload = json.loads(blocked.stdout) + self.assertEqual(payload["decision"], "deny") + self.assertFalse(payload["inside_project"]) + + def test_check_route_write_allows_external_paths_only_with_local_overlay_and_route_flag(self): + with tempfile.TemporaryDirectory() as tmp: + project = Path(tmp) / "ExampleProject" + project.mkdir() + self.run_cli("init-project", "--root", str(ROOT), "--project-root", str(project), "--name", "Example") + + tasks_path = project / ".agent-os" / "tasks.yaml" + tasks_path.write_text( + tasks_path.read_text(encoding="utf-8").replace("type: initialization", "type: migration_task", 1), + encoding="utf-8", + ) + + router_path = project / ".agent-os" / "workflows" / "router.yaml" + router_path.write_text( + router_path.read_text(encoding="utf-8") + + "\n" + + " migration_task:\n" + + " route_order:\n" + + " - doctor --project-root .\n" + + " - route-task --project-root . --task-id <task-id>\n" + + " - check-route-write --project-root . 
--task-id <task-id> --path <planned-path>\n" + + " eval_profile: migration_task\n" + + " human_gate: explicit_approval\n" + + " allow_external_controlled: true\n" + + " allowed_outputs:\n" + + " - .knowledgeos-local/\n", + encoding="utf-8", + ) + + local_policy = project / ".knowledgeos-local" / "write-policy.local.yaml" + local_policy.parent.mkdir(parents=True, exist_ok=True) + external_target = str(Path.home() / ".local" / "bin" / "agy") + local_policy.write_text( + "external_controlled:\n" + f" - {external_target}\n" + "external_require_receipt_for:\n" + f" - {external_target}\n", + encoding="utf-8", + ) + + allowed = self.run_cli( + "check-route-write", + "--project-root", + str(project), + "--task-id", + "T001", + "--path", + external_target, + "--json", + ) + payload = json.loads(allowed.stdout) + self.assertEqual(payload["decision"], "allow") + self.assertEqual(payload["route_status"], "allowed_by_external_route") + self.assertFalse(payload["inside_project"]) + self.assertTrue(payload["receipt_required"]) + + def test_check_route_write_blocks_external_paths_when_route_lacks_external_flag(self): + with tempfile.TemporaryDirectory() as tmp: + project = Path(tmp) / "ExampleProject" + project.mkdir() + self.run_cli("init-project", "--root", str(ROOT), "--project-root", str(project), "--name", "Example") + + tasks_path = project / ".agent-os" / "tasks.yaml" + tasks_path.write_text( + tasks_path.read_text(encoding="utf-8").replace("type: initialization", "type: migration_task", 1), + encoding="utf-8", + ) + + router_path = project / ".agent-os" / "workflows" / "router.yaml" + router_path.write_text( + router_path.read_text(encoding="utf-8") + + "\n" + + " migration_task:\n" + + " route_order:\n" + + " - doctor --project-root .\n" + + " - route-task --project-root . --task-id <task-id>\n" + + " - check-route-write --project-root . 
--task-id <task-id> --path <planned-path>\n" + + " eval_profile: migration_task\n" + + " human_gate: explicit_approval\n" + + " allowed_outputs:\n" + + " - .knowledgeos-local/\n", + encoding="utf-8", + ) + + local_policy = project / ".knowledgeos-local" / "write-policy.local.yaml" + local_policy.parent.mkdir(parents=True, exist_ok=True) + external_target = str(Path.home() / ".local" / "bin" / "agy") + local_policy.write_text( + "external_controlled:\n" + f" - {external_target}\n", + encoding="utf-8", + ) + + blocked = self.run_cli( + "check-route-write", + "--project-root", + str(project), + "--task-id", + "T001", + "--path", + external_target, + "--json", + check=False, + ) + self.assertEqual(blocked.returncode, 2) + payload = json.loads(blocked.stdout) + self.assertEqual(payload["decision"], "route_output_denied") + self.assertIn("does not allow external controlled writes", payload["reason"]) + def test_archive_management_route_allows_archive_root_check(self): result = self.run_cli( "check-route-write", @@ -1209,9 +1341,14 @@ def test_complete_task_requires_passed_eval_and_updates_status(self): payload = json.loads(completed.stdout) self.assertEqual(payload["status"], "completed") self.assertEqual(payload["sync_status"], "PENDING") + self.assertEqual(payload["flow_marker"], "FLOW_OK") + self.assertIn("Goal:", payload["flow_mermaid"]) + self.assertIn("Health check:", payload["flow_mermaid"]) + self.assertTrue((run_dir / "mission-flow.md").exists()) self.assertIn("status: completed", (run_dir / "run.yaml").read_text(encoding="utf-8")) self.assertIn("status: completed", (project / ".agent-os" / "tasks.yaml").read_text(encoding="utf-8")) self.assertIn("Guarded task complete", (project / ".agent-os" / "receipts" / "latest.md").read_text(encoding="utf-8")) + self.assertIn("Mission Flow Marker: FLOW_OK", (run_dir / "receipt.md").read_text(encoding="utf-8")) def test_complete_task_requires_lifecycle_phases(self): with tempfile.TemporaryDirectory() as tmp: @@ -1689,6 
+1826,409 @@ def test_trace_step_records_public_operational_trace_marker(self): self.assertEqual(payload["trace_marker"], "TRACE_OK") self.assertIn("TRACE_OK step=route_guard", payload["marker"]) + def test_decision_event_records_queryable_public_decision_tree_marker(self): + with tempfile.TemporaryDirectory() as tmp: + project = Path(tmp) / "ExampleProject" + project.mkdir() + self.run_cli("init-project", "--root", str(ROOT), "--project-root", str(project), "--name", "Example") + started = self.run_cli("run-task", "--project-root", str(project), "--task-id", "T001", "--json") + run_id = json.loads(started.stdout)["run_id"] + + root = self.run_cli( + "decision-event", + "--project-root", + str(project), + "--task-id", + "T001", + "--run-id", + run_id, + "--kind", + "branch_opened", + "--status", + "planned", + "--title", + "Compare analysis paths", + "--summary", + "Open alternative analysis routes before execution.", + "--reason", + "Research tasks can branch before a stable plan is chosen.", + "--option", + "full rerun", + "--option", + "targeted rerun", + "--evidence", + "user request", + "--json", + ) + root_payload = json.loads(root.stdout) + self.assertEqual(root_payload["decision_marker"], "DECISION_OK") + self.assertTrue(root_payload["decision_id"].startswith("DEC-")) + + child = self.run_cli( + "decision-event", + "--project-root", + str(project), + "--task-id", + "T001", + "--run-id", + run_id, + "--parent-id", + root_payload["decision_id"], + "--kind", + "branch_selected", + "--status", + "selected", + "--title", + "Use targeted rerun", + "--summary", + "Select the smaller rerun path for faster validation.", + "--reason", + "The targeted path proves the changed artifact without repeating expensive work.", + "--chosen", + "targeted rerun", + "--evidence", + "plan review", + "--json", + ) + child_payload = json.loads(child.stdout) + self.assertEqual(child_payload["record"]["parent_id"], root_payload["decision_id"]) + + queried = self.run_cli( + 
"decision-query", + "--project-root", + str(project), + "--run-id", + run_id, + "--parent-id", + root_payload["decision_id"], + "--json", + ) + query_payload = json.loads(queried.stdout) + self.assertEqual(query_payload["count"], 1) + self.assertEqual(query_payload["events"][0]["decision_id"], child_payload["decision_id"]) + + run_dir = project / ".agent-os" / "runs" / run_id + self.assertIn('"event_type": "decision-event"', (run_dir / "command-events.ndjson").read_text(encoding="utf-8")) + self.assertIn('"kind": "branch_selected"', (run_dir / "decision-events.ndjson").read_text(encoding="utf-8")) + + def test_thread_plan_ledger_is_chat_level_append_only_and_readable(self): + with tempfile.TemporaryDirectory() as tmp: + project = Path(tmp) / "ExampleProject" + project.mkdir() + self.run_cli("init-project", "--root", str(ROOT), "--project-root", str(project), "--name", "Example") + + started = self.run_cli( + "thread-plan", + "start", + "--project-root", + str(project), + "--title", + "长期维护鸟类声景基金申请计划", + "--spec-id", + "SPEC-TEST", + "--json", + ) + start_payload = json.loads(started.stdout) + self.assertEqual(start_payload["thread_plan_marker"], "THREAD_PLAN_OK") + thread_id = start_payload["thread_id"] + thread_dir = project / ".agent-os" / "threads" / thread_id + ledger = thread_dir / "thread-plan.ndjson" + current = json.loads((project / ".agent-os" / "threads" / "current.json").read_text(encoding="utf-8")) + self.assertEqual(current["thread_id"], thread_id) + first_lines = ledger.read_text(encoding="utf-8").splitlines() + self.assertEqual(len(first_lines), 1) + self.assertIn("总体计划", first_lines[0]) + + self.run_cli( + "thread-plan", + "append", + "--project-root", + str(project), + "--thread-id", + thread_id, + "--kind", + "branch", + "--text", + "Plan A:先稳定 OS 计划记录,再做可视化;Plan B:直接做复杂 dashboard,暂缓。", + ) + self.run_cli( + "thread-plan", + "append", + "--project-root", + str(project), + "--thread-id", + thread_id, + "--kind", + "phase", + "--text", + "Phase 
A:把聊天级计划记录清楚;Phase B:再把多个任务串起来。", + ) + after_lines = ledger.read_text(encoding="utf-8").splitlines() + self.assertEqual(after_lines[0], first_lines[0]) + self.assertEqual(len(after_lines), 3) + + run = self.run_cli("run-task", "--project-root", str(project), "--task-id", "T001", "--json") + run_id = json.loads(run.stdout)["run_id"] + linked = self.run_cli( + "thread-plan", + "link-run", + "--project-root", + str(project), + "--thread-id", + thread_id, + "--task-id", + "T001", + "--run-id", + run_id, + "--json", + ) + self.assertEqual(json.loads(linked.stdout)["status"], "linked") + + current_result = self.run_cli("thread-plan", "current", "--project-root", str(project), "--json") + self.assertEqual(json.loads(current_result.stdout)["current"]["thread_id"], thread_id) + + markdown = self.run_cli( + "thread-plan", + "render", + "--project-root", + str(project), + "--thread-id", + thread_id, + "--format", + "markdown", + "--json", + ) + md_payload = json.loads(markdown.stdout) + md_text = Path(md_payload["output"]).read_text(encoding="utf-8") + self.assertIn("Plan A / Plan B", md_text) + self.assertIn("Phase A / Phase B", md_text) + self.assertIn("当前工作线", md_text) + self.assertIn(run_id, md_text) + + mermaid = self.run_cli( + "thread-plan", + "render", + "--project-root", + str(project), + "--thread-id", + thread_id, + "--format", + "mermaid", + ) + self.assertIn("THREAD_PLAN_OK action=render", mermaid.stdout) + self.assertIn("flowchart LR", mermaid.stdout) + self.assertIn("Plan A", mermaid.stdout) + + html = self.run_cli( + "thread-plan", + "render", + "--project-root", + str(project), + "--thread-id", + thread_id, + "--format", + "html", + "--json", + ) + html_payload = json.loads(html.stdout) + html_text = Path(html_payload["output"]).read_text(encoding="utf-8") + manifest = json.loads(Path(html_payload["manifest"]).read_text(encoding="utf-8")) + source_sha = hashlib.sha256(ledger.read_bytes()).hexdigest() + self.assertEqual(manifest["kind"], "thread-plan") + 
self.assertEqual(manifest["source_sha256"], source_sha) + self.assertIn(source_sha, html_text) + self.assertIn("HTML is presentation, not source of truth.", html_text) + self.assertEqual(html_text.lower().count("<h1"), 2) + self.assertNotIn("<script", html_text.lower()) + self.assertNotIn("https://", html_text) + + old_lines = ledger.read_text(encoding="utf-8").splitlines() + second = self.run_cli( + "thread-plan", + "start", + "--project-root", + str(project), + "--title", + "另一个聊天计划", + "--json", + ) + second_thread_id = json.loads(second.stdout)["thread_id"] + self.assertNotEqual(second_thread_id, thread_id) + self.assertEqual(ledger.read_text(encoding="utf-8").splitlines(), old_lines) + second_current = json.loads((project / ".agent-os" / "threads" / "current.json").read_text(encoding="utf-8")) + self.assertEqual(second_current["thread_id"], second_thread_id) + + def test_verify_decisions_detects_orphans_and_requires_explanations(self): + with tempfile.TemporaryDirectory() as tmp: + project = Path(tmp) / "ExampleProject" + project.mkdir() + self.run_cli("init-project", "--root", str(ROOT), "--project-root", str(project), "--name", "Example") + started = self.run_cli("run-task", "--project-root", str(project), "--task-id", "T001", "--json") + run_id = json.loads(started.stdout)["run_id"] + + invalid = self.run_cli( + "decision-event", + "--project-root", + str(project), + "--task-id", + "T001", + "--run-id", + run_id, + "--kind", + "branch_abandoned", + "--status", + "abandoned", + "--title", + "Abandon unexplained path", + "--summary", + "This should fail because reason is required.", + "--evidence", + "test", + check=False, + ) + self.assertEqual(invalid.returncode, 1) + self.assertIn("--reason is required", invalid.stderr) + + valid = self.run_cli( + "decision-event", + "--project-root", + str(project), + "--task-id", + "T001", + "--run-id", + run_id, + "--kind", + "final_decision", + "--status", + "executed", + "--title", + "Finish linear path", + 
"--summary", + "Record a simple final decision.", + "--reason", + "No branch was needed.", + "--evidence", + "test", + "--json", + ) + self.assertIn("DECISION_OK", json.loads(valid.stdout)["marker"]) + passed = self.run_cli("verify-decisions", "--project-root", str(project), "--task-id", "T001", "--run-id", run_id, "--json") + self.assertEqual(json.loads(passed.stdout)["status"], "passed") + + run_dir = project / ".agent-os" / "runs" / run_id + forged = { + "decision_id": "DEC-ORPHAN", + "parent_id": "DEC-MISSING", + "task_id": "T001", + "run_id": run_id, + "kind": "branch_selected", + "status": "selected", + "title": "Forged orphan", + "summary": "This node has no parent.", + "reason": "test", + "options": [], + "chosen": "", + "evidence": "manual write", + "timestamp": "2026-05-29T00:00:00+00:00", + } + with (run_dir / "decision-events.ndjson").open("a", encoding="utf-8") as handle: + handle.write(json.dumps(forged, ensure_ascii=False, sort_keys=True) + "\n") + + failed = self.run_cli("verify-decisions", "--project-root", str(project), "--task-id", "T001", "--run-id", run_id, "--json", check=False) + self.assertEqual(failed.returncode, 2) + self.assertIn("decision_orphan_parent", failed.stdout) + + def test_complete_task_enforces_decision_verification_when_policy_enforces(self): + with tempfile.TemporaryDirectory() as tmp: + project = Path(tmp) / "ExampleProject" + project.mkdir() + self.run_cli("init-project", "--root", str(ROOT), "--project-root", str(project), "--name", "Example") + created = self.run_cli( + "create-task", + "--project-root", + str(project), + "--title", + "Decision gated completion", + "--type", + "report_task", + "--output", + "docs/report.md", + "--acceptance", + "report exists", + "--json", + ) + task_id = json.loads(created.stdout)["task_id"] + started = self.run_cli("run-task", "--project-root", str(project), "--task-id", task_id, "--json") + run_id = json.loads(started.stdout)["run_id"] + self.log_required_phases(project, task_id, 
run_id) + self.write_plan_context(project, task_id, run_id) + report = project / "docs" / "report.md" + report.parent.mkdir() + report.write_text("report\n", encoding="utf-8") + self.run_cli("eval-task", "--project-root", str(project), "--task-id", task_id, "--run-id", run_id) + self.run_cli("artifact-assert", "--project-root", str(project), "--task-id", task_id, "--run-id", run_id, "--kind", "file_exists", "--path", "docs/report.md") + (project / ".agent-os" / "decision-policy.yaml").write_text( + "decision_policy:\n strictness: enforce\n", + encoding="utf-8", + ) + + blocked = self.run_cli( + "complete-task", + "--project-root", + str(project), + "--task-id", + task_id, + "--run-id", + run_id, + "--summary", + "Should fail before decision proof.", + "--allow-pending-postflight", + "temporary project", + check=False, + ) + self.assertEqual(blocked.returncode, 1) + self.assertIn("decision verification failed", blocked.stderr) + + self.run_cli( + "decision-event", + "--project-root", + str(project), + "--task-id", + task_id, + "--run-id", + run_id, + "--kind", + "final_decision", + "--status", + "executed", + "--title", + "Complete simple report", + "--summary", + "No branch was needed for this deterministic report task.", + "--reason", + "The task had a single declared output.", + "--evidence", + "docs/report.md", + ) + completed = self.run_cli( + "complete-task", + "--project-root", + str(project), + "--task-id", + task_id, + "--run-id", + run_id, + "--summary", + "Completed after decision proof.", + "--allow-pending-postflight", + "temporary project", + "--json", + ) + payload = json.loads(completed.stdout) + self.assertEqual(payload["decision_status"], "passed") + receipt = (project / ".agent-os" / "receipts" / "latest.md").read_text(encoding="utf-8") + self.assertIn("Decision Verification Status: passed", receipt) + def test_artifact_assert_records_effect_marker_for_real_file_checks(self): with tempfile.TemporaryDirectory() as tmp: project = Path(tmp) / 
"ExampleProject" @@ -2474,6 +3014,147 @@ def test_render_html_generates_receipt_and_handoff_sidecars_with_source_hash(sel self.assertTrue(Path(handoff_payload["manifest"]).exists()) self.assertIn("status: ready", (project / ".agent-os" / "tasks.yaml").read_text(encoding="utf-8")) + def test_render_html_decision_map_sidecar_from_decision_ledger(self): + with tempfile.TemporaryDirectory() as tmp: + project = Path(tmp) / "ExampleProject" + project.mkdir() + self.run_cli("init-project", "--root", str(ROOT), "--project-root", str(project), "--name", "Example") + started = self.run_cli("run-task", "--project-root", str(project), "--task-id", "T001", "--json") + run_id = json.loads(started.stdout)["run_id"] + root = self.run_cli( + "decision-event", + "--project-root", + str(project), + "--task-id", + "T001", + "--run-id", + run_id, + "--kind", + "branch_opened", + "--status", + "planned", + "--title", + "Open model strategy", + "--summary", + "Compare model paths.", + "--reason", + "Research modeling can require alternatives.", + "--option", + "fast model", + "--option", + "full model", + "--evidence", + "plan", + "--json", + ) + root_id = json.loads(root.stdout)["decision_id"] + self.run_cli( + "decision-event", + "--project-root", + str(project), + "--task-id", + "T001", + "--run-id", + run_id, + "--parent-id", + root_id, + "--kind", + "branch_abandoned", + "--status", + "abandoned", + "--title", + "Skip full model", + "--summary", + "Full model is deferred for this run.", + "--reason", + "The quick validation branch is enough for the current task.", + "--evidence", + "runtime budget", + ) + + rendered = self.run_cli( + "render-html", + "--project-root", + str(project), + "--run-id", + run_id, + "--kind", + "decision-map", + "--json", + ) + payload = json.loads(rendered.stdout) + html = Path(payload["output"]).read_text(encoding="utf-8") + manifest = json.loads(Path(payload["manifest"]).read_text(encoding="utf-8")) + source_sha = hashlib.sha256((project / ".agent-os" / 
"runs" / run_id / "decision-events.ndjson").read_bytes()).hexdigest() + self.assertIn(source_sha, html) + self.assertIn("Open model strategy", html) + self.assertIn("Skip full model", html) + self.assertIn("HTML is presentation, not source of truth.", html) + self.assertEqual(manifest["kind"], "decision-map") + self.assertEqual(manifest["source_sha256"], source_sha) + + def test_flow_summary_and_mission_flow_html_are_readable_sidecars(self): + with tempfile.TemporaryDirectory() as tmp: + project = Path(tmp) / "ExampleProject" + project.mkdir() + self.run_cli("init-project", "--root", str(ROOT), "--project-root", str(project), "--name", "Example") + started = self.run_cli("run-task", "--project-root", str(project), "--task-id", "T001", "--json") + run_id = json.loads(started.stdout)["run_id"] + self.write_plan_context(project, "T001", run_id) + self.log_required_phases(project, "T001", run_id) + self.run_cli( + "capability-event", + "--project-root", + str(project), + "--task-id", + "T001", + "--run-id", + run_id, + "--kind", + "shell", + "--id", + "unit-test", + "--purpose", + "Validate mission flow readability.", + ) + + plain = self.run_cli("flow-summary", "--project-root", str(project), "--run-id", run_id) + self.assertIn("FLOW_OK run=", plain.stdout) + self.assertIn("```mermaid", plain.stdout) + self.assertIn("Goal:", plain.stdout) + self.assertIn("Health check:", plain.stdout) + self.assertIn("Safe writes:", plain.stdout) + self.assertNotIn("lifecycle_status", plain.stdout) + + as_json = self.run_cli("flow-summary", "--project-root", str(project), "--run-id", run_id, "--json") + payload = json.loads(as_json.stdout) + self.assertEqual(payload["flow_marker"], "FLOW_OK") + self.assertIn("Task and plan:", payload["mermaid"]) + source = project / ".agent-os" / "runs" / run_id / "mission-flow.md" + self.assertTrue(source.exists()) + + rendered = self.run_cli( + "render-html", + "--project-root", + str(project), + "--run-id", + run_id, + "--kind", + "mission-flow", + 
"--json", + ) + html_payload = json.loads(rendered.stdout) + html = Path(html_payload["output"]).read_text(encoding="utf-8") + manifest = json.loads(Path(html_payload["manifest"]).read_text(encoding="utf-8")) + source_sha = hashlib.sha256(source.read_bytes()).hexdigest() + self.assertEqual(html_payload["flow_marker"], "FLOW_OK") + self.assertEqual(manifest["kind"], "mission-flow") + self.assertEqual(manifest["source_sha256"], source_sha) + self.assertIn(source_sha, html) + self.assertIn("Mission Flow", html) + self.assertIn("Safe Writes", html) + self.assertIn("HTML is presentation, not source of truth.", html) + def test_render_html_rich_report_is_self_contained_composable_and_stale_detectable(self): with tempfile.TemporaryDirectory() as tmp: project = Path(tmp) / "ExampleProject" @@ -2608,8 +3289,47 @@ def test_doctor_passes_initialized_project(self): self.assertTrue(any(item["label"] == "phase_keys" for item in payload)) self.assertTrue(any(item["label"] == "workflow_router" for item in payload)) self.assertTrue(any(item["label"] == "tool_registry" for item in payload)) + self.assertTrue(any(item["label"] == "decision_policy" for item in payload)) self.assertTrue(any(item["label"] == "effect_policy" for item in payload)) + def test_decision_policy_defaults_to_warn_and_requires_reason_when_disabled(self): + with tempfile.TemporaryDirectory() as tmp: + project = Path(tmp) / "ExampleProject" + project.mkdir() + runtime = Path(tmp) / "KnowledgeOSRuntime" + self.run_cli("init-os", "--root", str(ROOT), "--os-root", str(runtime), "--json") + self.run_cli( + "init-project", + "--root", + str(ROOT), + "--project-root", + str(project), + "--name", + "Example", + "--global-root", + str(runtime / "global-agent-fabric"), + "--capability-root", + str(runtime / "capability-layer"), + ) + policy = project / ".agent-os" / "decision-policy.yaml" + self.assertTrue(policy.exists()) + policy.unlink() + missing = self.run_cli("doctor", "--root", str(ROOT), "--project-root", 
str(project), "--summary") + self.assertIn("status: ok", missing.stdout) + + policy.write_text("decision_policy:\n strictness: off\n downgrade_reason:\n", encoding="utf-8") + disabled = self.run_cli("doctor", "--root", str(ROOT), "--project-root", str(project), "--summary", check=False) + self.assertEqual(disabled.returncode, 1) + self.assertIn("decision_policy", disabled.stdout) + self.assertIn("strictness=off requires downgrade_reason", disabled.stdout) + + policy.write_text( + "decision_policy:\n strictness: off\n downgrade_reason: temporary exploration mode\n", + encoding="utf-8", + ) + reasoned = self.run_cli("doctor", "--root", str(ROOT), "--project-root", str(project), "--summary") + self.assertIn("status: ok", reasoned.stdout) + def test_effect_policy_defaults_to_observe_and_requires_reason_when_disabled(self): with tempfile.TemporaryDirectory() as tmp: project = Path(tmp) / "ExampleProject" @@ -2959,6 +3679,8 @@ def test_agent_guide_outputs_operational_checklist(self): self.assertIn("check-route-write", result.stdout) self.assertIn("create-spec", result.stdout) self.assertIn("align-spec", result.stdout) + self.assertIn("thread-plan", result.stdout) + self.assertIn("THREAD_PLAN_OK", result.stdout) self.assertIn("context-pack", result.stdout) self.assertIn("plan-task", result.stdout) self.assertIn("verify-context", result.stdout) @@ -2968,12 +3690,20 @@ def test_agent_guide_outputs_operational_checklist(self): self.assertIn("CHECKPOINT_OK", result.stdout) self.assertIn("capability-event", result.stdout) self.assertIn("CAPABILITY_OK", result.stdout) + self.assertIn("decision-event", result.stdout) + self.assertIn("DECISION_OK", result.stdout) + self.assertIn("verify-decisions", result.stdout) + self.assertIn("DECISION_VERIFY_OK", result.stdout) self.assertIn("artifact-assert", result.stdout) self.assertIn("EFFECT_OK", result.stdout) self.assertIn("verify-lifecycle", result.stdout) self.assertIn("verify-effects", result.stdout) 
self.assertIn("EFFECT_VERIFY_OK", result.stdout) self.assertIn("complete-task", result.stdout) + self.assertIn("flow-summary", result.stdout) + self.assertIn("FLOW_OK", result.stdout) + self.assertIn("thread-plan", result.stdout) + self.assertIn("THREAD_PLAN_OK", result.stdout) self.assertIn("archive-legacy-project", result.stdout) def test_startup_prompt_outputs_trace_step_contract(self): @@ -2992,6 +3722,8 @@ def test_startup_prompt_outputs_trace_step_contract(self): self.assertIn("EFFECT_OK", result.stdout) self.assertIn("verify-effects", result.stdout) self.assertIn("EFFECT_VERIFY_OK", result.stdout) + self.assertIn("flow-summary", result.stdout) + self.assertIn("FLOW_OK", result.stdout) def test_dispatch_task_prioritizes_branch_builder_and_consultation(self): result = self.run_cli("dispatch-task", "--project-root", str(ROOT), "--task-id", "KOS-T009", "--json")