From a998aca0f7f99c061973114ff5714c19f1a48a7f Mon Sep 17 00:00:00 2001
From: Gourav Shah <gs@initcron.org>
Date: Fri, 2 Jan 2026 18:47:32 +0530
Subject: [PATCH 01/14] feat: Add builtin command handlers and stale message
 filtering
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add `agent: builtin` support for trigger command bindings
  - Use built-in interactive menus instead of LLM routing
  - Supports /help, /agent, /fleet with platform-specific UI
- Add stale message filtering for webhook handlers
  - Messages older than 60 seconds are silently dropped
  - Prevents processing queued messages on daemon restart
  - Configurable via `max_message_age_secs` in handler config
- Add debug logging to Google LLM provider for troubleshooting
- Add comprehensive user guide: docs/guides/builtin-commands.md
- Update trigger examples and reference documentation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 CHANGELOG.md                            |  10 +
 configs/telegram-test.yaml              |   2 +-
 crates/aof-llm/src/provider/google.rs   |  28 +++
 crates/aof-triggers/src/handler/mod.rs  |  31 +++
 crates/aofctl/src/commands/serve.rs     |   1 +
 docs/guides/builtin-commands.md         | 316 ++++++++++++++++++++++++
 docs/reference/trigger-spec.md          |  30 +++
 docusaurus-site/sidebars.ts             |   1 +
 examples/triggers/slack-starter.yaml    |   3 +-
 examples/triggers/telegram-starter.yaml |   2 +-
 10 files changed, 421 insertions(+), 3 deletions(-)
 create mode 100644 docs/guides/builtin-commands.md
diff --git a/CHANGELOG.md b/CHANGELOG.md
index dee555f..55ac8f3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+- Built-in command handler support via `agent: builtin` in trigger command bindings
+  - Use `agent: builtin` for `/help`, `/agent`, `/fleet` to get interactive menus
+  - Interactive menus include fleet/agent selection buttons (Telegram/Slack)
+  - Keeps built-in UI handlers separate from LLM-routed commands
+- Stale message filtering for webhook handlers
+  - Messages older than 60 seconds are silently dropped
+  - Prevents processing of queued messages when daemon restarts
+  - Configurable via `max_message_age_secs` in handler config
+
 ### Fixed
 - `aofctl serve` now produces visible startup output
   - Changed from tracing (default level: error) to println for critical startup messages
diff --git a/configs/telegram-test.yaml b/configs/telegram-test.yaml
index 4f57edd..448a953 100644
--- a/configs/telegram-test.yaml
+++ b/configs/telegram-test.yaml
@@ -6,7 +6,7 @@ metadata:
 
 spec:
   server:
-    port: 8080
+    port: 3000
     host: 0.0.0.0
     cors: true
     timeout_secs: 60
diff --git a/crates/aof-llm/src/provider/google.rs b/crates/aof-llm/src/provider/google.rs
index 558b35c..6a29325 100644
--- a/crates/aof-llm/src/provider/google.rs
+++ b/crates/aof-llm/src/provider/google.rs
@@ -65,6 +65,18 @@ impl GoogleModel {
         // Note: Gemini uses "user" and "model" roles only. Tool responses use functionResponse parts.
         let mut contents: Vec<GeminiContent> = Vec::new();
 
+        // Debug: Log all incoming messages with their structure
+        tracing::warn!("[GOOGLE] Building request with {} messages:", request.messages.len());
+        for (idx, msg) in request.messages.iter().enumerate() {
+            let tool_calls_info = msg.tool_calls.as_ref()
+                .map(|tcs| format!("{} tool calls", tcs.len()))
+                .unwrap_or_else(|| "no tool calls".to_string());
+            tracing::warn!(
+                "[GOOGLE]   Message[{}]: role={:?}, content_len={}, {}",
+                idx, msg.role, msg.content.len(), tool_calls_info
+            );
+        }
+
         for (i, m) in request.messages.iter().enumerate() {
             match m.role {
                 MessageRole::User => {
@@ -163,6 +175,22 @@ impl GoogleModel {
             top_k: None,
         };
 
+        // Debug: Log the final converted contents structure
+        tracing::warn!("[GOOGLE] Final contents structure ({} items):", contents.len());
+        for (idx, content) in contents.iter().enumerate() {
+            let parts_info: Vec<String> = content.parts.iter().map(|p| {
+                match p {
+                    GeminiPart::Text { text } => format!("text({})", text.len()),
+                    GeminiPart::FunctionCall { function_call } => format!("functionCall({})", function_call.name),
+                    GeminiPart::FunctionResponse { function_response } => format!("functionResponse({})", function_response.name),
+                }
+            }).collect();
+            tracing::warn!(
+                "[GOOGLE]   Content[{}]: role={}, parts=[{}]",
+                idx, content.role, parts_info.join(", ")
+            );
+        }
+
         GeminiRequest {
             contents,
             system_instruction,
diff --git a/crates/aof-triggers/src/handler/mod.rs b/crates/aof-triggers/src/handler/mod.rs
index 43b5431..a7c6885 100644
--- a/crates/aof-triggers/src/handler/mod.rs
+++ b/crates/aof-triggers/src/handler/mod.rs
@@ -109,6 +109,12 @@ pub struct TriggerHandlerConfig {
     /// Command bindings (slash command name -> binding)
     /// Maps commands like "/diagnose" to specific agents or fleets
     pub command_bindings: HashMap<String, CommandBinding>,
+
+    /// Maximum age of messages to process (in seconds)
+    /// Messages older than this are silently dropped to handle queued messages
+    /// from platforms like Telegram when the daemon was down.
+    /// Default: 60 seconds. Set to 0 to disable.
+    pub max_message_age_secs: u64,
 }
 
 impl Default for TriggerHandlerConfig {
@@ -120,6 +126,7 @@ impl Default for TriggerHandlerConfig {
             command_timeout_secs: 300, // 5 minutes
             default_agent: None,
             command_bindings: HashMap::new(),
+            max_message_age_secs: 60, // Drop messages older than 1 minute
         }
     }
 }
@@ -836,6 +843,24 @@ impl TriggerHandler {
             platform, message.id, message.user.id
         );
 
+        // Check if message is too old (stale/queued messages from when daemon was down)
+        if self.config.max_message_age_secs > 0 {
+            let message_age = chrono::Utc::now()
+                .signed_duration_since(message.timestamp)
+                .num_seconds();
+
+            if message_age > self.config.max_message_age_secs as i64 {
+                info!(
+                    "Dropping stale message from {}: {} seconds old (max: {}s) - text: '{}'",
+                    platform,
+                    message_age,
+                    self.config.max_message_age_secs,
+                    message.text.chars().take(50).collect::<String>()
+                );
+                return Ok(());
+            }
+        }
+
         // Get platform for response
         let platform_impl = self
             .platforms
@@ -881,6 +906,11 @@ impl TriggerHandler {
         if let Some(cmd_name) = command_name {
             // Check if we have a binding for this command
             if let Some(binding) = self.config.command_bindings.get(&cmd_name) {
+                // Check for builtin handler - skip binding and use built-in command handler
+                if binding.agent.as_deref() == Some("builtin") {
+                    info!("Command '{}' uses builtin handler, falling through to built-in command parser", cmd_name);
+                    // Fall through to TriggerCommand::parse below which handles built-ins
+                } else {
                 info!("Command '{}' matched binding: {:?}", cmd_name, binding);
 
                 // Create modified message with context from metadata if command text is empty
@@ -937,6 +967,7 @@ impl TriggerHandler {
                     info!("Routing command '{}' to agent '{}'", cmd_name, agent_name);
                     return self.handle_natural_language(&routed_message, platform_impl, agent_name).await;
                 }
+                } // end else (non-builtin handler)
             }
 
             // Check for default binding (for any unbound slash command)
diff --git a/crates/aofctl/src/commands/serve.rs b/crates/aofctl/src/commands/serve.rs
index 9103f93..71b1c61 100644
--- a/crates/aofctl/src/commands/serve.rs
+++ b/crates/aofctl/src/commands/serve.rs
@@ -366,6 +366,7 @@ pub async fn execute(
         command_timeout_secs: config.spec.runtime.task_timeout_secs,
         default_agent: config.spec.runtime.default_agent.clone(),
         command_bindings: std::collections::HashMap::new(), // Loaded from Trigger CRDs
+        max_message_age_secs: 60, // Drop messages older than 1 minute (handles queued messages)
     };
 
     if let Some(ref agent) = config.spec.runtime.default_agent {
diff --git a/docs/guides/builtin-commands.md b/docs/guides/builtin-commands.md
new file mode 100644
index 0000000..bf8ed30
--- /dev/null
+++ b/docs/guides/builtin-commands.md
@@ -0,0 +1,316 @@
+# Built-in Command Handlers
+
+Configure slash commands to use AOF's built-in interactive handlers instead of routing to LLM agents.
+
+## Overview
+
+By default, slash commands in trigger configurations route to LLM agents. However, for system commands like `/help`, `/agent`, and `/fleet`, you often want rich interactive menus with buttons rather than LLM-generated text responses.
+
+The `agent: builtin` configuration tells AOF to use its built-in command handlers, which provide:
+- Interactive inline keyboards (Telegram, Slack)
+- Fleet and agent selection menus
+- System information display
+- Consistent, instant responses (no LLM latency)
+
+## Quick Start
+
+Add `agent: builtin` to any command that should use built-in handlers:
+
+```yaml
+apiVersion: aof.dev/v1
+kind: Trigger
+metadata:
+  name: telegram-bot
+spec:
+  type: Telegram
+  config:
+    bot_token: ${TELEGRAM_BOT_TOKEN}
+
+  commands:
+    /help:
+      agent: builtin          # Built-in interactive menu
+      description: "Show available commands"
+    /agent:
+      agent: builtin          # Agent selection buttons
+      description: "Switch active agent"
+    /fleet:
+      agent: builtin          # Fleet selection buttons
+      description: "Switch active fleet"
+    /status:
+      agent: devops           # Routes to LLM agent
+      description: "Check system status"
+
+  default_agent: devops
+```
+
+## Available Built-in Handlers
+
+| Command | Description | Platform Support |
+|---------|-------------|------------------|
+| `/help` | Interactive help menu with command list and selection buttons | Telegram, Slack, Discord |
+| `/agent` | Agent selection menu with inline keyboard | Telegram, Slack, Discord |
+| `/fleet` | Fleet selection menu with inline keyboard | Telegram, Slack, Discord |
+| `/info` | System information (version, loaded agents, platforms) | All platforms |
+| `/flows` | List available flows with descriptions | All platforms |
+
+## User Experience
+
+### Telegram Example
+
+```
+User: /help
+
+Bot: 📋 Available Commands
+
+     /help - Show this menu
+     /agent - Switch agent
+     /fleet - Switch fleet
+     /status - System status
+     /kubectl - Kubernetes ops
+
+     [🤖 Agents]  [👥 Fleets]
+     [ℹ️ Info]    [📊 Flows]
+
+User: *taps Agents button*
+
+Bot: Select Agent
+     Current: devops
+
+     [devops]      [k8s-agent]
+     [docker-ops]  [security]
+```
+
+### Slack Example
+
+```
+User: /help
+
+Bot: 📋 AOF Help
+     Select a category:
+
+     • /status - Check system status
+     • /kubectl - Kubernetes operations
+     • /diagnose - Run diagnostics
+
+     [Agents ▼] [Fleets ▼] [Info]
+```
+
+## Configuration Examples
+
+### Basic Setup
+
+```yaml
+commands:
+  /help:
+    agent: builtin
+    description: "Show help menu"
+```
+
+### Mixed Built-in and LLM Commands
+
+```yaml
+commands:
+  # Built-in interactive handlers
+  /help:
+    agent: builtin
+    description: "Show available commands"
+  /agent:
+    agent: builtin
+    description: "Switch active agent"
+  /fleet:
+    agent: builtin
+    description: "Switch active fleet"
+
+  # LLM-powered commands
+  /kubectl:
+    agent: k8s-agent
+    description: "Kubernetes operations"
+  /diagnose:
+    fleet: rca-fleet
+    description: "Root cause analysis"
+  /deploy:
+    flow: deploy-flow
+    description: "Deployment workflow"
+```
+
+### Platform-Specific Configuration
+
+Built-in handlers adapt to platform capabilities:
+
+```yaml
+# Telegram - Full interactive buttons
+apiVersion: aof.dev/v1
+kind: Trigger
+metadata:
+  name: telegram-interactive
+spec:
+  type: Telegram
+  config:
+    bot_token: ${TELEGRAM_BOT_TOKEN}
+  commands:
+    /help:
+      agent: builtin    # Shows inline keyboard buttons
+    /agent:
+      agent: builtin    # Agent selection with buttons
+```
+
+```yaml
+# WhatsApp - Text-based menus (no inline buttons)
+apiVersion: aof.dev/v1
+kind: Trigger
+metadata:
+  name: whatsapp-mobile
+spec:
+  type: WhatsApp
+  config:
+    bot_token: ${WHATSAPP_ACCESS_TOKEN}
+    phone_number_id: ${WHATSAPP_PHONE_NUMBER_ID}
+  commands:
+    /help:
+      agent: builtin    # Text menu with numbered options
+```
+
+## When to Use Builtin vs Agent
+
+| Scenario | Use | Why |
+|----------|-----|-----|
+| Help menu | `agent: builtin` | Instant, consistent, interactive buttons |
+| Agent/fleet switching | `agent: builtin` | Rich selection UI |
+| System info | `agent: builtin` | Deterministic, no LLM needed |
+| Natural language queries | `agent: <name>` | Requires LLM reasoning |
+| Tool execution | `agent: <name>` | Needs MCP tools |
+| Multi-step workflows | `fleet: <name>` or `flow: <name>` | Complex coordination |
+
+## How It Works
+
+When a message arrives:
+
+1. **Command Parsing**: AOF extracts the command (e.g., `/help`)
+2. **Binding Lookup**: Checks `commands` section for matching binding
+3. **Builtin Check**: If `agent: builtin`, routes to built-in handler
+4. **Handler Execution**: Built-in handler generates response with platform-appropriate UI
+5. **Response**: Interactive menu sent to user
+
+```
+User: /help
+    │
+    ▼
+┌─────────────────┐
+│ Command Parser  │
+└────────┬────────┘
+         │ /help
+         ▼
+┌─────────────────┐
+│ Binding Lookup  │ ← commands: { /help: { agent: builtin } }
+└────────┬────────┘
+         │ agent: builtin
+         ▼
+┌─────────────────┐
+│ Built-in Handler│ ← HelpHandler
+└────────┬────────┘
+         │
+         ▼
+┌─────────────────┐
+│ Platform Adapter│ ← Telegram: inline keyboard
+└────────┬────────┘   Slack: button blocks
+         │            WhatsApp: text menu
+         ▼
+    Interactive Response
+```
+
+## Extending Built-in Handlers
+
+Built-in handlers automatically discover:
+- **Agents**: Loaded from `--agents-dir` or agent library
+- **Fleets**: Loaded from `--fleets-dir`
+- **Flows**: Loaded from `--flows-dir`
+
+To add more agents to the selection menu, simply add more agent YAML files to your agents directory.
+
+## Troubleshooting
+
+### Buttons Not Appearing
+
+**Problem**: `/help` shows text but no buttons
+
+**Solutions**:
+1. Verify platform supports interactive elements (Telegram, Slack, Discord do)
+2. Check bot has required permissions for inline keyboards
+3. For Telegram: Ensure bot is using webhook mode, not polling
+
+### Command Routes to LLM Instead of Menu
+
+**Problem**: `/help` gives LLM response instead of menu
+
+**Solutions**:
+1. Verify `agent: builtin` (not `agent: help` or agent name)
+2. Check trigger is loaded (daemon logs show loaded triggers)
+3. Ensure command binding exists in trigger YAML
+
+### Menu Shows No Agents
+
+**Problem**: Agent selection shows empty list
+
+**Solutions**:
+1. Check `--agents-dir` points to correct directory
+2. Verify agent YAML files are valid
+3. Look for loading errors in daemon logs
+
+## Complete Example
+
+```yaml
+# examples/triggers/telegram-with-builtins.yaml
+apiVersion: aof.dev/v1
+kind: Trigger
+metadata:
+  name: telegram-full-featured
+  labels:
+    platform: telegram
+    environment: production
+
+spec:
+  type: Telegram
+  config:
+    bot_token: ${TELEGRAM_BOT_TOKEN}
+
+  # Built-in handlers for system commands
+  commands:
+    /help:
+      agent: builtin
+      description: "Show available commands with interactive menu"
+    /agent:
+      agent: builtin
+      description: "Switch between agents using selection buttons"
+    /fleet:
+      agent: builtin
+      description: "Switch between fleets using selection buttons"
+    /info:
+      agent: builtin
+      description: "Show system information"
+
+    # LLM-powered commands
+    /status:
+      agent: devops
+      description: "Check system status"
+    /kubectl:
+      agent: k8s-agent
+      description: "Kubernetes operations"
+    /pods:
+      agent: k8s-agent
+      description: "List pods in namespace"
+    /logs:
+      agent: k8s-agent
+      description: "View pod logs"
+    /diagnose:
+      fleet: rca-fleet
+      description: "Root cause analysis with multiple agents"
+
+  # Fallback for natural language
+  default_agent: devops
+```
+
+## See Also
+
+- [Trigger Specification](../reference/trigger-spec.md) - Full trigger configuration reference
+- [Agent Switching Guide](agent-switching.md) - How fleet/agent switching works
+- [Quickstart: Telegram](quickstart-telegram.md) - Set up a Telegram bot
diff --git a/docs/reference/trigger-spec.md b/docs/reference/trigger-spec.md
index 1a46491..1b338af 100644
--- a/docs/reference/trigger-spec.md
+++ b/docs/reference/trigger-spec.md
@@ -1048,6 +1048,36 @@ commands:
 
 **Note:** Only one of `agent`, `fleet`, or `flow` should be specified per command.
 
+### Built-in Command Handlers
+
+Use `agent: builtin` to invoke AOF's built-in interactive command handlers instead of routing to an LLM agent. This is useful for commands that need rich interactive menus.
+
+```yaml
+commands:
+  /help:
+    agent: builtin          # Uses built-in help menu with fleet/agent selection
+    description: "Show available commands"
+  /agent:
+    agent: builtin          # Uses built-in agent selection menu
+    description: "Switch active agent"
+  /fleet:
+    agent: builtin          # Uses built-in fleet selection menu
+    description: "Switch active fleet"
+```
+
+**Available built-in handlers:**
+| Command | Description |
+|---------|-------------|
+| `/help` | Interactive help menu with fleet/agent selection buttons |
+| `/agent` | Agent selection menu with inline keyboard |
+| `/fleet` | Fleet selection menu with inline keyboard |
+| `/info` | System information display |
+| `/flows` | List available flows |
+
+**When to use `builtin` vs agent:**
+- Use `agent: builtin` for interactive menus and system commands
+- Use `agent: <name>` when you want the LLM to handle the command
+
 ### When to Use Each
 
 | Target | Use When | Example |
diff --git a/docusaurus-site/sidebars.ts b/docusaurus-site/sidebars.ts
index 0231090..a297ed5 100644
--- a/docusaurus-site/sidebars.ts
+++ b/docusaurus-site/sidebars.ts
@@ -167,6 +167,7 @@ const sidebars: SidebarsConfig = {
         'guides/quickstart-whatsapp',
         'guides/quickstart-teams',
         'guides/quickstart-discord',
+        'guides/builtin-commands',
         'guides/approval-workflow',
         'guides/deployment',
       ],
diff --git a/examples/triggers/slack-starter.yaml b/examples/triggers/slack-starter.yaml
index 851d645..39c8224 100644
--- a/examples/triggers/slack-starter.yaml
+++ b/examples/triggers/slack-starter.yaml
@@ -41,9 +41,10 @@ spec:
     #   - devops
 
   # Slash commands - maps /command to handlers
+  # Use "agent: builtin" for built-in interactive menus (/help, /agent, /fleet)
   commands:
     /help:
-      agent: devops
+      agent: builtin  # Use built-in help handler with interactive menu
       description: "Show available commands"
 
     /status:
diff --git a/examples/triggers/telegram-starter.yaml b/examples/triggers/telegram-starter.yaml
index d38d7e9..dcdd63f 100644
--- a/examples/triggers/telegram-starter.yaml
+++ b/examples/triggers/telegram-starter.yaml
@@ -50,7 +50,7 @@ spec:
   # These also appear in Telegram's command menu
   commands:
     /help:
-      agent: devops
+      agent: builtin  # Use built-in help handler with interactive menu
       description: "Show available commands"
 
     /status:

From 3a5cc5adc8e4e710b76ee141f1b3ccd3a923ee71 Mon Sep 17 00:00:00 2001
From: Gourav Shah <gs@initcron.org>
Date: Fri, 2 Jan 2026 18:55:56 +0530
Subject: [PATCH 02/14] feat: Enable cargo install aofctl via crates.io
 publishing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Update workspace Cargo.toml with proper metadata
  - Author: Gourav Shah <gjs@opsflow.sh>
  - Homepage: https://aof.sh
  - Documentation: https://docs.aof.sh
  - Repository: https://github.com/agenticdevops/aof
  - Keywords and categories for discoverability
- Add version requirements to workspace dependencies for crates.io
- Add required fields to all publishable crates
- Add publish-crates.sh script for manual publishing
- Add publish-crates job to release.yml workflow
  - Publishes in dependency order after GitHub release
  - Requires CARGO_REGISTRY_TOKEN secret

To publish manually:
  cargo login
  ./scripts/publish-crates.sh --publish

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 .github/workflows/release.yml  | 41 +++++++++++++++
 Cargo.toml                     | 24 +++++----
 crates/aof-core/Cargo.toml     |  4 ++
 crates/aof-llm/Cargo.toml      |  4 ++
 crates/aof-mcp/Cargo.toml      |  4 ++
 crates/aof-memory/Cargo.toml   |  4 ++
 crates/aof-runtime/Cargo.toml  |  4 ++
 crates/aof-tools/Cargo.toml    |  4 ++
 crates/aof-triggers/Cargo.toml |  5 ++
 crates/aofctl/Cargo.toml       |  5 ++
 scripts/publish-crates.sh      | 95 ++++++++++++++++++++++++++++++++++
 11 files changed, 184 insertions(+), 10 deletions(-)
 create mode 100755 scripts/publish-crates.sh

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index b01ace9..d04531a 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -166,6 +166,47 @@ jobs:
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
+  publish-crates:
+    name: Publish to crates.io
+    needs: create-release
+    runs-on: ubuntu-latest
+    if: startsWith(github.ref, 'refs/tags/')
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install Rust
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Publish crates in dependency order
+        env:
+          CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+        run: |
+          # Crates in dependency order (leaf dependencies first)
+          CRATES=(
+            "aof-core"
+            "aof-mcp"
+            "aof-llm"
+            "aof-memory"
+            "aof-tools"
+            "aof-runtime"
+            "aof-triggers"
+            "aofctl"
+          )
+
+          # Wait time between publishes to allow crates.io index to update
+          WAIT_SECONDS=30
+
+          for crate in "${CRATES[@]}"; do
+            echo "📦 Publishing $crate..."
+            cargo publish -p "$crate" --no-verify || true
+            echo "⏳ Waiting ${WAIT_SECONDS}s for crates.io index to update..."
+            sleep $WAIT_SECONDS
+          done
+
+          echo "✅ All crates published!"
+
   deploy-install-script:
     name: Deploy install.sh to web
     needs: create-release
diff --git a/Cargo.toml b/Cargo.toml
index 24760e5..ef1f12b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -19,8 +19,12 @@ version = "0.3.1-beta"
 edition = "2021"
 rust-version = "1.75"
 license = "Apache-2.0"
-repository = "https://github.com/yourusername/aof"
-authors = ["Your Name <you@example.com>"]
+repository = "https://github.com/agenticdevops/aof"
+authors = ["Gourav Shah <gjs@opsflow.sh>"]
+keywords = ["ai", "agents", "llm", "devops", "kubernetes"]
+categories = ["command-line-utilities", "development-tools"]
+homepage = "https://aof.sh"
+documentation = "https://docs.aof.sh"
 
 [workspace.dependencies]
 # Async runtime
@@ -72,14 +76,14 @@ rand = "0.8"
 # Regex
 regex = "1.10"
 
-# Internal workspace dependencies
-aof-core = { path = "crates/aof-core" }
-aof-mcp = { path = "crates/aof-mcp" }
-aof-llm = { path = "crates/aof-llm" }
-aof-runtime = { path = "crates/aof-runtime" }
-aof-memory = { path = "crates/aof-memory" }
-aof-triggers = { path = "crates/aof-triggers" }
-aof-tools = { path = "crates/aof-tools" }
+# Internal workspace dependencies (path for local dev, version for crates.io)
+aof-core = { path = "crates/aof-core", version = "0.3.1-beta" }
+aof-mcp = { path = "crates/aof-mcp", version = "0.3.1-beta" }
+aof-llm = { path = "crates/aof-llm", version = "0.3.1-beta" }
+aof-runtime = { path = "crates/aof-runtime", version = "0.3.1-beta" }
+aof-memory = { path = "crates/aof-memory", version = "0.3.1-beta" }
+aof-triggers = { path = "crates/aof-triggers", version = "0.3.1-beta" }
+aof-tools = { path = "crates/aof-tools", version = "0.3.1-beta" }
 
 # File utilities
 glob = "0.3"
diff --git a/crates/aof-core/Cargo.toml b/crates/aof-core/Cargo.toml
index a571799..2657192 100644
--- a/crates/aof-core/Cargo.toml
+++ b/crates/aof-core/Cargo.toml
@@ -7,6 +7,10 @@ license.workspace = true
 repository.workspace = true
 authors.workspace = true
 description = "Core types, traits, and abstractions for AOF framework"
+keywords.workspace = true
+categories.workspace = true
+homepage.workspace = true
+documentation.workspace = true
 
 [dependencies]
 serde = { workspace = true }
diff --git a/crates/aof-llm/Cargo.toml b/crates/aof-llm/Cargo.toml
index edfe180..58a437f 100644
--- a/crates/aof-llm/Cargo.toml
+++ b/crates/aof-llm/Cargo.toml
@@ -7,6 +7,10 @@ license.workspace = true
 repository.workspace = true
 authors.workspace = true
 description = "Multi-provider LLM abstraction layer"
+keywords.workspace = true
+categories.workspace = true
+homepage.workspace = true
+documentation.workspace = true
 
 [dependencies]
 aof-core = { workspace = true }
diff --git a/crates/aof-mcp/Cargo.toml b/crates/aof-mcp/Cargo.toml
index 1940189..6f1f269 100644
--- a/crates/aof-mcp/Cargo.toml
+++ b/crates/aof-mcp/Cargo.toml
@@ -7,6 +7,10 @@ license.workspace = true
 repository.workspace = true
 authors.workspace = true
 description = "Model Context Protocol (MCP) client implementation"
+keywords.workspace = true
+categories.workspace = true
+homepage.workspace = true
+documentation.workspace = true
 
 [dependencies]
 aof-core = { workspace = true }
diff --git a/crates/aof-memory/Cargo.toml b/crates/aof-memory/Cargo.toml
index b5d5cc9..ee73ad1 100644
--- a/crates/aof-memory/Cargo.toml
+++ b/crates/aof-memory/Cargo.toml
@@ -7,6 +7,10 @@ license.workspace = true
 repository.workspace = true
 authors.workspace = true
 description = "Pluggable memory backends for agent state management"
+keywords.workspace = true
+categories.workspace = true
+homepage.workspace = true
+documentation.workspace = true
 
 [dependencies]
 aof-core = { workspace = true }
diff --git a/crates/aof-runtime/Cargo.toml b/crates/aof-runtime/Cargo.toml
index 3ac1509..441cda2 100644
--- a/crates/aof-runtime/Cargo.toml
+++ b/crates/aof-runtime/Cargo.toml
@@ -7,6 +7,10 @@ license.workspace = true
 repository.workspace = true
 authors.workspace = true
 description = "Agent execution runtime with task orchestration"
+keywords.workspace = true
+categories.workspace = true
+homepage.workspace = true
+documentation.workspace = true
 
 [dependencies]
 aof-core = { workspace = true }
diff --git a/crates/aof-tools/Cargo.toml b/crates/aof-tools/Cargo.toml
index 59f56fd..1ba0901 100644
--- a/crates/aof-tools/Cargo.toml
+++ b/crates/aof-tools/Cargo.toml
@@ -7,6 +7,10 @@ license.workspace = true
 repository.workspace = true
 authors.workspace = true
 description = "Modular tool implementations for AOF agents"
+keywords.workspace = true
+categories.workspace = true
+homepage.workspace = true
+documentation.workspace = true
 
 [features]
 default = ["file", "shell", "git"]
diff --git a/crates/aof-triggers/Cargo.toml b/crates/aof-triggers/Cargo.toml
index 142f7a2..04f938f 100644
--- a/crates/aof-triggers/Cargo.toml
+++ b/crates/aof-triggers/Cargo.toml
@@ -6,6 +6,11 @@ rust-version.workspace = true
 license.workspace = true
 repository.workspace = true
 authors.workspace = true
+description = "Event triggers and webhook handlers for AOF agents"
+keywords.workspace = true
+categories.workspace = true
+homepage.workspace = true
+documentation.workspace = true
 
 [dependencies]
 # Workspace dependencies
diff --git a/crates/aofctl/Cargo.toml b/crates/aofctl/Cargo.toml
index ab50e74..7b55858 100644
--- a/crates/aofctl/Cargo.toml
+++ b/crates/aofctl/Cargo.toml
@@ -7,6 +7,11 @@ license.workspace = true
 repository.workspace = true
 authors.workspace = true
 description = "CLI for AOF framework - kubectl-style agent orchestration"
+keywords.workspace = true
+categories.workspace = true
+homepage.workspace = true
+documentation.workspace = true
+readme = "../../README.md"
 
 [[bin]]
 name = "aofctl"
diff --git a/scripts/publish-crates.sh b/scripts/publish-crates.sh
new file mode 100755
index 0000000..a28ab9c
--- /dev/null
+++ b/scripts/publish-crates.sh
@@ -0,0 +1,95 @@
+#!/bin/bash
+# Publish AOF crates to crates.io in dependency order
+#
+# Prerequisites:
+#   1. cargo login with your crates.io token
+#   2. All tests passing: cargo test --workspace
+#   3. Clean git status (all changes committed)
+#
+# Usage:
+#   ./scripts/publish-crates.sh           # Dry run (default)
+#   ./scripts/publish-crates.sh --publish # Actually publish
+
+set -e
+
+DRY_RUN=true
+if [[ "$1" == "--publish" ]]; then
+    DRY_RUN=false
+    echo "🚀 Publishing crates to crates.io..."
+else
+    echo "🔍 Dry run mode (use --publish to actually publish)"
+fi
+
+# Crates in dependency order (leaf dependencies first)
+CRATES=(
+    "aof-core"
+    "aof-mcp"
+    "aof-llm"
+    "aof-memory"
+    "aof-tools"
+    "aof-runtime"
+    "aof-triggers"
+    "aofctl"
+)
+
+# Wait time between publishes to allow crates.io index to update
+WAIT_SECONDS=30
+
+publish_crate() {
+    local crate=$1
+    echo ""
+    echo "📦 Publishing $crate..."
+
+    if $DRY_RUN; then
+        cargo publish -p "$crate" --dry-run --allow-dirty
+    else
+        cargo publish -p "$crate"
+        echo "⏳ Waiting ${WAIT_SECONDS}s for crates.io index to update..."
+        sleep $WAIT_SECONDS
+    fi
+}
+
+# Verify we're logged in
+if ! cargo login --help > /dev/null 2>&1; then
+    echo "❌ cargo not found. Please install Rust."
+    exit 1
+fi
+
+# Check for uncommitted changes
+if ! git diff --quiet; then
+    if $DRY_RUN; then
+        echo "⚠️  Uncommitted changes detected (allowed in dry-run mode)"
+    else
+        echo "❌ Uncommitted changes detected. Please commit or stash before publishing."
+        git status --short
+        exit 1
+    fi
+fi
+
+# Run tests first
+echo "🧪 Running tests..."
+cargo test --workspace --lib 2>&1 | tail -5
+
+echo ""
+echo "Publishing order:"
+for i in "${!CRATES[@]}"; do
+    echo "  $((i+1)). ${CRATES[$i]}"
+done
+
+# Publish each crate
+for crate in "${CRATES[@]}"; do
+    publish_crate "$crate"
+done
+
+echo ""
+if $DRY_RUN; then
+    echo "✅ Dry run completed successfully!"
+    echo ""
+    echo "To actually publish, run:"
+    echo "  ./scripts/publish-crates.sh --publish"
+else
+    echo "✅ All crates published successfully!"
+    echo ""
+    echo "Users can now install with:"
+    echo "  cargo install aofctl"
+fi

From c3f7316c31d953154cc5bc2e20f2acc6d72d748e Mon Sep 17 00:00:00 2001
From: Gourav Shah <gs@initcron.org>
Date: Fri, 2 Jan 2026 19:05:36 +0530
Subject: [PATCH 03/14] chore: Release v0.3.2-beta
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Changes in this release:
- Built-in command handlers (`agent: builtin`)
- Stale message filtering for webhooks
- cargo install aofctl support
- Improved daemon startup output
- Single-response GitHub PR reviews
- Improved library:// URI resolution
- New Built-in Commands Guide

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 CHANGELOG.md |  7 +++++++
 Cargo.toml   | 16 ++++++++--------
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 55ac8f3..1c7103d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.3.2-beta] - 2026-01-02
+
 ### Added
 - Built-in command handler support via `agent: builtin` in trigger command bindings
   - Use `agent: builtin` for `/help`, `/agent`, `/fleet` to get interactive menus
@@ -16,6 +18,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   - Messages older than 60 seconds are silently dropped
   - Prevents processing of queued messages when daemon restarts
   - Configurable via `max_message_age_secs` in handler config
+- `cargo install aofctl` support via crates.io publishing
+  - All AOF crates now published to crates.io
+  - Automated publishing on tagged releases
+- New documentation: Built-in Commands Guide (`docs/guides/builtin-commands.md`)
 
 ### Fixed
 - `aofctl serve` now produces visible startup output
@@ -26,6 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   - Intermediate acknowledgment messages ("Thinking...", "Processing...") are skipped for Git platforms
   - Only the final response is posted, keeping PR threads clean
   - Slack/Telegram/Discord still show real-time progress indicators
+- Improved `library://` URI path resolution for agent library
 
 ## [0.3.1-beta] - 2025-12-26
 
diff --git a/Cargo.toml b/Cargo.toml
index ef1f12b..f2b7cb9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,7 +15,7 @@ members = [
 ]
 
 [workspace.package]
-version = "0.3.1-beta"
+version = "0.3.2-beta"
 edition = "2021"
 rust-version = "1.75"
 license = "Apache-2.0"
@@ -77,13 +77,13 @@ rand = "0.8"
 regex = "1.10"
 
 # Internal workspace dependencies (path for local dev, version for crates.io)
-aof-core = { path = "crates/aof-core", version = "0.3.1-beta" }
-aof-mcp = { path = "crates/aof-mcp", version = "0.3.1-beta" }
-aof-llm = { path = "crates/aof-llm", version = "0.3.1-beta" }
-aof-runtime = { path = "crates/aof-runtime", version = "0.3.1-beta" }
-aof-memory = { path = "crates/aof-memory", version = "0.3.1-beta" }
-aof-triggers = { path = "crates/aof-triggers", version = "0.3.1-beta" }
-aof-tools = { path = "crates/aof-tools", version = "0.3.1-beta" }
+aof-core = { path = "crates/aof-core", version = "0.3.2-beta" }
+aof-mcp = { path = "crates/aof-mcp", version = "0.3.2-beta" }
+aof-llm = { path = "crates/aof-llm", version = "0.3.2-beta" }
+aof-runtime = { path = "crates/aof-runtime", version = "0.3.2-beta" }
+aof-memory = { path = "crates/aof-memory", version = "0.3.2-beta" }
+aof-triggers = { path = "crates/aof-triggers", version = "0.3.2-beta" }
+aof-tools = { path = "crates/aof-tools", version = "0.3.2-beta" }
 
 # File utilities
 glob = "0.3"

From 375069ca28b276f9c477b6b3874e71d5755bca6d Mon Sep 17 00:00:00 2001
From: Gopal <gopal@Gopals-MacBook-Air.local>
Date: Sat, 3 Jan 2026 15:13:47 +0530
Subject: [PATCH 04/14] feat: Add Jira platform support to daemon serve command
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add JiraPlatformConfig struct and registration logic in serve.rs
- Export JiraPlatform and JiraConfig from aof-triggers lib
- Support both direct secret and HMAC-SHA256 signature verification
- Handle sha256= prefix in webhook signatures

Documentation updates:
- Add Jira platform section to daemon-config.md reference
- Update jira-integration.md with correct DaemonConfig format
- Add webhook payload templates for Jira Automation rules
- Document both Automation Rules and System Webhooks setup options
- Add payload templates for: issue created/updated, comment, worklog, sprint events

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 crates/aof-triggers/src/lib.rs            |   1 +
 crates/aof-triggers/src/platforms/jira.rs |  29 ++-
 crates/aofctl/src/commands/serve.rs       | 100 +++++++++
 docs/concepts/jira-integration.md         |  50 +++--
 docs/reference/daemon-config.md           |  62 ++++++
 docs/reference/jira-integration.md        | 244 ++++++++++++++++------
 docs/tutorials/jira-automation.md         | 212 +++++++++++++++++--
 7 files changed, 595 insertions(+), 103 deletions(-)

diff --git a/crates/aof-triggers/src/lib.rs b/crates/aof-triggers/src/lib.rs
index 70a6c0e..454b632 100644
--- a/crates/aof-triggers/src/lib.rs
+++ b/crates/aof-triggers/src/lib.rs
@@ -54,6 +54,7 @@ pub use platforms::{
     TelegramConfig, TelegramPlatform,
     WhatsAppConfig, WhatsAppPlatform,
     GitHubConfig, GitHubPlatform,
+    JiraConfig, JiraPlatform,
     PagerDutyConfig, PagerDutyPlatform,
     TypedPlatformConfig,
     // Platform registry for extensibility
diff --git a/crates/aof-triggers/src/platforms/jira.rs b/crates/aof-triggers/src/platforms/jira.rs
index 88272b7..e816c59 100644
--- a/crates/aof-triggers/src/platforms/jira.rs
+++ b/crates/aof-triggers/src/platforms/jira.rs
@@ -431,8 +431,25 @@ impl JiraPlatform {
         Ok(Self { config, client })
     }
 
-    /// Verify HMAC-SHA256 signature from Jira webhook
+    /// Verify signature from Jira webhook
+    /// Supports multiple modes:
+    /// 1. HMAC-SHA256 signature (prefixed with "sha256=" or raw hex)
+    /// 2. Static shared secret (direct comparison for Jira Automation)
     fn verify_jira_signature(&self, payload: &[u8], signature: &str) -> bool {
+        // Strip common prefixes like "sha256=" or "sha1=" if present
+        let provided_signature = signature
+            .strip_prefix("sha256=")
+            .or_else(|| signature.strip_prefix("sha1="))
+            .unwrap_or(signature);
+
+        // Mode 1: Direct secret comparison (for Jira Automation static secrets)
+        // Jira Automation sends the secret value directly in the header
+        if provided_signature == self.config.webhook_secret {
+            debug!("Jira signature verified via direct secret match");
+            return true;
+        }
+
+        // Mode 2: HMAC-SHA256 verification (for computed signatures)
         let mut mac = match HmacSha256::new_from_slice(self.config.webhook_secret.as_bytes()) {
             Ok(m) => m,
             Err(e) => {
@@ -445,14 +462,14 @@ impl JiraPlatform {
         let result = mac.finalize();
         let computed_signature = hex::encode(result.into_bytes());
 
-        if computed_signature == signature {
-            debug!("Jira signature verified successfully");
+        if computed_signature == provided_signature {
+            debug!("Jira signature verified via HMAC-SHA256");
             true
         } else {
             debug!(
-                "Signature mismatch - computed: {}, provided: {}",
-                &computed_signature[..8],
-                &signature[..8.min(signature.len())]
+                "Signature mismatch - computed HMAC: {}..., provided: {}...",
+                &computed_signature[..8.min(computed_signature.len())],
+                &provided_signature[..8.min(provided_signature.len())]
             );
             false
         }
diff --git a/crates/aofctl/src/commands/serve.rs b/crates/aofctl/src/commands/serve.rs
index 9103f93..923e39d 100644
--- a/crates/aofctl/src/commands/serve.rs
+++ b/crates/aofctl/src/commands/serve.rs
@@ -18,6 +18,7 @@ use aof_triggers::{
     TelegramPlatform, TelegramConfig,
     WhatsAppPlatform, WhatsAppConfig,
     GitHubPlatform, GitHubConfig,
+    JiraPlatform, JiraConfig,
     CommandBinding as HandlerCommandBinding,
     flow::{FlowRegistry, FlowRouter},
 };
@@ -135,6 +136,9 @@ pub struct PlatformConfigs {
 
     /// WhatsApp configuration
     pub whatsapp: Option<WhatsAppPlatformConfig>,
+
+    /// Jira configuration
+    pub jira: Option<JiraPlatformConfig>,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -203,6 +207,43 @@ pub struct WhatsAppPlatformConfig {
     pub app_secret: Option<String>,
 }
 
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct JiraPlatformConfig {
+    #[serde(default = "default_true")]
+    pub enabled: bool,
+
+    /// Jira Cloud ID (for cloud instances)
+    pub cloud_id: Option<String>,
+    pub cloud_id_env: Option<String>,
+
+    /// Base URL (e.g., https://your-domain.atlassian.net)
+    pub base_url: Option<String>,
+
+    /// User email for API authentication
+    pub user_email: Option<String>,
+    pub user_email_env: Option<String>,
+
+    /// API token for authentication
+    pub api_token: Option<String>,
+    pub api_token_env: Option<String>,
+
+    /// Webhook secret for signature verification
+    pub webhook_secret: Option<String>,
+    pub webhook_secret_env: Option<String>,
+
+    /// Bot name for identification in comments
+    #[serde(default)]
+    pub bot_name: Option<String>,
+
+    /// Allowed project keys (whitelist)
+    #[serde(default)]
+    pub allowed_projects: Option<Vec<String>>,
+
+    /// Allowed event types (whitelist)
+    #[serde(default)]
+    pub allowed_events: Option<Vec<String>>,
+}
+
 #[derive(Debug, Clone, Default, Serialize, Deserialize)]
 pub struct AgentDiscoveryConfig {
     /// Directory containing agent YAML files
@@ -512,6 +553,65 @@ pub async fn execute(
         }
     }
 
+    // Jira
+    if let Some(jira_config) = &config.spec.platforms.jira {
+        if jira_config.enabled {
+            let api_token = resolve_env_value(
+                jira_config.api_token.as_deref(),
+                jira_config.api_token_env.as_deref(),
+            );
+            let user_email = resolve_env_value(
+                jira_config.user_email.as_deref(),
+                jira_config.user_email_env.as_deref(),
+            );
+            let webhook_secret = resolve_env_value(
+                jira_config.webhook_secret.as_deref(),
+                jira_config.webhook_secret_env.as_deref(),
+            );
+
+            // Build base URL from cloud_id or use provided base_url
+            let base_url = if let Some(ref url) = jira_config.base_url {
+                Some(url.clone())
+            } else {
+                let cloud_id = resolve_env_value(
+                    jira_config.cloud_id.as_deref(),
+                    jira_config.cloud_id_env.as_deref(),
+                );
+                cloud_id.map(|id| format!("https://api.atlassian.com/ex/jira/{}", id))
+            };
+
+            if let (Some(token), Some(email), Some(secret), Some(url)) =
+                (api_token, user_email, webhook_secret, base_url)
+            {
+                let platform_config = JiraConfig {
+                    base_url: url,
+                    email,
+                    api_token: token,
+                    webhook_secret: secret,
+                    bot_name: jira_config.bot_name.clone().unwrap_or_else(|| "aofbot".to_string()),
+                    allowed_projects: jira_config.allowed_projects.clone(),
+                    allowed_events: jira_config.allowed_events.clone(),
+                    allowed_users: None,
+                    enable_comments: true,
+                    enable_updates: true,
+                    enable_transitions: true,
+                };
+                match JiraPlatform::new(platform_config) {
+                    Ok(platform) => {
+                        handler.register_platform(Arc::new(platform));
+                        println!("  Registered platform: jira");
+                        platforms_registered += 1;
+                    }
+                    Err(e) => {
+                        eprintln!("  Failed to create Jira platform: {}", e);
+                    }
+                }
+            } else {
+                eprintln!("  Jira enabled but missing required config (api_token, user_email, webhook_secret, and base_url or cloud_id)");
+            }
+        }
+    }
+
     // Load Triggers from directory
     let triggers_dir_path = triggers_dir
         .map(PathBuf::from)
diff --git a/docs/concepts/jira-integration.md b/docs/concepts/jira-integration.md
index df66e86..d1dc7c5 100644
--- a/docs/concepts/jira-integration.md
+++ b/docs/concepts/jira-integration.md
@@ -404,39 +404,53 @@ spec:
 
 ```yaml
 # daemon.yaml
-apiVersion: aof.dev/v1alpha1
+apiVersion: aof.dev/v1
 kind: DaemonConfig
 metadata:
   name: aof-daemon
 
 spec:
   server:
-    host: 0.0.0.0
+    host: "0.0.0.0"
     port: 3000
+    cors: true
+    timeout_secs: 60
 
   platforms:
-    - type: Jira
-      config:
-        webhook_secret: ${JIRA_WEBHOOK_SECRET}
-        webhook_path: /webhook/jira  # Default path
+    jira:
+      enabled: true
+      # Use base_url for direct Atlassian URL (recommended)
+      base_url: https://your-domain.atlassian.net
+      # Authentication credentials via environment variables
+      user_email_env: JIRA_USER_EMAIL
+      api_token_env: JIRA_API_TOKEN
+      webhook_secret_env: JIRA_WEBHOOK_SECRET
+      bot_name: aofbot  # Optional: name displayed in comments
+
+      # Optional: Restrict to specific projects
+      allowed_projects:
+        - PROJ
+        - DEV
 
-        # Optional: Filter at platform level
-        allowed_projects:
-          - PROJ
-          - DEV
+  # Resource directories
+  triggers:
+    directory: "./triggers"
+    watch: true
 
-        # Jira API credentials for agent actions
-        api_config:
-          instance_url: ${JIRA_CLOUD_INSTANCE_URL}
-          user_email: ${JIRA_USER_EMAIL}
-          api_token: ${JIRA_API_TOKEN}
+  agents:
+    directory: "./agents"
 
   flows:
-    - path: flows/bug-triage.yaml
-    - path: flows/sprint-planning.yaml
-    - path: flows/standup-summary.yaml
+    directory: "./flows"
+    enabled: true
+
+  runtime:
+    max_concurrent_tasks: 10
+    task_timeout_secs: 300
 ```
 
+**Webhook endpoint**: `https://your-domain.com/webhook/jira`
+
 ### Trigger with Interactive Commands
 
 Enable `/analyze` style commands in Jira comments:
diff --git a/docs/reference/daemon-config.md b/docs/reference/daemon-config.md
index 9a069c8..718f0d3 100644
--- a/docs/reference/daemon-config.md
+++ b/docs/reference/daemon-config.md
@@ -25,6 +25,7 @@ spec:
     telegram: object
     discord: object
     whatsapp: object
+    jira: object
   agents:                   # Required: Agent discovery
     directory: string
   fleets:                   # Optional: Fleet discovery
@@ -167,6 +168,65 @@ spec:
       verify_token_env: WHATSAPP_VERIFY_TOKEN
 ```
 
+### Jira Platform
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `enabled` | bool | Yes | Enable Jira Cloud integration |
+| `base_url` | string | Yes* | Jira instance URL (e.g., `https://your-domain.atlassian.net`) |
+| `cloud_id_env` | string | Yes* | Env var for Jira Cloud ID (alternative to base_url) |
+| `user_email_env` | string | Yes | Env var for user email for API authentication |
+| `api_token_env` | string | Yes | Env var for API token |
+| `webhook_secret_env` | string | Yes | Env var for webhook secret (for signature verification) |
+| `bot_name` | string | No | Bot name for comments (default: "aofbot") |
+| `allowed_projects` | array | No | Project keys allowed to trigger (whitelist) |
+| `allowed_events` | array | No | Event types to handle (whitelist) |
+
+*Either `base_url` or `cloud_id_env` must be provided.
+
+**Supported Events:**
+- `jira:issue_created` - Issue created
+- `jira:issue_updated` - Issue updated
+- `jira:issue_deleted` - Issue deleted
+- `comment_created` - Comment added
+- `comment_updated` - Comment updated
+- `comment_deleted` - Comment deleted
+- `sprint_started` - Sprint started
+- `sprint_closed` - Sprint closed
+- `worklog_created` - Work logged
+- `worklog_updated` - Worklog updated
+
+**Example:**
+```yaml
+spec:
+  platforms:
+    jira:
+      enabled: true
+      base_url: https://your-domain.atlassian.net
+      user_email_env: JIRA_USER_EMAIL
+      api_token_env: JIRA_API_TOKEN
+      webhook_secret_env: JIRA_WEBHOOK_SECRET
+      bot_name: aof-automation
+
+      # Optional: Restrict to specific projects
+      allowed_projects:
+        - SCRUM
+        - OPS
+
+      # Optional: Only handle these events
+      allowed_events:
+        - jira:issue_created
+        - jira:issue_updated
+        - comment_created
+```
+
+**Setting up Jira Automation webhook URL:**
+
+Configure your Jira Automation rules to POST to:
+```
+https://your-domain/webhook/jira
+```
+
 ---
 
 ## Agent Discovery
@@ -379,6 +439,7 @@ DaemonConfig references environment variables for sensitive data. Never hardcode
 | Telegram | `TELEGRAM_BOT_TOKEN` |
 | Discord | `DISCORD_BOT_TOKEN`, `DISCORD_APPLICATION_ID` |
 | WhatsApp | `WHATSAPP_PHONE_NUMBER_ID`, `WHATSAPP_ACCESS_TOKEN`, `WHATSAPP_VERIFY_TOKEN` |
+| Jira | `JIRA_USER_EMAIL`, `JIRA_API_TOKEN`, `JIRA_WEBHOOK_SECRET` (+ `JIRA_CLOUD_ID` or `base_url` in config) |
 
 **LLM API keys:**
 | Provider | Variable |
@@ -478,6 +539,7 @@ The server exposes these endpoints for each platform:
 | GitHub | `https://your-domain/webhook/github` |
 | GitLab | `https://your-domain/webhook/gitlab` |
 | Bitbucket | `https://your-domain/webhook/bitbucket` |
+| Jira | `https://your-domain/webhook/jira` |
 
 ---
 
diff --git a/docs/reference/jira-integration.md b/docs/reference/jira-integration.md
index a784eb1..869ce43 100644
--- a/docs/reference/jira-integration.md
+++ b/docs/reference/jira-integration.md
@@ -75,13 +75,25 @@ spec:
   platforms:
     jira:
       enabled: true
-      base_url: https://yourcompany.atlassian.net  # Jira Cloud URL
-      auth:
-        type: api_token                            # api_token, oauth2, or pat
-        email_env: JIRA_EMAIL                      # For API token auth
-        token_env: JIRA_API_TOKEN
+      # Use base_url for direct Atlassian URL (recommended)
+      base_url: https://yourcompany.atlassian.net
+      # Or use cloud_id_env for Cloud ID based URL construction
+      # cloud_id_env: JIRA_CLOUD_ID
+      user_email_env: JIRA_USER_EMAIL
+      api_token_env: JIRA_API_TOKEN
       webhook_secret_env: JIRA_WEBHOOK_SECRET
-      bot_name: aofbot                             # Optional: for @mentions
+      bot_name: aofbot  # Optional: name for comments
+
+      # Optional: Restrict to specific projects
+      allowed_projects:
+        - PROJ
+        - DEV
+
+      # Optional: Filter by event types
+      allowed_events:
+        - jira:issue_created
+        - jira:issue_updated
+        - comment_created
 
   # Resource discovery
   triggers:
@@ -103,70 +115,43 @@ spec:
     task_timeout_secs: 300
 ```
 
+**Webhook endpoint**: `https://your-domain.com/webhook/jira`
+
+> **Important**: When configuring Jira automation rules, use the full URL with `/webhook/jira` path, not just the base domain.
+
 ### Platform Configuration Fields
 
 | Field | Type | Required | Description |
 |-------|------|----------|-------------|
 | `enabled` | bool | Yes | Enable Jira webhook endpoint (`/webhook/jira`) |
-| `base_url` | string | Yes | Jira instance URL (Cloud or self-hosted) |
-| `auth.type` | string | Yes | Authentication type: `api_token`, `oauth2`, or `pat` |
-| `auth.email_env` | string | Conditional | Required for `api_token` auth |
-| `auth.token_env` | string | Yes | Environment variable name for token/PAT |
+| `base_url` | string | Yes* | Jira instance URL (e.g., `https://your-domain.atlassian.net`) |
+| `cloud_id_env` | string | Yes* | Environment variable for Jira Cloud ID (alternative to base_url) |
+| `user_email_env` | string | Yes | Environment variable name for user email |
+| `api_token_env` | string | Yes | Environment variable name for API token |
 | `webhook_secret_env` | string | Yes | Environment variable name for webhook secret |
-| `bot_name` | string | No | Bot name for @mentions (default: "aofbot") |
-
-#### Authentication Types
-
-**API Token (Recommended for Cloud):**
-```yaml
-auth:
-  type: api_token
-  email_env: JIRA_EMAIL
-  token_env: JIRA_API_TOKEN
-```
-
-**Personal Access Token (Server/Data Center):**
-```yaml
-auth:
-  type: pat
-  token_env: JIRA_PAT
-```
+| `bot_name` | string | No | Bot name for comments (default: "aofbot") |
+| `allowed_projects` | array | No | Project keys allowed to trigger (whitelist) |
+| `allowed_events` | array | No | Event types to handle (whitelist) |
 
-**OAuth 2.0 (Advanced):**
-```yaml
-auth:
-  type: oauth2
-  token_env: JIRA_OAUTH_TOKEN
-  # Additional OAuth config...
-```
+*Either `base_url` or `cloud_id_env` must be provided.
 
 ### Self-Hosted Jira Configuration
 
-For Jira Server or Data Center deployments:
+For Jira Server or Data Center deployments, use `base_url` pointing to your internal instance:
 
 ```yaml
 platforms:
   jira:
     enabled: true
     base_url: https://jira.yourcompany.com  # Self-hosted URL
-    auth:
-      type: pat
-      token_env: JIRA_PAT
+    user_email_env: JIRA_USER_EMAIL
+    api_token_env: JIRA_API_TOKEN  # Use PAT for Server/DC
     webhook_secret_env: JIRA_WEBHOOK_SECRET
-
-    # Optional: Proxy configuration
-    proxy:
-      http_proxy: http://proxy.company.com:8080
-      https_proxy: https://proxy.company.com:8080
-      no_proxy: localhost,127.0.0.1
-
-    # Optional: TLS configuration
-    tls:
-      verify: true
-      ca_cert_path: /etc/ssl/certs/company-ca.pem
 ```
 
-> **Note**: Event filtering, project filtering, and command routing are configured in **Trigger** files, not in DaemonConfig. This separation keeps daemon config minimal and allows per-trigger customization.
+> **Note**: For Jira Server/Data Center, create a Personal Access Token (PAT) instead of an API token. The configuration is the same - just store the PAT in `JIRA_API_TOKEN`.
+
+> **Note**: Event filtering, project filtering, and command routing can also be configured in **Trigger** files for per-trigger customization.
 
 ### Trigger Configuration
 
@@ -1338,7 +1323,140 @@ spec:
 
 ## Webhook Setup
 
-### 1. Create Webhook in Jira
+There are two ways to configure Jira webhooks:
+
+### Option A: Jira Automation Rules (Project-Level)
+
+Use this method if you don't have Jira admin access or want per-project control.
+
+#### 1. Create Automation Rule
+
+1. Go to your Jira project
+2. Navigate to **Project Settings** → **Automation**
+3. Click **Create rule**
+4. Choose a trigger (e.g., **When: Issue created**)
+5. Add action → **Send web request**
+
+#### 2. Configure Web Request
+
+**URL**: `https://your-domain.com/webhook/jira`
+
+**HTTP method**: `POST`
+
+**Headers**:
+| Key | Value |
+|-----|-------|
+| `Content-Type` | `application/json` |
+| `X-Hub-Signature` | `<your JIRA_WEBHOOK_SECRET value>` |
+
+**Web request body**: Select **Custom data** and use a payload template.
+
+#### 3. Payload Templates
+
+**Issue Created/Updated:**
+```json
+{
+  "webhookEvent": "jira:issue_created",
+  "timestamp": {{now.asLong}},
+  "issue": {
+    "id": "{{issue.id}}",
+    "key": "{{issue.key}}",
+    "fields": {
+      "summary": "{{issue.summary}}",
+      "description": "{{issue.description}}",
+      "issuetype": { "name": "{{issue.issueType.name}}" },
+      "status": { "name": "{{issue.status.name}}" },
+      "priority": { "name": "{{issue.priority.name}}" },
+      "project": {
+        "key": "{{issue.project.key}}",
+        "name": "{{issue.project.name}}"
+      },
+      "assignee": {
+        "displayName": "{{issue.assignee.displayName}}",
+        "accountId": "{{issue.assignee.accountId}}"
+      },
+      "reporter": {
+        "displayName": "{{issue.reporter.displayName}}",
+        "accountId": "{{issue.reporter.accountId}}"
+      },
+      "labels": {{issue.labels.asJsonArray}}
+    }
+  },
+  "user": {
+    "accountId": "{{initiator.accountId}}",
+    "displayName": "{{initiator.displayName}}"
+  }
+}
+```
+
+**Comment Created:**
+```json
+{
+  "webhookEvent": "comment_created",
+  "timestamp": {{now.asLong}},
+  "issue": {
+    "id": "{{issue.id}}",
+    "key": "{{issue.key}}",
+    "fields": {
+      "summary": "{{issue.summary}}",
+      "project": {
+        "key": "{{issue.project.key}}",
+        "name": "{{issue.project.name}}"
+      }
+    }
+  },
+  "comment": {
+    "id": "{{comment.id}}",
+    "body": "{{comment.body}}",
+    "author": {
+      "accountId": "{{comment.author.accountId}}",
+      "displayName": "{{comment.author.displayName}}"
+    }
+  },
+  "user": {
+    "accountId": "{{initiator.accountId}}",
+    "displayName": "{{initiator.displayName}}"
+  }
+}
+```
+
+**Work Logged:**
+```json
+{
+  "webhookEvent": "worklog_created",
+  "timestamp": {{now.asLong}},
+  "issue": {
+    "id": "{{issue.id}}",
+    "key": "{{issue.key}}",
+    "fields": {
+      "summary": "{{issue.summary}}",
+      "issuetype": { "name": "{{issue.issueType.name}}" },
+      "status": { "name": "{{issue.status.name}}" },
+      "priority": { "name": "{{issue.priority.name}}" },
+      "project": {
+        "key": "{{issue.project.key}}",
+        "name": "{{issue.project.name}}"
+      }
+    }
+  },
+  "user": {
+    "accountId": "{{initiator.accountId}}",
+    "displayName": "{{initiator.displayName}}"
+  }
+}
+```
+
+> **Important**: The `{{...}}` placeholders are Jira smart values. They get replaced with actual data when the webhook fires.
+
+#### 4. Signature Verification
+
+Jira Automation sends the `X-Hub-Signature` header value as a **static secret** (not computed HMAC). Your `JIRA_WEBHOOK_SECRET` environment variable must **exactly match** the value you configure in the header.
+
+---
+
+### Option B: System Webhooks (Admin Only)
+
+Use this method if you have Jira admin access. System webhooks automatically include complete payloads.
 
 #### Jira Cloud
 
@@ -1348,8 +1466,8 @@ spec:
    - **Name**: AOF Automation
    - **Status**: Enabled
    - **URL**: `https://your-domain.com/webhook/jira`
-   - **Secret**: Your `JIRA_WEBHOOK_SECRET` value
-   - **Events**: Select desired events or check "All issues"
+   - **Secret**: Your `JIRA_WEBHOOK_SECRET` value (enables HMAC verification)
+   - **Events**: Select desired events
    - **Exclude body**: Uncheck (AOF needs full payload)
 
 #### Jira Server/Data Center
@@ -1358,7 +1476,9 @@ spec:
 2. Create webhook with same configuration as Cloud
 3. Ensure firewall allows webhook traffic to AOF daemon
 
-### 2. Expose Endpoint
+---
+
+### Expose Endpoint
 
 **For production:**
 ```bash
@@ -1380,19 +1500,19 @@ ngrok http 3000
 
 Use tunnel URL as webhook URL in Jira.
 
-### 3. Verify Webhook
+### Verify Webhook
 
-1. Test webhook in Jira webhook settings
-2. Check webhook delivery logs in Jira
-3. Verify AOF logs show received event
+1. Test webhook using Jira's "Validate" button (Automation) or delivery logs (System webhooks)
+2. Check AOF daemon logs for received events
 
 ```bash
 # Check logs
-tail -f /var/log/aof/daemon.log
+RUST_LOG=debug aofctl serve --config daemon.yaml
 
 # Look for:
-# INFO  Jira webhook received: issue_created
-# INFO  Posted comment to PROJ-123
+# INFO  Received webhook for platform: jira
+# DEBUG Jira signature verified via direct secret match
+# INFO  Processing event: jira:issue_created
 ```
 
 ---
diff --git a/docs/tutorials/jira-automation.md b/docs/tutorials/jira-automation.md
index 0fd0cea..215631c 100644
--- a/docs/tutorials/jira-automation.md
+++ b/docs/tutorials/jira-automation.md
@@ -406,10 +406,19 @@ spec:
   platforms:
     jira:
       enabled: true
-      cloud_id_env: JIRA_CLOUD_ID
+      # Use base_url for direct Atlassian URL (recommended)
+      base_url: https://your-domain.atlassian.net
+      # Or use cloud_id_env for Cloud ID based URL construction
+      # cloud_id_env: JIRA_CLOUD_ID
       user_email_env: JIRA_USER_EMAIL
       api_token_env: JIRA_API_TOKEN
       webhook_secret_env: JIRA_WEBHOOK_SECRET
+      bot_name: aof-automation  # Optional: name for comments
+
+      # Optional: Restrict to specific projects
+      # allowed_projects:
+      #   - PROJ
+      #   - DEV
 
   # Resource directories
   triggers:
@@ -425,7 +434,9 @@ spec:
     task_timeout_secs: 300
 ```
 
-**Webhook endpoint**: `http://your-domain:3000/webhook/jira`
+**Webhook endpoint**: `https://your-domain.com/webhook/jira`
+
+> **Important**: Configure your Jira automation rules to POST to `/webhook/jira`, not just the base URL.
 
 ## Step 9: Start the AOF Daemon
 
@@ -458,25 +469,192 @@ Deploy to a server with HTTPS:
 # Webhook URL: https://aof.example.com/webhook/jira
 ```
 
-## Step 11: Configure Jira Webhook
+## Step 11: Configure Jira Automation Webhook
+
+Jira Automation requires you to explicitly configure the webhook body. Here's how:
+
+### Creating the Automation Rule
 
 1. Go to your Jira project
 2. Navigate to **Project Settings** → **Automation**
-3. Click **Create rule** → **When: Issue created**
-4. Add action → **Send web request**
-5. Configure:
+3. Click **Create rule**
+4. Choose a trigger (e.g., **When: Issue created**)
+5. Add action → **Send web request**
+
+### Configuring the Web Request
+
+**URL**:
+```
+https://your-domain.com/webhook/jira
+```
+
+**HTTP method**: `POST`
+
+**Headers** (click "Add another header"):
+
+| Key | Value |
+|-----|-------|
+| `Content-Type` | `application/json` |
+| `X-Hub-Signature` | `<your JIRA_WEBHOOK_SECRET value>` |
+
+**Web request body**: Select **Custom data** and paste the appropriate template below.
+
+### Payload Templates by Event Type
+
+#### Issue Created / Issue Updated
+
+```json
+{
+  "webhookEvent": "jira:issue_created",
+  "timestamp": {{now.asLong}},
+  "issue": {
+    "id": "{{issue.id}}",
+    "key": "{{issue.key}}",
+    "fields": {
+      "summary": "{{issue.summary}}",
+      "description": "{{issue.description}}",
+      "issuetype": {
+        "name": "{{issue.issueType.name}}"
+      },
+      "status": {
+        "name": "{{issue.status.name}}"
+      },
+      "priority": {
+        "name": "{{issue.priority.name}}"
+      },
+      "project": {
+        "key": "{{issue.project.key}}",
+        "name": "{{issue.project.name}}"
+      },
+      "assignee": {
+        "displayName": "{{issue.assignee.displayName}}",
+        "accountId": "{{issue.assignee.accountId}}"
+      },
+      "reporter": {
+        "displayName": "{{issue.reporter.displayName}}",
+        "accountId": "{{issue.reporter.accountId}}"
+      },
+      "labels": {{issue.labels.asJsonArray}}
+    }
+  },
+  "user": {
+    "accountId": "{{initiator.accountId}}",
+    "displayName": "{{initiator.displayName}}"
+  }
+}
+```
+
+> **Note**: Change `"webhookEvent": "jira:issue_created"` to `"jira:issue_updated"` for update triggers.
+
+#### Comment Created
+
+```json
+{
+  "webhookEvent": "comment_created",
+  "timestamp": {{now.asLong}},
+  "issue": {
+    "id": "{{issue.id}}",
+    "key": "{{issue.key}}",
+    "fields": {
+      "summary": "{{issue.summary}}",
+      "project": {
+        "key": "{{issue.project.key}}",
+        "name": "{{issue.project.name}}"
+      }
+    }
+  },
+  "comment": {
+    "id": "{{comment.id}}",
+    "body": "{{comment.body}}",
+    "author": {
+      "accountId": "{{comment.author.accountId}}",
+      "displayName": "{{comment.author.displayName}}"
+    }
+  },
+  "user": {
+    "accountId": "{{initiator.accountId}}",
+    "displayName": "{{initiator.displayName}}"
+  }
+}
+```
+
+#### Work Logged
+
+```json
+{
+  "webhookEvent": "worklog_created",
+  "timestamp": {{now.asLong}},
+  "issue": {
+    "id": "{{issue.id}}",
+    "key": "{{issue.key}}",
+    "fields": {
+      "summary": "{{issue.summary}}",
+      "description": "{{issue.description}}",
+      "issuetype": {
+        "name": "{{issue.issueType.name}}"
+      },
+      "status": {
+        "name": "{{issue.status.name}}"
+      },
+      "priority": {
+        "name": "{{issue.priority.name}}"
+      },
+      "project": {
+        "key": "{{issue.project.key}}",
+        "name": "{{issue.project.name}}"
+      }
+    }
+  },
+  "user": {
+    "accountId": "{{initiator.accountId}}",
+    "displayName": "{{initiator.displayName}}"
+  }
+}
+```
+
+#### Sprint Started / Sprint Closed
+
+```json
+{
+  "webhookEvent": "sprint_started",
+  "timestamp": {{now.asLong}},
+  "sprint": {
+    "id": {{sprint.id}},
+    "name": "{{sprint.name}}",
+    "state": "{{sprint.state}}",
+    "goal": "{{sprint.goal}}"
+  },
+  "user": {
+    "accountId": "{{initiator.accountId}}",
+    "displayName": "{{initiator.displayName}}"
+  }
+}
+```
+
+### Important Notes
+
+1. **The `X-Hub-Signature` header value must exactly match your `JIRA_WEBHOOK_SECRET` environment variable** (case-sensitive)
+
+2. **Jira Automation sends a static secret**, not a computed HMAC signature. AOF supports both modes.
+
+3. **Smart values**: The `{{...}}` placeholders are Jira smart values that get replaced with actual data when the webhook fires.
+
+4. **Test your webhook**: After saving, use Jira's "Validate" button to test the configuration.
+
+### Alternative: System Webhooks (Admin Only)
+
+If you have Jira admin access, you can use built-in webhooks which automatically include full payloads:
+
+1. Go to **Settings** → **System** → **WebHooks**
+2. Click **Create a WebHook**
+3. Configure:
+   - **Name**: AOF Integration
    - **URL**: `https://your-domain.com/webhook/jira`
-   - **Headers**: Add `X-Hub-Signature` with webhook secret
-   - **HTTP method**: POST
-   - **Webhook body**: Issue data
-   - **Events**: Issue created, Issue updated
-6. Click **Turn it on**
-
-**Alternative (Jira Cloud)**:
-- Settings → System → Webhooks → Create Webhook
-- URL: `https://your-domain.com/webhook/jira`
-- Events: Issue created, updated, commented
-- Secret: Your webhook secret
+   - **Secret**: Your webhook secret (for HMAC verification)
+   - **Events**: Select desired events
+4. Click **Create**
+
+System webhooks automatically send complete payloads without manual body configuration.
 
 ## Step 12: Test Bug Triage
 

From 2f395a9ac0ad7cea82448cc8c2698a8f0f840c0a Mon Sep 17 00:00:00 2001
From: Gopal <gopal@Gopals-MacBook-Air.local>
Date: Sat, 3 Jan 2026 16:33:50 +0530
Subject: [PATCH 05/14] fix: Make Jira webhook payload fields optional for
 Automation compatibility
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Make id fields optional in JiraIssueType, JiraProject, JiraStatus,
  JiraPriority, and JiraStatusCategory structs
- Make self_url optional in JiraIssue and JiraComment structs
- Make created field optional in JiraComment
- Add debug logging for raw webhook payload
- Support both direct secret match and HMAC signature verification

These changes allow AOF to accept simpler webhook payloads from Jira
Automation rules, which don't include all the fields that Jira's
built-in system webhooks provide.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 crates/aof-triggers/src/platforms/jira.rs | 40 ++++++++++++++++-------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/crates/aof-triggers/src/platforms/jira.rs b/crates/aof-triggers/src/platforms/jira.rs
index e816c59..320dd26 100644
--- a/crates/aof-triggers/src/platforms/jira.rs
+++ b/crates/aof-triggers/src/platforms/jira.rs
@@ -229,7 +229,8 @@ pub struct JiraIssueFields {
 /// Jira issue type
 #[derive(Debug, Clone, Deserialize)]
 pub struct JiraIssueType {
-    pub id: String,
+    #[serde(default)]
+    pub id: Option<String>,
     pub name: String,
     #[serde(default)]
     pub description: Option<String>,
@@ -238,7 +239,8 @@ pub struct JiraIssueType {
 /// Jira project information
 #[derive(Debug, Clone, Deserialize)]
 pub struct JiraProject {
-    pub id: String,
+    #[serde(default)]
+    pub id: Option<String>,
     pub key: String,
     pub name: String,
 }
@@ -247,7 +249,8 @@ pub struct JiraProject {
 #[derive(Debug, Clone, Deserialize)]
 #[serde(rename_all = "camelCase")]
 pub struct JiraStatus {
-    pub id: String,
+    #[serde(default)]
+    pub id: Option<String>,
     pub name: String,
     #[serde(default)]
     pub status_category: Option<JiraStatusCategory>,
@@ -257,15 +260,18 @@ pub struct JiraStatus {
 #[derive(Debug, Clone, Deserialize)]
 #[serde(rename_all = "camelCase")]
 pub struct JiraStatusCategory {
-    pub id: i64,
-    pub key: String,
+    #[serde(default)]
+    pub id: Option<i64>,
+    #[serde(default)]
+    pub key: Option<String>,
     pub name: String,
 }
 
 /// Jira priority
 #[derive(Debug, Clone, Deserialize)]
 pub struct JiraPriority {
-    pub id: String,
+    #[serde(default)]
+    pub id: Option<String>,
     pub name: String,
 }
 
@@ -274,8 +280,8 @@ pub struct JiraPriority {
 pub struct JiraIssue {
     pub id: String,
     pub key: String,
-    #[serde(rename = "self")]
-    pub self_url: String,
+    #[serde(rename = "self", default)]
+    pub self_url: Option<String>,
     pub fields: JiraIssueFields,
 }
 
@@ -283,15 +289,17 @@ pub struct JiraIssue {
 #[derive(Debug, Clone, Deserialize)]
 #[serde(rename_all = "camelCase")]
 pub struct JiraComment {
-    pub id: String,
-    #[serde(rename = "self")]
-    pub self_url: String,
+    #[serde(default)]
+    pub id: Option<String>,
+    #[serde(rename = "self", default)]
+    pub self_url: Option<String>,
     pub body: String,
     #[serde(default)]
     pub author: Option<JiraUser>,
     #[serde(default)]
     pub update_author: Option<JiraUser>,
-    pub created: String,
+    #[serde(default)]
+    pub created: Option<String>,
     #[serde(default)]
     pub updated: Option<String>,
 }
@@ -884,6 +892,14 @@ impl TriggerPlatform for JiraPlatform {
         raw: &[u8],
         headers: &HashMap<String, String>,
     ) -> Result<TriggerMessage, PlatformError> {
+        // Log raw payload for debugging
+        if let Ok(raw_str) = std::str::from_utf8(raw) {
+            debug!("Jira webhook raw payload ({} bytes): {}", raw.len(),
+                   if raw_str.len() > 500 { &raw_str[..500] } else { raw_str });
+        } else {
+            debug!("Jira webhook raw payload ({} bytes): <binary>", raw.len());
+        }
+
         // Verify signature if present
         if let Some(signature) = headers.get("x-hub-signature") {
             if !self.verify_jira_signature(raw, signature) {

From a7f7fe47086c2ef8bd3f859e1ddfe80cff181aeb Mon Sep 17 00:00:00 2001
From: Gopal <gopal@Gopals-MacBook-Air.local>
Date: Sat, 3 Jan 2026 16:42:14 +0530
Subject: [PATCH 06/14] docs: Update Jira webhook payload templates with
 minimal examples and curl testing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add minimal payload templates for quick testing
- Add full payload templates with all fields for production use
- Add curl testing section for debugging webhooks without Jira
- Simplify field requirements documentation
- Document signature header for Jira Automation rules

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 docs/reference/jira-integration.md |  83 ++++++++----------
 docs/tutorials/jira-automation.md  | 136 +++++++++++++++--------------
 2 files changed, 108 insertions(+), 111 deletions(-)

diff --git a/docs/reference/jira-integration.md b/docs/reference/jira-integration.md
index 869ce43..0fcdc01 100644
--- a/docs/reference/jira-integration.md
+++ b/docs/reference/jira-integration.md
@@ -1353,39 +1353,24 @@ Use this method if you don't have Jira admin access or want per-project control.
 
 #### 3. Payload Templates
 
-**Issue Created/Updated:**
+AOF accepts flexible payloads - most fields are optional. Use minimal templates or add more fields as needed.
+
+**Issue Created/Updated (Minimal):**
 ```json
 {
   "webhookEvent": "jira:issue_created",
-  "timestamp": {{now.asLong}},
+  "timestamp": {{now.epochMillis}},
   "issue": {
     "id": "{{issue.id}}",
     "key": "{{issue.key}}",
     "fields": {
       "summary": "{{issue.summary}}",
-      "description": "{{issue.description}}",
       "issuetype": { "name": "{{issue.issueType.name}}" },
       "status": { "name": "{{issue.status.name}}" },
-      "priority": { "name": "{{issue.priority.name}}" },
-      "project": {
-        "key": "{{issue.project.key}}",
-        "name": "{{issue.project.name}}"
-      },
-      "assignee": {
-        "displayName": "{{issue.assignee.displayName}}",
-        "accountId": "{{issue.assignee.accountId}}"
-      },
-      "reporter": {
-        "displayName": "{{issue.reporter.displayName}}",
-        "accountId": "{{issue.reporter.accountId}}"
-      },
-      "labels": {{issue.labels.asJsonArray}}
+      "project": { "key": "{{issue.project.key}}", "name": "{{issue.project.name}}" }
     }
   },
-  "user": {
-    "accountId": "{{initiator.accountId}}",
-    "displayName": "{{initiator.displayName}}"
-  }
+  "user": { "accountId": "{{initiator.accountId}}", "displayName": "{{initiator.displayName}}" }
 }
 ```
 
@@ -1393,30 +1378,20 @@ Use this method if you don't have Jira admin access or want per-project control.
 ```json
 {
   "webhookEvent": "comment_created",
-  "timestamp": {{now.asLong}},
+  "timestamp": {{now.epochMillis}},
   "issue": {
     "id": "{{issue.id}}",
     "key": "{{issue.key}}",
     "fields": {
       "summary": "{{issue.summary}}",
-      "project": {
-        "key": "{{issue.project.key}}",
-        "name": "{{issue.project.name}}"
-      }
+      "project": { "key": "{{issue.project.key}}", "name": "{{issue.project.name}}" }
     }
   },
   "comment": {
-    "id": "{{comment.id}}",
     "body": "{{comment.body}}",
-    "author": {
-      "accountId": "{{comment.author.accountId}}",
-      "displayName": "{{comment.author.displayName}}"
-    }
+    "author": { "accountId": "{{comment.author.accountId}}", "displayName": "{{comment.author.displayName}}" }
   },
-  "user": {
-    "accountId": "{{initiator.accountId}}",
-    "displayName": "{{initiator.displayName}}"
-  }
+  "user": { "accountId": "{{initiator.accountId}}", "displayName": "{{initiator.displayName}}" }
 }
 ```
 
@@ -1424,7 +1399,7 @@ Use this method if you don't have Jira admin access or want per-project control.
 ```json
 {
   "webhookEvent": "worklog_created",
-  "timestamp": {{now.asLong}},
+  "timestamp": {{now.epochMillis}},
   "issue": {
     "id": "{{issue.id}}",
     "key": "{{issue.key}}",
@@ -1432,22 +1407,40 @@ Use this method if you don't have Jira admin access or want per-project control.
       "summary": "{{issue.summary}}",
       "issuetype": { "name": "{{issue.issueType.name}}" },
       "status": { "name": "{{issue.status.name}}" },
-      "priority": { "name": "{{issue.priority.name}}" },
-      "project": {
-        "key": "{{issue.project.key}}",
-        "name": "{{issue.project.name}}"
-      }
+      "project": { "key": "{{issue.project.key}}", "name": "{{issue.project.name}}" }
     }
   },
-  "user": {
-    "accountId": "{{initiator.accountId}}",
-    "displayName": "{{initiator.displayName}}"
-  }
+  "user": { "accountId": "{{initiator.accountId}}", "displayName": "{{initiator.displayName}}" }
 }
 ```
 
 > **Important**: The `{{...}}` placeholders are Jira smart values. They get replaced with actual data when the webhook fires.
 
+#### Testing with curl
+
+Test the endpoint before configuring Jira:
+
+```bash
+curl -X POST https://your-domain.com/webhook/jira \
+  -H "Content-Type: application/json" \
+  -H "X-Hub-Signature: YOUR_SECRET_HERE" \
+  -d '{
+    "webhookEvent": "jira:issue_created",
+    "timestamp": 1735897519000,
+    "issue": {
+      "id": "10005",
+      "key": "PROJ-123",
+      "fields": {
+        "summary": "Test issue",
+        "issuetype": { "name": "Bug" },
+        "status": { "name": "To Do" },
+        "project": { "key": "PROJ", "name": "My Project" }
+      }
+    },
+    "user": { "accountId": "test", "displayName": "Test User" }
+  }'
+```
+
 #### 4. Signature Verification
 
 Jira Automation sends the `X-Hub-Signature` header value as a **static secret** (not computed HMAC). Your `JIRA_WEBHOOK_SECRET` environment variable must **exactly match** the value you configure in the header.
diff --git a/docs/tutorials/jira-automation.md b/docs/tutorials/jira-automation.md
index 215631c..eeca0d8 100644
--- a/docs/tutorials/jira-automation.md
+++ b/docs/tutorials/jira-automation.md
@@ -501,46 +501,49 @@ https://your-domain.com/webhook/jira
 
 ### Payload Templates by Event Type
 
-#### Issue Created / Issue Updated
+AOF accepts flexible payloads - most fields are optional. Use the minimal templates below, or add more fields as needed.
+
+#### Issue Created / Issue Updated (Minimal)
+
+```json
+{
+  "webhookEvent": "jira:issue_created",
+  "timestamp": {{now.epochMillis}},
+  "issue": {
+    "id": "{{issue.id}}",
+    "key": "{{issue.key}}",
+    "fields": {
+      "summary": "{{issue.summary}}",
+      "issuetype": { "name": "{{issue.issueType.name}}" },
+      "status": { "name": "{{issue.status.name}}" },
+      "project": { "key": "{{issue.project.key}}", "name": "{{issue.project.name}}" }
+    }
+  },
+  "user": { "accountId": "{{initiator.accountId}}", "displayName": "{{initiator.displayName}}" }
+}
+```
+
+#### Issue Created / Issue Updated (Full)
 
 ```json
 {
   "webhookEvent": "jira:issue_created",
-  "timestamp": {{now.asLong}},
+  "timestamp": {{now.epochMillis}},
   "issue": {
     "id": "{{issue.id}}",
     "key": "{{issue.key}}",
     "fields": {
       "summary": "{{issue.summary}}",
       "description": "{{issue.description}}",
-      "issuetype": {
-        "name": "{{issue.issueType.name}}"
-      },
-      "status": {
-        "name": "{{issue.status.name}}"
-      },
-      "priority": {
-        "name": "{{issue.priority.name}}"
-      },
-      "project": {
-        "key": "{{issue.project.key}}",
-        "name": "{{issue.project.name}}"
-      },
-      "assignee": {
-        "displayName": "{{issue.assignee.displayName}}",
-        "accountId": "{{issue.assignee.accountId}}"
-      },
-      "reporter": {
-        "displayName": "{{issue.reporter.displayName}}",
-        "accountId": "{{issue.reporter.accountId}}"
-      },
-      "labels": {{issue.labels.asJsonArray}}
+      "issuetype": { "name": "{{issue.issueType.name}}" },
+      "status": { "name": "{{issue.status.name}}" },
+      "priority": { "name": "{{issue.priority.name}}" },
+      "project": { "key": "{{issue.project.key}}", "name": "{{issue.project.name}}" },
+      "assignee": { "displayName": "{{issue.assignee.displayName}}", "accountId": "{{issue.assignee.accountId}}" },
+      "reporter": { "displayName": "{{issue.reporter.displayName}}", "accountId": "{{issue.reporter.accountId}}" }
     }
   },
-  "user": {
-    "accountId": "{{initiator.accountId}}",
-    "displayName": "{{initiator.displayName}}"
-  }
+  "user": { "accountId": "{{initiator.accountId}}", "displayName": "{{initiator.displayName}}" }
 }
 ```
 
@@ -551,30 +554,20 @@ https://your-domain.com/webhook/jira
 ```json
 {
   "webhookEvent": "comment_created",
-  "timestamp": {{now.asLong}},
+  "timestamp": {{now.epochMillis}},
   "issue": {
     "id": "{{issue.id}}",
     "key": "{{issue.key}}",
     "fields": {
       "summary": "{{issue.summary}}",
-      "project": {
-        "key": "{{issue.project.key}}",
-        "name": "{{issue.project.name}}"
-      }
+      "project": { "key": "{{issue.project.key}}", "name": "{{issue.project.name}}" }
     }
   },
   "comment": {
-    "id": "{{comment.id}}",
     "body": "{{comment.body}}",
-    "author": {
-      "accountId": "{{comment.author.accountId}}",
-      "displayName": "{{comment.author.displayName}}"
-    }
+    "author": { "accountId": "{{comment.author.accountId}}", "displayName": "{{comment.author.displayName}}" }
   },
-  "user": {
-    "accountId": "{{initiator.accountId}}",
-    "displayName": "{{initiator.displayName}}"
-  }
+  "user": { "accountId": "{{initiator.accountId}}", "displayName": "{{initiator.displayName}}" }
 }
 ```
 
@@ -583,32 +576,19 @@ https://your-domain.com/webhook/jira
 ```json
 {
   "webhookEvent": "worklog_created",
-  "timestamp": {{now.asLong}},
+  "timestamp": {{now.epochMillis}},
   "issue": {
     "id": "{{issue.id}}",
     "key": "{{issue.key}}",
     "fields": {
       "summary": "{{issue.summary}}",
-      "description": "{{issue.description}}",
-      "issuetype": {
-        "name": "{{issue.issueType.name}}"
-      },
-      "status": {
-        "name": "{{issue.status.name}}"
-      },
-      "priority": {
-        "name": "{{issue.priority.name}}"
-      },
-      "project": {
-        "key": "{{issue.project.key}}",
-        "name": "{{issue.project.name}}"
-      }
+      "issuetype": { "name": "{{issue.issueType.name}}" },
+      "status": { "name": "{{issue.status.name}}" },
+      "priority": { "name": "{{issue.priority.name}}" },
+      "project": { "key": "{{issue.project.key}}", "name": "{{issue.project.name}}" }
     }
   },
-  "user": {
-    "accountId": "{{initiator.accountId}}",
-    "displayName": "{{initiator.displayName}}"
-  }
+  "user": { "accountId": "{{initiator.accountId}}", "displayName": "{{initiator.displayName}}" }
 }
 ```
 
@@ -617,20 +597,44 @@ https://your-domain.com/webhook/jira
 ```json
 {
   "webhookEvent": "sprint_started",
-  "timestamp": {{now.asLong}},
+  "timestamp": {{now.epochMillis}},
   "sprint": {
     "id": {{sprint.id}},
     "name": "{{sprint.name}}",
     "state": "{{sprint.state}}",
     "goal": "{{sprint.goal}}"
   },
-  "user": {
-    "accountId": "{{initiator.accountId}}",
-    "displayName": "{{initiator.displayName}}"
-  }
+  "user": { "accountId": "{{initiator.accountId}}", "displayName": "{{initiator.displayName}}" }
 }
 ```
 
+### Testing with curl
+
+Before configuring Jira, test the endpoint directly:
+
+```bash
+curl -X POST https://your-ngrok-url.ngrok-free.dev/webhook/jira \
+  -H "Content-Type: application/json" \
+  -H "X-Hub-Signature: YOUR_SECRET_HERE" \
+  -d '{
+    "webhookEvent": "worklog_created",
+    "timestamp": 1735897519000,
+    "issue": {
+      "id": "10005",
+      "key": "SCRUM-5",
+      "fields": {
+        "summary": "Test issue",
+        "issuetype": { "name": "Task" },
+        "status": { "name": "To Do" },
+        "project": { "key": "SCRUM", "name": "Team Astro" }
+      }
+    },
+    "user": { "accountId": "test", "displayName": "Test User" }
+  }'
+```
+
+Replace `YOUR_SECRET_HERE` with your `JIRA_WEBHOOK_SECRET` value.
+
 ### Important Notes
 
 1. **The `X-Hub-Signature` header value must exactly match your `JIRA_WEBHOOK_SECRET` environment variable** (case-sensitive)

From a99154ad6d9f4b6b08befee9824d11afc917025f Mon Sep 17 00:00:00 2001
From: Gopal <gopal@Gopals-MacBook-Air.local>
Date: Sat, 3 Jan 2026 17:31:29 +0530
Subject: [PATCH 07/14] fix: Make Jira webhook timestamp field optional
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Jira Automation doesn't substitute smart values during validation
when no work item key is provided, resulting in empty timestamp values.
Made timestamp optional and default to current time when not provided.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 crates/aof-triggers/src/platforms/jira.rs | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/crates/aof-triggers/src/platforms/jira.rs b/crates/aof-triggers/src/platforms/jira.rs
index 320dd26..0607e34 100644
--- a/crates/aof-triggers/src/platforms/jira.rs
+++ b/crates/aof-triggers/src/platforms/jira.rs
@@ -347,8 +347,9 @@ pub struct JiraChangelog {
 #[derive(Debug, Clone, Deserialize)]
 #[serde(rename_all = "camelCase")]
 pub struct JiraWebhookPayload {
-    /// Webhook event timestamp
-    pub timestamp: i64,
+    /// Webhook event timestamp (optional - may not be provided by Jira Automation)
+    #[serde(default)]
+    pub timestamp: Option<i64>,
 
     /// Event type
     pub webhook_event: String,
@@ -861,8 +862,9 @@ impl JiraPlatform {
             metadata.insert("changelog".to_string(), serde_json::to_value(changelog).unwrap_or_default());
         }
 
-        // Message ID from issue and timestamp
-        let message_id = format!("jira-{}-{}-{}", issue.id, event_type, payload.timestamp);
+        // Message ID from issue and timestamp (use current time if not provided)
+        let ts = payload.timestamp.unwrap_or_else(|| chrono::Utc::now().timestamp_millis());
+        let message_id = format!("jira-{}-{}-{}", issue.id, event_type, ts);
 
         // Thread ID from issue key
         let thread_id = Some(issue.key.clone());
@@ -873,7 +875,7 @@ impl JiraPlatform {
             channel_id,
             user: trigger_user,
             text,
-            timestamp: chrono::DateTime::from_timestamp(payload.timestamp / 1000, 0).unwrap_or_else(chrono::Utc::now),
+            timestamp: chrono::DateTime::from_timestamp(ts / 1000, 0).unwrap_or_else(chrono::Utc::now),
             metadata,
             thread_id,
             reply_to: None,

From 34181e658e7d42989d37e2508a9036dc4448b544 Mon Sep 17 00:00:00 2001
From: Gopal <gopal@Gopals-MacBook-Air.local>
Date: Sat, 3 Jan 2026 18:36:27 +0530
Subject: [PATCH 08/14] fix: Make JiraProject.name field optional
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Jira Automation smart values may not populate all fields during
webhook validation. Made project name optional to handle minimal payloads.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 crates/aof-triggers/src/platforms/jira.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crates/aof-triggers/src/platforms/jira.rs b/crates/aof-triggers/src/platforms/jira.rs
index 0607e34..cdc9fd0 100644
--- a/crates/aof-triggers/src/platforms/jira.rs
+++ b/crates/aof-triggers/src/platforms/jira.rs
@@ -242,7 +242,8 @@ pub struct JiraProject {
     #[serde(default)]
     pub id: Option<String>,
     pub key: String,
-    pub name: String,
+    #[serde(default)]
+    pub name: Option<String>,
 }
 
 /// Jira status

From e0a8b4065b83f6e1135e3c131dc6e50f305c2f5b Mon Sep 17 00:00:00 2001
From: Gopal <gopal@Gopals-MacBook-Air.local>
Date: Sat, 3 Jan 2026 18:40:38 +0530
Subject: [PATCH 09/14] fix: Make JiraIssue.id field optional
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use issue key as fallback when id is not provided in payload.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 crates/aof-triggers/src/platforms/jira.rs | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/crates/aof-triggers/src/platforms/jira.rs b/crates/aof-triggers/src/platforms/jira.rs
index cdc9fd0..c53ef6e 100644
--- a/crates/aof-triggers/src/platforms/jira.rs
+++ b/crates/aof-triggers/src/platforms/jira.rs
@@ -279,7 +279,8 @@ pub struct JiraPriority {
 /// Jira issue information
 #[derive(Debug, Clone, Deserialize)]
 pub struct JiraIssue {
-    pub id: String,
+    #[serde(default)]
+    pub id: Option<String>,
     pub key: String,
     #[serde(rename = "self", default)]
     pub self_url: Option<String>,
@@ -837,7 +838,9 @@ impl JiraPlatform {
         // Build metadata with full event details
         let mut metadata = HashMap::new();
         metadata.insert("event_type".to_string(), serde_json::json!(event_type));
-        metadata.insert("issue_id".to_string(), serde_json::json!(issue.id));
+        if let Some(ref id) = issue.id {
+            metadata.insert("issue_id".to_string(), serde_json::json!(id));
+        }
         metadata.insert("issue_key".to_string(), serde_json::json!(issue.key));
         metadata.insert("issue_type".to_string(), serde_json::json!(issue.fields.issuetype.name));
         metadata.insert("project_id".to_string(), serde_json::json!(issue.fields.project.id));
@@ -865,7 +868,8 @@ impl JiraPlatform {
 
         // Message ID from issue and timestamp (use current time if not provided)
         let ts = payload.timestamp.unwrap_or_else(|| chrono::Utc::now().timestamp_millis());
-        let message_id = format!("jira-{}-{}-{}", issue.id, event_type, ts);
+        let issue_id = issue.id.as_deref().unwrap_or(&issue.key);
+        let message_id = format!("jira-{}-{}-{}", issue_id, event_type, ts);
 
         // Thread ID from issue key
         let thread_id = Some(issue.key.clone());

From 7f93f33e0b1ac9e4561e3c31cf32c34a55567710 Mon Sep 17 00:00:00 2001
From: Gourav Shah <gs@initcron.org>
Date: Fri, 23 Jan 2026 10:59:45 +0530
Subject: [PATCH 10/14] wip: Add GitHub platform support and PR automation
 flows

- Add GitHub platform configuration to daemon serve
- Add event-based command matching for GitHub webhooks
- Add PR review agent, flows, and trigger configs
- Add GitHub automation documentation

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 agents/github-pr-reviewer.yaml         |  39 ++
 config/aof/daemon.yaml                 |  23 +-
 config/github-automation.yaml          |  79 +++
 config/github-triggers.yaml            |  30 ++
 crates/aof-triggers/src/handler/mod.rs |  13 +
 crates/aofctl/src/commands/serve.rs    |  77 +++
 docs/guides/github-setup.md            | 218 +++++++++
 docs/platforms/github.md               | 644 +++++++++++++++++++++++++
 docs/tutorials/github-pr-automation.md | 592 +++++++++++++++++++++++
 examples/config/daemon.yaml            |   2 +-
 flows/github-pr-review.yaml            |  25 +
 flows/github/issue-triage-flow.yaml    | 132 +++++
 flows/github/pr-labeler-flow.yaml      |  73 +++
 flows/github/pr-review-flow.yaml       | 215 +++++++++
 flows/github/pr-review.yaml            |  25 +
 scripts/flow-pr-review.sh              |  84 ++++
 scripts/pr-review.sh                   | 118 +++++
 scripts/test-github-webhook.sh         | 146 ++++++
 18 files changed, 2531 insertions(+), 4 deletions(-)
 create mode 100644 agents/github-pr-reviewer.yaml
 create mode 100644 config/github-automation.yaml
 create mode 100644 config/github-triggers.yaml
 create mode 100644 docs/guides/github-setup.md
 create mode 100644 docs/platforms/github.md
 create mode 100644 docs/tutorials/github-pr-automation.md
 create mode 100644 flows/github-pr-review.yaml
 create mode 100644 flows/github/issue-triage-flow.yaml
 create mode 100644 flows/github/pr-labeler-flow.yaml
 create mode 100644 flows/github/pr-review-flow.yaml
 create mode 100644 flows/github/pr-review.yaml
 create mode 100755 scripts/flow-pr-review.sh
 create mode 100755 scripts/pr-review.sh
 create mode 100755 scripts/test-github-webhook.sh

diff --git a/agents/github-pr-reviewer.yaml b/agents/github-pr-reviewer.yaml
new file mode 100644
index 0000000..8a11332
--- /dev/null
+++ b/agents/github-pr-reviewer.yaml
@@ -0,0 +1,39 @@
+# GitHub PR Review Agent
+# Automatically reviews pull requests using AI
+
+apiVersion: aof.dev/v1
+kind: Agent
+metadata:
+  name: github-pr-reviewer
+  labels:
+    category: code-review
+    platform: github
+
+spec:
+  model: google:gemini-2.5-flash
+
+  system_prompt: |
+    You are a GitHub PR review bot. When you receive a PR event, you MUST:
+
+    1. Extract the repository and PR number from the message
+    2. Use the shell tool to run the review script:
+       ./scripts/pr-review.sh <owner/repo> <pr_number>
+
+    For example, if you receive:
+    "pr:opened:main:feature #42 Add new feature - owner/repo"
+
+    You should run:
+    ./scripts/pr-review.sh owner/repo 42
+
+    The script will:
+    - Fetch the PR diff from GitHub
+    - Generate an AI-powered code review
+    - Post the review as a comment on the PR
+
+    IMPORTANT: Always use the shell tool to run the script. Do not just acknowledge the PR.
+
+  tools:
+    - shell
+
+  max_iterations: 3
+  temperature: 0.1
diff --git a/config/aof/daemon.yaml b/config/aof/daemon.yaml
index 0001dc1..e263153 100644
--- a/config/aof/daemon.yaml
+++ b/config/aof/daemon.yaml
@@ -32,6 +32,19 @@ spec:
       bot_token_env: TELEGRAM_BOT_TOKEN
       webhook_secret: null
 
+    # GitHub - Enable with GITHUB_TOKEN and GITHUB_WEBHOOK_SECRET
+    github:
+      enabled: true
+      token_env: GITHUB_TOKEN
+      webhook_secret_env: GITHUB_WEBHOOK_SECRET
+      bot_name: "aofbot"
+      # Optional: Restrict to specific repositories
+      # allowed_repos:
+      #   - "yourorg/yourrepo"
+      # Optional: Restrict to specific organizations
+      # allowed_orgs:
+      #   - "yourorg"
+
     # Discord Bot (not yet implemented)
     # discord:
     #   enabled: false
@@ -45,12 +58,16 @@ spec:
     #   access_token_env: WHATSAPP_ACCESS_TOKEN
     #   verify_token_env: WHATSAPP_VERIFY_TOKEN
 
+  triggers:
+    directory: ./config
+    watch: true
+
   agents:
-    directory: /app/agents
+    directory: ./agents
     watch: true
 
   flows:
-    directory: /app/flows
+    directory: ./flows
     watch: true
     enabled: true
 
@@ -58,4 +75,4 @@ spec:
     max_concurrent_tasks: 10
     task_timeout_secs: 300
     max_tasks_per_user: 5
-    default_agent: devops
+    default_agent: devops-agent
diff --git a/config/github-automation.yaml b/config/github-automation.yaml
new file mode 100644
index 0000000..60766ba
--- /dev/null
+++ b/config/github-automation.yaml
@@ -0,0 +1,79 @@
+version: v1
+kind: TriggerConfig
+
+server:
+  host: "0.0.0.0"
+  port: 8080
+  base_path: "/webhooks"
+
+platforms:
+  github:
+    type: github
+
+    # Authentication (choose one)
+    # Option A: Personal Access Token
+    #token: "${GITHUB_TOKEN}"
+
+    # Option B: GitHub App (preferred)
+    # app_id: "${GITHUB_APP_ID}"
+    # private_key_path: "/etc/aof/github-app-private-key.pem"
+    # installation_id: "${GITHUB_INSTALLATION_ID}"
+
+    # Webhook verification
+    webhook_secret: "${GITHUB_WEBHOOK_SECRET}"
+
+    # Bot identity for comments
+    bot_name: "AOFBot"
+
+    # Repository filters (empty = all repos app has access to)
+    allowed_repos:
+      - "gouravshah/instavote-kustomize"
+      - "myorg/web"
+      - "myorg/infrastructure"
+
+    # Event filters
+    allowed_events:
+      - "pull_request"
+      - "push"
+      - "issues"
+      - "workflow_run"
+      - "check_run"
+      - "release"
+
+    # User filters for sensitive operations
+    allowed_users:
+      - "initcron"
+      - "gops123"
+      - "sre-team"
+
+# Event routing
+routing:
+  default_flow: "github-event-logger"
+
+  # Route by event type and action
+  events:
+    pull_request:
+      opened: "pr-review-flow"
+      synchronize: "pr-review-flow"
+      closed: "pr-cleanup-flow"
+
+    push:
+      # Route by branch
+      branches:
+        main: "production-deploy-flow"
+        develop: "staging-deploy-flow"
+        "release/*": "release-deploy-flow"
+
+    issues:
+      opened: "issue-triage-flow"
+      labeled: "issue-handler-flow"
+
+    workflow_run:
+      completed: "workflow-result-handler"
+
+    release:
+      published: "release-announce-flow"
+
+flows:
+  directory: "./flows/github"
+  watch: true
diff --git a/config/github-triggers.yaml b/config/github-triggers.yaml
new file mode 100644
index 0000000..c02283b
--- /dev/null
+++ b/config/github-triggers.yaml
@@ -0,0 +1,30 @@
+apiVersion: aof.dev/v1
+kind: Trigger
+metadata:
+  name: github-pr-automation
+  labels:
+    platform: github
+
+spec:
+  type: GitHub
+
+  config:
+    webhook_secret: ${GITHUB_WEBHOOK_SECRET}
+    github_events:
+      - pull_request
+
+  # Map PR events to the review flow (not agent!)
+  # AgentFlows execute steps deterministically
+  # Agents just respond to messages (LLM decides what to do)
+  commands:
+    pull_request.opened:
+      flow: github-pr-review
+      description: "AI code review for new PRs"
+
+    pull_request.synchronize:
+      flow: github-pr-review
+      description: "AI code review for updated PRs"
+
+    pull_request.reopened:
+      flow: github-pr-review
+      description: "AI code review for reopened PRs"
diff --git a/crates/aof-triggers/src/handler/mod.rs b/crates/aof-triggers/src/handler/mod.rs
index a7c6885..a0b23bc 100644
--- a/crates/aof-triggers/src/handler/mod.rs
+++ b/crates/aof-triggers/src/handler/mod.rs
@@ -1908,6 +1908,19 @@ impl TriggerHandler {
                     return (Some(cmd_name), Some(message.text.clone()));
                 }
             }
+
+            // Check GitHub/GitLab style: event_type + action = command
+            // e.g., event_type="pull_request", action="opened" -> "pull_request.opened"
+            if let Some(action) = message.metadata.get("action").and_then(|v| v.as_str()) {
+                let cmd_name = format!("{}.{}", event_type, action);
+                info!("Constructed GitHub command from event: {}", cmd_name);
+
+                // Check if we have a binding for this command
+                if self.config.command_bindings.contains_key(&cmd_name) {
+                    info!("Found command binding for GitHub event: {}", cmd_name);
+                    return (Some(cmd_name), Some(message.text.clone()));
+                }
+            }
         }
 
         // Check Telegram/WhatsApp/CLI style: message starts with /command
diff --git a/crates/aofctl/src/commands/serve.rs b/crates/aofctl/src/commands/serve.rs
index 71b1c61..b95db60 100644
--- a/crates/aofctl/src/commands/serve.rs
+++ b/crates/aofctl/src/commands/serve.rs
@@ -135,6 +135,9 @@ pub struct PlatformConfigs {
 
     /// WhatsApp configuration
     pub whatsapp: Option<WhatsAppPlatformConfig>,
+
+    /// GitHub configuration
+    pub github: Option<GitHubPlatformConfig>,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -203,6 +206,33 @@ pub struct WhatsAppPlatformConfig {
     pub app_secret: Option<String>,
 }
 
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct GitHubPlatformConfig {
+    /// Enable this platform
+    #[serde(default = "default_true")]
+    pub enabled: bool,
+
+    /// GitHub token (or env var name with _env suffix)
+    pub token: Option<String>,
+    pub token_env: Option<String>,
+
+    /// Webhook secret (or env var name)
+    pub webhook_secret: Option<String>,
+    pub webhook_secret_env: Option<String>,
+
+    /// Bot/App name for identification
+    pub bot_name: Option<String>,
+
+    /// Allowed repository filter (optional whitelist)
+    /// Format: ["owner/repo", "owner/*", "*"]
+    #[serde(default)]
+    pub allowed_repos: Option<Vec<String>>,
+
+    /// Allowed GitHub organizations (optional whitelist)
+    #[serde(default)]
+    pub allowed_orgs: Option<Vec<String>>,
+}
+
 #[derive(Debug, Clone, Default, Serialize, Deserialize)]
 pub struct AgentDiscoveryConfig {
     /// Directory containing agent YAML files
@@ -513,6 +543,53 @@ pub async fn execute(
         }
     }
 
+    // GitHub
+    if let Some(github_config) = &config.spec.platforms.github {
+        if github_config.enabled {
+            let token = resolve_env_value(
+                github_config.token.as_deref(),
+                github_config.token_env.as_deref(),
+            );
+            let webhook_secret = resolve_env_value(
+                github_config.webhook_secret.as_deref(),
+                github_config.webhook_secret_env.as_deref(),
+            );
+
+            if let Some(secret) = webhook_secret {
+                let platform_config = GitHubConfig {
+                    token: token.unwrap_or_default(), // Token is optional, webhook_secret is required
+                    webhook_secret: secret,
+                    bot_name: github_config.bot_name.clone().unwrap_or_else(|| "aofbot".to_string()),
+                    api_url: "https://api.github.com".to_string(),
+                    allowed_repos: github_config.allowed_repos.clone(),
+                    allowed_events: None,
+                    allowed_users: None,
+                    auto_approve_patterns: None,
+                    enable_status_checks: true,
+                    enable_reviews: true,
+                    enable_comments: true,
+                };
+
+                if platform_config.token.is_empty() {
+                    eprintln!("  GitHub: GITHUB_TOKEN not set, API features (posting comments) disabled");
+                }
+
+                match GitHubPlatform::new(platform_config) {
+                    Ok(platform) => {
+                        handler.register_platform(Arc::new(platform));
+                        println!("  Registered platform: github");
+                        platforms_registered += 1;
+                    }
+                    Err(e) => {
+                        eprintln!("  Failed to create GitHub platform: {}", e);
+                    }
+                }
+            } else {
+                eprintln!("  GitHub enabled but missing webhook_secret");
+            }
+        }
+    }
+
     // Load Triggers from directory
     let triggers_dir_path = triggers_dir
         .map(PathBuf::from)
diff --git a/docs/guides/github-setup.md b/docs/guides/github-setup.md
new file mode 100644
index 0000000..2c7002f
--- /dev/null
+++ b/docs/guides/github-setup.md
@@ -0,0 +1,218 @@
+# GitHub PR Review Setup - Quick Start
+
+✅ **Configuration complete!** Your daemon config now includes GitHub integration.
+
+## 1. Set Environment Variables
+
+Add these to your `~/.zshrc` (or `~/.bashrc`):
+
+```bash
+# Required for GitHub
+export GITHUB_TOKEN=ghp_your_github_token
+export GITHUB_WEBHOOK_SECRET=$(openssl rand -hex 32)
+
+# Required for LLM (using Gemini)
+export GOOGLE_API_KEY=your_google_api_key
+
+# Keep your existing Slack/Telegram (if using)
+export SLACK_BOT_TOKEN=xoxb-...
+export SLACK_SIGNING_SECRET=...
+export TELEGRAM_BOT_TOKEN=...
+```
+
+### Get GitHub Token
+1. Visit: https://github.com/settings/tokens
+2. **Generate new token (classic)**
+3. Select scope: `repo` (Full control)
+4. Copy the token (starts with `ghp_`)
+
+**IMPORTANT**: After setting environment variables, source your config:
+
+```bash
+source ~/.zshrc  # or source ~/.bashrc
+```
+
+## 2. Test the Setup
+
+Before starting the server, verify everything works:
+
+```bash
+# Source environment variables
+source ~/.zshrc
+
+# Run the test script
+./scripts/test-github-webhook.sh
+```
+
+You should see:
+```
+✓ Server is running
+✓ GITHUB_WEBHOOK_SECRET is set
+✓ GITHUB_TOKEN is set
+✓ Ping event successful (HTTP 200)
+✓ Pull request event successful (HTTP 200)
+All tests passed! ✓
+```
+
+## 3. Start the Server
+
+```bash
+# Make sure to source environment variables first!
+source ~/.zshrc
+
+# From /Users/gshah/work/opsflow-sh/aof
+./target/release/aofctl serve --config config/aof/daemon.yaml
+```
+
+**You should see:**
+```
+Starting AOF Trigger Server
+  Bind address: 0.0.0.0:8080
+  Registered platform: slack
+  Registered platform: telegram
+  Registered platform: github          ← This confirms it works!
+Server starting...
+```
+
+## 3. Test the Setup
+
+```bash
+# Check if server is running
+curl http://localhost:8080/health
+# Should return: {"status":"ok"}
+
+# Check platforms registered
+curl http://localhost:8080/platforms
+# Should include "github"
+```
+
+## 4. Configure GitHub Webhook (Local Testing)
+
+### Start ngrok
+```bash
+ngrok http 8080
+# Copy the HTTPS URL (e.g., https://abc123.ngrok.io)
+```
+
+### Add Webhook to GitHub
+1. Go to your repository on GitHub
+2. **Settings** → **Webhooks** → **Add webhook**
+3. Configure:
+   - **Payload URL**: `https://abc123.ngrok.io/webhook/github`
+     - ⚠️ Important: `/webhook/github` (singular, not `/webhooks/`)
+   - **Content type**: `application/json`
+   - **Secret**: Your `GITHUB_WEBHOOK_SECRET` value
+   - **Events**: Select "Pull requests"
+   - **Active**: ✅ Checked
+4. Click **Add webhook**
+
+## 5. Test It!
+
+### Create a Test PR
+```bash
+git checkout -b test-pr-review
+echo "// Test" >> README.md
+git add README.md
+git commit -m "test: trigger PR review"
+git push origin test-pr-review
+```
+
+Then open a PR on GitHub and watch:
+- ✅ Your terminal logs (AOF server)
+- ✅ ngrok dashboard (webhook received)
+- ✅ GitHub PR comments (automated review)
+
+## What's Configured
+
+### Files Updated
+```
+config/aof/
+  └── daemon.yaml          ← GitHub platform added, paths fixed
+
+agents/
+  ├── github-pr-reviewer.yaml  ← NEW PR review agent
+  ├── devops-agent.yaml        (existing)
+  ├── k8s-ops.yaml            (existing)
+  └── sre-agent.yaml          (existing)
+
+flows/
+  └── (directory created for future use)
+```
+
+### Platforms Enabled
+- ✅ Slack
+- ✅ Telegram
+- ✅ **GitHub** (NEW!)
+
+### Changes Made
+1. **Added GitHub platform** to `config/aof/daemon.yaml`
+2. **Fixed directory paths** from `/app/agents` → `./agents`
+3. **Created PR review agent** at `agents/github-pr-reviewer.yaml`
+4. **Created flows directory** for future AgentFlow support
+
+## Troubleshooting
+
+### "GitHub platform not registered" or "GitHub enabled but missing webhook_secret"
+**Solution:**
+```bash
+# Check if variables are set
+echo $GITHUB_TOKEN
+echo $GITHUB_WEBHOOK_SECRET
+
+# If empty, source your config
+source ~/.zshrc  # or source ~/.bashrc
+
+# Verify they're set now
+echo $GITHUB_WEBHOOK_SECRET
+
+# Restart the server
+./target/release/aofctl serve --config config/aof/daemon.yaml
+```
+
+### "Failed to load agents"
+The paths are now relative (`./agents`), so make sure you run from the project root:
+```bash
+pwd
+# Should be: /Users/gshah/work/opsflow-sh/aof
+```
+
+### "502 Bad Gateway" from ngrok
+GitHub webhook URL must be `/webhook/github` (singular), not `/webhooks/github`.
+
+### Agent not triggering
+1. Check GitHub webhook deliveries (Settings → Webhooks → Recent Deliveries)
+2. Look for green checkmark and HTTP 200 response
+3. Check AOF server logs for "GitHub pull_request event"
+
+## Next Steps
+
+1. ✅ Environment variables set
+2. ✅ Server running with GitHub registered
+3. ✅ ngrok tunnel active
+4. ✅ GitHub webhook configured
+5. 🎯 Create a test PR and see the magic!
+
+## Quick Reference
+
+### Webhook Endpoint
+```
+http://localhost:8080/webhook/github  (local)
+https://your-ngrok.ngrok.io/webhook/github  (ngrok)
+```
+
+### Server Command
+```bash
+./target/release/aofctl serve --config config/aof/daemon.yaml
+```
+
+### Test Webhook Locally
+```bash
+curl -X POST http://localhost:8080/webhook/github \
+  -H "Content-Type: application/json" \
+  -H "X-GitHub-Event: ping" \
+  -d '{"zen":"test"}'
+```
+
+---
+
+**You're ready to go!** 🚀
diff --git a/docs/platforms/github.md b/docs/platforms/github.md
new file mode 100644
index 0000000..476296a
--- /dev/null
+++ b/docs/platforms/github.md
@@ -0,0 +1,644 @@
+# GitHub Platform Integration
+
+## Overview
+
+The GitHub platform adapter provides webhook-based integration with GitHub.com and GitHub Enterprise, enabling AOF agents to respond to repository events, manage pull requests, post reviews, and interact with the GitHub API.
+
+## Features
+
+### Supported Events
+
+**Pull Request Events:**
+- `pull_request.opened` - New pull request created
+- `pull_request.synchronize` - PR updated with new commits
+- `pull_request.closed` - PR closed or merged
+- `pull_request.reopened` - PR reopened
+- `pull_request.edited` - PR title/description edited
+- `pull_request.assigned` - PR assigned to someone
+- `pull_request.review_requested` - Review requested
+
+**Pull Request Review Events:**
+- `pull_request_review.submitted` - Review submitted
+- `pull_request_review.edited` - Review edited
+- `pull_request_review.dismissed` - Review dismissed
+
+**Issue Events:**
+- `issues.opened` - New issue created
+- `issues.closed` - Issue closed
+- `issues.reopened` - Issue reopened
+- `issues.edited` - Issue edited
+- `issues.assigned` - Issue assigned
+- `issues.labeled` - Label added to issue
+
+**Comment Events:**
+- `issue_comment.created` - Comment on issue or PR
+- `issue_comment.edited` - Comment edited
+- `issue_comment.deleted` - Comment deleted
+- `pull_request_review_comment.created` - Inline PR comment
+
+**Repository Events:**
+- `push` - Code pushed to repository
+- `create` - Branch or tag created
+- `delete` - Branch or tag deleted
+- `fork` - Repository forked
+- `release.published` - Release published
+- `star.created` - Repository starred
+- `watch.started` - Repository watched
+
+**CI/CD Events:**
+- `workflow_run.completed` - GitHub Actions workflow completed
+- `workflow_run.requested` - Workflow triggered
+- `workflow_job.completed` - Workflow job completed
+- `check_run.created` - Check run created
+- `check_run.completed` - Check run completed
+- `check_suite.completed` - Check suite completed
+
+### API Methods
+
+The GitHub platform provides the following API methods:
+
+```rust
+// Post a comment on an issue or PR
+pub async fn post_comment(
+    &self,
+    owner: &str,
+    repo: &str,
+    issue_number: i64,
+    body: &str,
+) -> Result<i64, PlatformError>
+
+// Post a PR review
+pub async fn post_review(
+    &self,
+    owner: &str,
+    repo: &str,
+    pr_number: i64,
+    body: &str,
+    event: &str, // APPROVE, REQUEST_CHANGES, COMMENT
+) -> Result<i64, PlatformError>
+
+// Create or update a check run
+pub async fn create_check_run(
+    &self,
+    owner: &str,
+    repo: &str,
+    head_sha: &str,
+    name: &str,
+    status: &str, // queued, in_progress, completed
+    conclusion: Option<&str>, // success, failure, neutral, etc.
+    output: Option<CheckRunOutput>,
+) -> Result<i64, PlatformError>
+
+// Add labels to an issue or PR
+pub async fn add_labels(
+    &self,
+    owner: &str,
+    repo: &str,
+    issue_number: i64,
+    labels: &[String],
+) -> Result<(), PlatformError>
+
+// Remove a label
+pub async fn remove_label(
+    &self,
+    owner: &str,
+    repo: &str,
+    issue_number: i64,
+    label: &str,
+) -> Result<(), PlatformError>
+```
+
+## Configuration
+
+### Basic Setup
+
+Add GitHub to your daemon configuration (`config/aof/daemon.yaml`):
+
+```yaml
+apiVersion: aof.dev/v1
+kind: DaemonConfig
+metadata:
+  name: production
+
+spec:
+  server:
+    port: 8080
+    host: 0.0.0.0
+
+  platforms:
+    github:
+      enabled: true
+      token_env: GITHUB_TOKEN
+      webhook_secret_env: GITHUB_WEBHOOK_SECRET
+      bot_name: "aofbot"
+```
+
+### Environment Variables
+
+```bash
+# Required: Personal Access Token or GitHub App token
+export GITHUB_TOKEN="ghp_your_token_here"
+
+# Required: Webhook secret for signature verification
+export GITHUB_WEBHOOK_SECRET="your_webhook_secret_here"
+
+# Source your shell config if variables are in ~/.zshrc or ~/.bashrc
+source ~/.zshrc  # or source ~/.bashrc
+```
+
+#### Creating a GitHub Token
+
+1. Go to GitHub Settings → Developer settings → Personal access tokens → Tokens (classic)
+2. Click "Generate new token (classic)"
+3. Select scopes:
+   - `repo` (Full control of private repositories)
+   - `write:discussion` (Read and write discussions)
+4. Generate and copy the token
+
+#### Generating Webhook Secret
+
+```bash
+openssl rand -hex 32
+```
+
+Save this for both the environment variable and GitHub webhook configuration.
+
+### Repository Filtering
+
+Restrict which repositories can trigger agents:
+
+```yaml
+platforms:
+  github:
+    enabled: true
+    token_env: GITHUB_TOKEN
+    webhook_secret_env: GITHUB_WEBHOOK_SECRET
+    allowed_repos:
+      - "myorg/important-repo"    # Specific repo
+      - "myorg/another-repo"
+      - "myorg/*"                 # All repos in organization
+```
+
+### Organization Filtering
+
+```yaml
+platforms:
+  github:
+    enabled: true
+    token_env: GITHUB_TOKEN
+    webhook_secret_env: GITHUB_WEBHOOK_SECRET
+    allowed_orgs:
+      - "myorg"
+      - "anotherorg"
+```
+
+## Setting Up Webhooks
+
+### 1. Configure Webhook in GitHub
+
+1. Go to repository Settings → Webhooks → Add webhook
+2. Configure:
+   - **Payload URL**: `https://your-server.com/webhook/github`
+   - **Content type**: `application/json`
+   - **Secret**: Your `GITHUB_WEBHOOK_SECRET` value
+   - **SSL verification**: Enable (required)
+   - **Events**: Select events you want:
+     - Pull requests
+     - Pull request reviews
+     - Pull request review comments
+     - Issue comments
+     - Push events
+     - Workflow runs
+3. Click "Add webhook"
+
+### 2. Local Development with ngrok
+
+```bash
+# Start ngrok tunnel
+ngrok http 8080
+
+# Copy the HTTPS URL (e.g., https://abc123.ngrok.io)
+# Use https://abc123.ngrok.io/webhook/github as Payload URL
+```
+
+### 3. Verify Webhook
+
+After adding the webhook, GitHub will send a `ping` event. Check:
+- GitHub webhook page shows green checkmark
+- Recent Deliveries shows HTTP 200 response
+- AOF logs show webhook received
+
+## Agent Configuration
+
+### PR Review Agent Example
+
+Create `agents/github-pr-reviewer.yaml`:
+
+```yaml
+apiVersion: aof.dev/v1
+kind: Agent
+metadata:
+  name: github-pr-reviewer
+  labels:
+    category: code-review
+    platform: github
+
+spec:
+  model: google:gemini-2.5-flash
+
+  system_prompt: |
+    You are an expert code reviewer performing thorough pull request reviews.
+
+    ## Review Focus Areas
+
+    ### 1. Code Quality
+    - Readability and maintainability
+    - Proper error handling
+    - Code organization and structure
+
+    ### 2. Security
+    - SQL injection vulnerabilities
+    - XSS risks
+    - Authentication/authorization issues
+    - Secrets or API keys in code
+
+    ### 3. Performance
+    - Inefficient algorithms
+    - Memory leaks
+    - N+1 query problems
+
+    ### 4. Best Practices
+    - Design patterns
+    - Language-specific idioms
+    - Testing coverage
+
+    ## Output Format
+
+    Provide your review in markdown:
+
+    ## 🔍 Code Review Summary
+
+    **Overall Assessment**: [Approve ✅ / Request Changes ⚠️ / Comment 💬]
+
+    ### ✨ Strengths
+    - [Positive aspects]
+
+    ### ⚠️ Issues Found
+
+    #### Critical (Must Fix)
+    - **[File:Line]** - [Issue description]
+
+    #### Suggestions
+    - **[File:Line]** - [Suggestion]
+
+  tools:
+    - shell
+
+  max_iterations: 8
+  temperature: 0.3
+```
+
+### Issue Triage Agent Example
+
+Create `agents/github-issue-triager.yaml`:
+
+```yaml
+apiVersion: aof.dev/v1
+kind: Agent
+metadata:
+  name: github-issue-triager
+  labels:
+    category: triage
+    platform: github
+
+spec:
+  model: google:gemini-2.5-flash
+
+  system_prompt: |
+    You are a GitHub issue triage assistant.
+
+    For each new issue:
+    1. Analyze the issue description
+    2. Determine the type: bug, feature, documentation, question
+    3. Assess severity: critical, high, medium, low
+    4. Assign appropriate labels
+    5. Suggest which team/person should handle it
+
+  tools:
+    - shell
+
+  max_iterations: 5
+  temperature: 0.3
+```
+
+## AgentFlow Integration
+
+### PR Review Flow Example
+
+Create `flows/github/pr-review.yaml`:
+
+```yaml
+apiVersion: aof.dev/v1
+kind: AgentFlow
+metadata:
+  name: pr-review-flow
+  labels:
+    platform: github
+    event: pull_request
+
+spec:
+  trigger:
+    type: GitHub
+    config:
+      events:
+        - pull_request.opened
+        - pull_request.synchronize
+      filters:
+        - field: pull_request.draft
+          operator: equals
+          value: false
+
+  nodes:
+    - id: fetch-pr-diff
+      type: Action
+      action:
+        type: shell
+        command: |
+          gh pr diff ${{ event.pull_request.number }} \
+            --repo ${{ event.repository.full_name }}
+
+    - id: review
+      type: Agent
+      agent: github-pr-reviewer
+      input: |
+        Review this pull request:
+
+        **Title**: ${{ event.pull_request.title }}
+        **Description**: ${{ event.pull_request.body }}
+        **Changes**:
+        ${{ nodes.fetch-pr-diff.output }}
+
+    - id: post-review
+      type: Action
+      action:
+        type: github_review
+        owner: ${{ event.repository.owner.login }}
+        repo: ${{ event.repository.name }}
+        pr_number: ${{ event.pull_request.number }}
+        body: ${{ nodes.review.output }}
+        event: COMMENT
+```
+
+## Command Detection
+
+GitHub supports command detection in PR/issue comments using slash commands:
+
+```
+/review - Trigger PR review
+/deploy staging - Deploy to staging
+/run-tests - Run test suite
+```
+
+The platform automatically detects commands that start with `/` in comments.
+
+## Security Features
+
+### Signature Verification
+
+All webhooks are verified using HMAC-SHA256 signature:
+
+```rust
+// Automatic signature verification
+fn verify_github_signature(&self, payload: &[u8], signature: &str) -> bool {
+    // Verifies X-Hub-Signature-256 header
+}
+```
+
+### Repository Filtering
+
+Only allowed repositories can trigger agents:
+
+```yaml
+github:
+  allowed_repos:
+    - "trusted-org/*"
+```
+
+### Event Filtering
+
+Control which events are processed:
+
+```yaml
+github:
+  allowed_events:
+    - "pull_request"
+    - "issues"
+```
+
+## Advanced Features
+
+### Status Checks
+
+Create GitHub status checks for CI/CD:
+
+```rust
+platform.create_check_run(
+    "owner",
+    "repo",
+    "commit-sha",
+    "AOF Analysis",
+    "completed",
+    Some("success"),
+    Some(CheckRunOutput {
+        title: "Analysis Complete".to_string(),
+        summary: "All checks passed".to_string(),
+        text: Some("Detailed results...".to_string()),
+    }),
+).await?;
+```
+
+### PR Reviews
+
+Post structured PR reviews:
+
+```rust
+platform.post_review(
+    "owner",
+    "repo",
+    42,
+    "LGTM! Great work on this PR.",
+    "APPROVE", // or REQUEST_CHANGES, COMMENT
+).await?;
+```
+
+### Label Management
+
+Automatically manage labels:
+
+```rust
+// Add labels
+platform.add_labels("owner", "repo", 42, &[
+    "bug".to_string(),
+    "high-priority".to_string(),
+]).await?;
+
+// Remove labels
+platform.remove_label("owner", "repo", 42, "needs-triage").await?;
+```
+
+## Troubleshooting
+
+### "GitHub enabled but missing webhook_secret"
+
+**Solution**: Set the environment variable and source your shell config:
+
+```bash
+export GITHUB_WEBHOOK_SECRET="your_secret"
+source ~/.zshrc  # or ~/.bashrc
+./target/release/aofctl serve --config config/aof/daemon.yaml
+```
+
+### "Invalid signature" in webhook deliveries
+
+**Causes**:
+1. Webhook secret mismatch
+2. Using HTTP instead of HTTPS
+3. Payload modified by proxy
+
+**Solution**:
+1. Verify secrets match exactly
+2. Use HTTPS URL (ngrok provides this)
+3. Check proxy configuration
+
+### "GitHub: GITHUB_TOKEN not set, API features disabled"
+
+This is a warning, not an error. Webhooks still work, but:
+- Cannot post comments
+- Cannot post reviews
+- Cannot update status checks
+
+**Solution**: Set `GITHUB_TOKEN` environment variable.
+
+### Agent not responding to events
+
+**Check**:
+1. Webhook configured for correct events
+2. Agent file exists and loads successfully
+3. Event passes repository/organization filters
+4. Check AOF logs for errors
+
+## Production Deployment
+
+### GitHub App (Recommended)
+
+For production, use a GitHub App instead of PAT:
+
+**Benefits:**
+- Better security
+- Granular permissions
+- Higher rate limits
+- Per-installation tokens
+
+### Rate Limiting
+
+GitHub API has rate limits:
+- PAT: 5,000 requests/hour
+- GitHub App: 15,000 requests/hour per installation
+
+Monitor rate limits in logs and implement backoff strategies.
+
+### High Availability
+
+For production:
+1. Run multiple AOF instances behind load balancer
+2. Use shared state (Redis, PostgreSQL)
+3. Configure webhook redelivery
+4. Set up monitoring and alerting
+
+## Examples
+
+### Complete PR Review Workflow
+
+See the quickstart guide: [GITHUB_SETUP.md](../../GITHUB_SETUP.md)
+
+### Issue Auto-Labeling
+
+```yaml
+apiVersion: aof.dev/v1
+kind: AgentFlow
+metadata:
+  name: issue-labeler
+
+spec:
+  trigger:
+    type: GitHub
+    config:
+      events:
+        - issues.opened
+
+  nodes:
+    - id: analyze
+      type: Agent
+      agent: github-issue-triager
+      input: |
+        Analyze this issue and suggest labels:
+
+        **Title**: ${{ event.issue.title }}
+        **Body**: ${{ event.issue.body }}
+
+    - id: apply-labels
+      type: Action
+      action:
+        type: github_labels
+        owner: ${{ event.repository.owner.login }}
+        repo: ${{ event.repository.name }}
+        issue_number: ${{ event.issue.number }}
+        labels: ${{ nodes.analyze.output.labels }}
+```
+
+### Automated Dependency Updates
+
+```yaml
+apiVersion: aof.dev/v1
+kind: AgentFlow
+metadata:
+  name: dependency-checker
+
+spec:
+  trigger:
+    type: GitHub
+    config:
+      events:
+        - pull_request.opened
+      filters:
+        - field: pull_request.user.login
+          operator: equals
+          value: dependabot[bot]
+
+  nodes:
+    - id: review-deps
+      type: Agent
+      agent: dependency-reviewer
+      input: |
+        Review this dependency update PR:
+        ${{ event.pull_request.title }}
+
+    - id: auto-approve
+      type: Condition
+      condition: ${{ nodes.review-deps.output.safe == true }}
+      then:
+        - type: github_review
+          event: APPROVE
+```
+
+## API Reference
+
+Complete API documentation: [GitHub API Docs](https://docs.github.com/en/rest)
+
+AOF Platform Methods: See `crates/aof-triggers/src/platforms/github.rs`
+
+## Support
+
+- GitHub webhook documentation: https://docs.github.com/webhooks
+- AOF documentation: https://docs.aof.sh
+- Issues: https://github.com/agenticdevops/aof/issues
diff --git a/docs/tutorials/github-pr-automation.md b/docs/tutorials/github-pr-automation.md
new file mode 100644
index 0000000..dd5d3e5
--- /dev/null
+++ b/docs/tutorials/github-pr-automation.md
@@ -0,0 +1,592 @@
+# Tutorial: Automated GitHub PR Review with AOF
+
+This tutorial will guide you through setting up automated pull request reviews using AOF (Agentic Ops Framework).
+
+## What You'll Build
+
+By the end of this tutorial, you'll have:
+- ✅ AOF daemon receiving GitHub webhook events
+- ✅ Automated PR reviews with AI-powered code analysis
+- ✅ Security and quality checks on every PR
+- ✅ Automatic comments posted back to GitHub
+
+## Prerequisites
+
+- GitHub account with admin access to a repository
+- AOF installed (`cargo build --release`)
+- Basic understanding of webhooks
+- For local development: [ngrok](https://ngrok.com/) for webhook tunneling
+
+## Time Required
+
+~15 minutes
+
+## Step 1: Set Up Environment Variables
+
+First, create a GitHub Personal Access Token:
+
+1. Visit https://github.com/settings/tokens
+2. Click "Generate new token (classic)"
+3. Give it a name: "AOF Bot"
+4. Select scopes:
+   - `repo` (Full control of private repositories)
+   - `write:discussion` (Read and write discussions)
+5. Generate and copy the token
+
+Generate a webhook secret:
+
+```bash
+openssl rand -hex 32
+```
+
+Add these to your `~/.zshrc` (or `~/.bashrc`):
+
+```bash
+# GitHub Integration
+export GITHUB_TOKEN="ghp_your_token_here"
+export GITHUB_WEBHOOK_SECRET="your_webhook_secret_here"
+
+# LLM Provider (using Google Gemini)
+export GOOGLE_API_KEY="your_google_api_key"
+
+# Optional: Existing platform tokens
+export SLACK_BOT_TOKEN="xoxb-..."
+export SLACK_SIGNING_SECRET="..."
+export TELEGRAM_BOT_TOKEN="..."
+```
+
+**Important**: Source your config before starting AOF:
+
+```bash
+source ~/.zshrc  # or source ~/.bashrc
+```
+
+## Step 2: Configure AOF Daemon
+
+Your `config/aof/daemon.yaml` should already have GitHub enabled:
+
+```yaml
+apiVersion: aof.dev/v1
+kind: DaemonConfig
+metadata:
+  name: production
+
+spec:
+  server:
+    port: 8080
+    host: 0.0.0.0
+
+  platforms:
+    github:
+      enabled: true
+      token_env: GITHUB_TOKEN
+      webhook_secret_env: GITHUB_WEBHOOK_SECRET
+      bot_name: "aofbot"
+
+  agents:
+    directory: ./agents
+    watch: true
+
+  flows:
+    directory: ./flows
+    watch: true
+    enabled: true
+
+  runtime:
+    max_concurrent_tasks: 10
+    task_timeout_secs: 300
+    max_tasks_per_user: 5
+    default_agent: devops
+```
+
+## Step 3: Create PR Review Agent
+
+Create `agents/github-pr-reviewer.yaml`:
+
+```yaml
+apiVersion: aof.dev/v1
+kind: Agent
+metadata:
+  name: github-pr-reviewer
+  labels:
+    category: code-review
+    platform: github
+
+spec:
+  # Using Google Gemini 2.5 Flash for fast, cost-effective reviews
+  model: google:gemini-2.5-flash
+
+  system_prompt: |
+    You are an expert code reviewer performing thorough pull request reviews.
+
+    ## Review Focus Areas
+
+    ### 1. Code Quality
+    - Readability and maintainability
+    - Proper error handling
+    - Code organization and structure
+    - DRY principle adherence
+
+    ### 2. Security
+    - SQL injection vulnerabilities
+    - XSS risks
+    - Authentication/authorization issues
+    - Secrets or API keys in code
+    - Insecure dependencies
+    - Input validation issues
+
+    ### 3. Performance
+    - Inefficient algorithms
+    - Memory leaks
+    - N+1 query problems
+    - Unnecessary API calls
+
+    ### 4. Best Practices
+    - Design patterns
+    - Language-specific idioms
+    - Testing coverage
+    - Documentation quality
+
+    ## Output Format
+
+    Provide your review in this markdown format:
+
+    ```markdown
+    ## 🔍 Code Review Summary
+
+    **Overall Assessment**: [Approve ✅ / Request Changes ⚠️ / Comment 💬]
+
+    ### ✨ Strengths
+    - [List positive aspects of the PR]
+    - [Good patterns or approaches used]
+
+    ### ⚠️ Issues Found
+
+    #### Critical (Must Fix)
+    - **[File:Line]** - [Issue description]
+
+    #### High Priority
+    - **[File:Line]** - [Issue description]
+
+    #### Medium Priority
+    - **[File:Line]** - [Suggestion]
+
+    ### 💡 Suggestions for Improvement
+    - [Actionable suggestions]
+    - [Alternative approaches]
+
+    ### 🔒 Security Analysis
+    [Security concerns or "No security issues detected ✅"]
+
+    ### 📝 Additional Notes
+    [Any other relevant comments]
+
+    ---
+    *Automated review by AOF GitHub Bot*
+    ```
+
+    ## Guidelines
+    - Be constructive and helpful, not just critical
+    - Be specific with file names and line numbers
+    - Provide actionable suggestions for fixes
+    - Acknowledge both good and bad aspects
+    - Prioritize issues by severity
+    - If unsure, express uncertainty rather than being wrong
+
+    If the PR looks good overall, say so! Don't manufacture issues.
+
+  tools:
+    - shell
+
+  max_iterations: 8
+  temperature: 0.3
+```
+
+## Step 4: Create PR Review Flow (Optional)
+
+For more control, create `flows/github/pr-review.yaml`:
+
+```yaml
+apiVersion: aof.dev/v1
+kind: AgentFlow
+metadata:
+  name: pr-review-flow
+  labels:
+    platform: github
+    event: pull_request
+
+spec:
+  trigger:
+    type: GitHub
+    config:
+      events:
+        - pull_request.opened
+        - pull_request.synchronize
+      filters:
+        # Don't review draft PRs
+        - field: pull_request.draft
+          operator: equals
+          value: false
+
+  nodes:
+    # Step 1: Fetch PR diff using GitHub CLI
+    - id: fetch-pr-diff
+      type: Action
+      action:
+        type: shell
+        command: |
+          # Install gh CLI if not present
+          if ! command -v gh &> /dev/null; then
+              echo "Installing gh CLI..."
+              brew install gh  # or appropriate package manager
+          fi
+
+          # Authenticate with token
+          echo "$GITHUB_TOKEN" | gh auth login --with-token
+
+          # Fetch PR diff
+          gh pr diff ${{ event.pull_request.number }} \
+            --repo ${{ event.repository.full_name }} \
+            > /tmp/pr-diff.txt
+
+          # Also get file list
+          gh pr diff ${{ event.pull_request.number }} \
+            --repo ${{ event.repository.full_name }} \
+            --name-only \
+            > /tmp/pr-files.txt
+
+          cat /tmp/pr-diff.txt
+
+    # Step 2: AI Review
+    - id: review
+      type: Agent
+      agent: github-pr-reviewer
+      input: |
+        Review this pull request:
+
+        **Repository**: ${{ event.repository.full_name }}
+        **PR Number**: #${{ event.pull_request.number }}
+        **Title**: ${{ event.pull_request.title }}
+        **Author**: @${{ event.pull_request.user.login }}
+
+        **Description**:
+        ${{ event.pull_request.body }}
+
+        **Files Changed** (${{ event.pull_request.changed_files }} files):
+        - ${{ event.pull_request.additions }} additions
+        - ${{ event.pull_request.deletions }} deletions
+
+        **Code Changes**:
+        ${{ nodes.fetch-pr-diff.output }}
+
+        Please provide a thorough code review following the guidelines in your system prompt.
+
+    # Step 3: Post review as comment
+    - id: post-review
+      type: Action
+      action:
+        type: github_comment
+        owner: ${{ event.repository.owner.login }}
+        repo: ${{ event.repository.name }}
+        issue_number: ${{ event.pull_request.number }}
+        body: ${{ nodes.review.output }}
+
+    # Step 4: Add labels based on review
+    - id: label-pr
+      type: Condition
+      condition: ${{ nodes.review.output contains "Critical" }}
+      then:
+        - type: github_labels
+          labels: ["needs-work", "security-review"]
+      else:
+        - type: github_labels
+          labels: ["reviewed", "ready-for-merge"]
+```
+
+## Step 5: Start AOF Daemon
+
+```bash
+# Make sure to source environment variables first!
+source ~/.zshrc  # or source ~/.bashrc
+
+# Build (if not already done)
+cargo build --release
+
+# Start the daemon
+./target/release/aofctl serve --config config/aof/daemon.yaml
+```
+
+You should see:
+
+```
+Starting AOF Trigger Server
+  Bind address: 0.0.0.0:8080
+  Registered platform: github
+  Pre-loaded 1 agents from "./agents"
+Server starting...
+```
+
+## Step 6: Expose Webhook Endpoint
+
+### For Local Development:
+
+```bash
+# In a new terminal
+ngrok http 8080
+
+# Copy the HTTPS URL (e.g., https://abc123.ngrok.io)
+```
+
+### For Production:
+
+Deploy to a server with a public IP and domain:
+
+```
+https://aof.yourdomain.com/webhook/github
+```
+
+## Step 7: Configure GitHub Webhook
+
+1. Go to your repository on GitHub
+2. Navigate to **Settings** → **Webhooks** → **Add webhook**
+
+3. Configure the webhook:
+   - **Payload URL**: `https://abc123.ngrok.io/webhook/github` (or your production URL)
+   - **Content type**: `application/json`
+   - **Secret**: Paste your `GITHUB_WEBHOOK_SECRET` value
+   - **SSL verification**: Enable
+   - **Which events would you like to trigger this webhook?**:
+     - Select "Let me select individual events"
+     - Check: ☑️ Pull requests
+     - Check: ☑️ Pull request reviews
+     - Check: ☑️ Pull request review comments
+     - Check: ☑️ Issue comments (optional, for commands)
+   - **Active**: ☑️ Checked
+
+4. Click "Add webhook"
+
+5. GitHub will immediately send a `ping` event. Check "Recent Deliveries" to verify:
+   - Green checkmark = success
+   - HTTP 200 response
+   - Check AOF logs for "ping event received"
+
+## Step 8: Test It!
+
+### Create a Test PR
+
+```bash
+# Create a test branch
+git checkout -b test-pr-review
+
+# Make a simple change
+echo "# Test PR" >> README.md
+
+# Commit and push
+git add README.md
+git commit -m "test: trigger automated PR review"
+git push origin test-pr-review
+```
+
+### Open Pull Request
+
+1. Go to your repository on GitHub
+2. Click "Compare & pull request"
+3. Fill in title and description
+4. Click "Create pull request"
+
+### Watch the Magic! 🎉
+
+Within seconds, you should see:
+
+1. **In ngrok dashboard**: Webhook received
+2. **In AOF logs**:
+   ```
+   INFO aof_triggers::handler: Received GitHub pull_request event
+   INFO aof_runtime: Executing agent github-pr-reviewer
+   ```
+3. **On GitHub PR**: Automated review comment appears!
+
+## What Just Happened?
+
+Let's break down the workflow:
+
+1. **Webhook Event**: GitHub sent a `pull_request.opened` event to your AOF endpoint
+2. **Signature Verification**: AOF verified the webhook signature using `GITHUB_WEBHOOK_SECRET`
+3. **Event Parsing**: The GitHub platform adapter parsed the webhook payload
+4. **Agent Selection**: AOF matched the event to the `github-pr-reviewer` agent
+5. **Code Fetch**: The agent fetched the PR diff using GitHub CLI
+6. **AI Analysis**: Google Gemini analyzed the code changes
+7. **Review Post**: AOF posted the review as a comment on the PR
+
+## Customizing Your Reviews
+
+### Adjust Review Depth
+
+Edit `agents/github-pr-reviewer.yaml`:
+
+```yaml
+spec:
+  max_iterations: 15  # More iterations = deeper analysis
+  temperature: 0.1    # Lower = more focused and deterministic
+```
+
+### Focus on Specific Issues
+
+Modify the system prompt to emphasize certain checks:
+
+```yaml
+system_prompt: |
+  You are a security-focused code reviewer.
+
+  **PRIMARY FOCUS**: Security vulnerabilities
+  - SQL injection
+  - XSS attacks
+  - Authentication bypass
+  - Secrets in code
+
+  **SECONDARY**: Performance and best practices
+```
+
+### Use Different Models
+
+```yaml
+spec:
+  # Fast and cheap (Google Gemini Flash)
+  model: google:gemini-2.5-flash
+
+  # OR more powerful (OpenAI GPT-4)
+  model: openai:gpt-4o
+
+  # OR Claude Sonnet
+  model: anthropic:claude-sonnet-4-20250514
+```
+
+## Advanced Features
+
+### Auto-Approve Safe PRs
+
+Add a condition to auto-approve PRs that pass all checks:
+
+```yaml
+nodes:
+  - id: auto-approve
+    type: Condition
+    condition: |
+      ${{ nodes.review.output.assessment == "Approve" &&
+          nodes.review.output.critical_issues == 0 }}
+    then:
+      - type: github_review
+        event: APPROVE
+        body: "✅ Automated approval: All checks passed!"
+```
+
+### Request Changes for Critical Issues
+
+```yaml
+nodes:
+  - id: request-changes
+    type: Condition
+    condition: ${{ nodes.review.output.critical_issues > 0 }}
+    then:
+      - type: github_review
+        event: REQUEST_CHANGES
+        body: |
+          ⚠️ Critical issues found that must be addressed:
+          ${{ nodes.review.output.critical_issues_list }}
+```
+
+### Add Status Checks
+
+Create GitHub status checks for CI integration:
+
+```yaml
+nodes:
+  - id: status-check
+    type: Action
+    action:
+      type: github_check_run
+      owner: ${{ event.repository.owner.login }}
+      repo: ${{ event.repository.name }}
+      head_sha: ${{ event.pull_request.head.sha }}
+      name: "AOF Code Review"
+      status: "completed"
+      conclusion: |
+        ${{ nodes.review.output.critical_issues > 0 ? "failure" : "success" }}
+      output:
+        title: "Code Review Complete"
+        summary: ${{ nodes.review.output.summary }}
+```
+
+## Troubleshooting
+
+### "GitHub enabled but missing webhook_secret"
+
+**Problem**: Environment variable not loaded
+
+**Solution**:
+```bash
+# Check if variable is set
+echo $GITHUB_WEBHOOK_SECRET
+
+# If empty, source your config
+source ~/.zshrc  # or ~/.bashrc
+
+# Restart AOF
+./target/release/aofctl serve --config config/aof/daemon.yaml
+```
+
+### Webhook shows "Invalid signature"
+
+**Problem**: Webhook secret mismatch
+
+**Solution**:
+1. Verify secrets match exactly (no extra spaces/newlines)
+2. Check `echo $GITHUB_WEBHOOK_SECRET` matches GitHub webhook secret
+3. Regenerate secret if needed:
+   ```bash
+   export GITHUB_WEBHOOK_SECRET=$(openssl rand -hex 32)
+   # Update in GitHub webhook configuration
+   ```
+
+### Agent not responding
+
+**Check**:
+1. **Agent loaded**: Look for "Loaded agent 'github-pr-reviewer'" in logs
+2. **Webhook delivered**: Check GitHub webhook Recent Deliveries
+3. **Events configured**: Verify webhook listens to "Pull requests"
+4. **Logs**: Run with `RUST_LOG=debug` for detailed output
+
+### No comments appearing on PR
+
+**Check**:
+1. **GITHUB_TOKEN set**: Verify with `echo $GITHUB_TOKEN`
+2. **Token permissions**: Token needs `repo` scope
+3. **Repository access**: Token owner has write access to repository
+4. **Rate limits**: Check if you've exceeded GitHub API limits
+
+## Next Steps
+
+Now that you have basic PR reviews working, try:
+
+1. **Add Issue Triaging**: Create an agent that auto-labels and assigns issues
+2. **Dependency Updates**: Auto-review and approve Dependabot PRs
+3. **Release Automation**: Trigger releases when PRs are merged to main
+4. **CI/CD Integration**: Link reviews to GitHub Actions workflows
+5. **Multi-Repository**: Deploy across all your organization's repositories
+
+## Resources
+
+- [GitHub Platform Documentation](../platforms/github.md)
+- [Quick Setup Guide](../../GITHUB_SETUP.md)
+- [Agent Configuration Reference](../user-guide/agents/index.md)
+- [AgentFlow Documentation](../agentflow/README.md)
+- [AOF GitHub Issues](https://github.com/agenticdevops/aof/issues)
+
+## Need Help?
+
+- Check webhook delivery logs in GitHub
+- Enable debug logging: `RUST_LOG=debug aofctl serve ...`
+- Review AOF documentation: https://docs.aof.sh
+- File an issue: https://github.com/agenticdevops/aof/issues
+
+Happy automating! 🚀
diff --git a/examples/config/daemon.yaml b/examples/config/daemon.yaml
index 0253c7d..38d66a1 100644
--- a/examples/config/daemon.yaml
+++ b/examples/config/daemon.yaml
@@ -84,7 +84,7 @@ spec:
     # Scopes: repo, write:discussion (for PR comments/reviews)
     github:
       enabled: true
-      token_env: GITHUB_TOKEN
+      #token_env: GITHUB_TOKEN
       webhook_secret_env: GITHUB_WEBHOOK_SECRET
       # Optional: Bot name for @mentions
       bot_name: aofbot
diff --git a/flows/github-pr-review.yaml b/flows/github-pr-review.yaml
new file mode 100644
index 0000000..2e5fd10
--- /dev/null
+++ b/flows/github-pr-review.yaml
@@ -0,0 +1,25 @@
+apiVersion: aof.dev/v1
+kind: AgentFlow
+metadata:
+  name: github-pr-review
+  labels:
+    platform: github
+    event: pull_request
+
+spec:
+  description: "Automated AI code review for GitHub PRs"
+
+  nodes:
+    - id: run-review
+      type: Script
+      config:
+        scriptConfig:
+          command: ./scripts/flow-pr-review.sh
+          timeout_seconds: 180
+          fail_on_error: true
+          env:
+            AOF_TRIGGER_DATA: "${event}"
+
+  connections:
+    - from: start
+      to: run-review
diff --git a/flows/github/issue-triage-flow.yaml b/flows/github/issue-triage-flow.yaml
new file mode 100644
index 0000000..944a1f5
--- /dev/null
+++ b/flows/github/issue-triage-flow.yaml
@@ -0,0 +1,132 @@
+apiVersion: aof.sh/v1alpha1
+kind: AgentFlow
+metadata:
+  name: issue-triage
+  description: Automatically triage new issues
+
+triggers:
+  - platform: github
+    events:
+      - issues.opened
+
+steps:
+  # Analyze issue content
+  - name: analyze-issue
+    agent: issue-analyzer
+    action: analyze
+    input:
+      title: "\{\{ event.issue.title \}\}"
+      body: "\{\{ event.issue.body \}\}"
+    rules:
+      # Bug detection
+      - patterns: ["bug", "error", "crash", "not working", "broken"]
+        label: "bug"
+        priority: "high"
+      # Feature request
+      - patterns: ["feature", "enhancement", "request", "would be nice", "suggestion"]
+        label: "enhancement"
+        priority: "medium"
+      # Question
+      - patterns: ["how to", "question", "help", "?"]
+        label: "question"
+        priority: "low"
+      # Security
+      - patterns: ["security", "vulnerability", "cve", "exploit"]
+        label: "security"
+        priority: "critical"
+        notify: "security-team"
+
+  # Determine component
+  - name: determine-component
+    agent: classifier
+    action: classify
+    input:
+      text: "\{\{ event.issue.title \}\} \{\{ event.issue.body \}\}"
+    categories:
+      - name: "api"
+        patterns: ["api", "endpoint", "rest", "graphql"]
+      - name: "frontend"
+        patterns: ["ui", "frontend", "css", "react", "button", "page"]
+      - name: "database"
+        patterns: ["database", "db", "postgres", "migration", "query"]
+      - name: "infrastructure"
+        patterns: ["kubernetes", "k8s", "docker", "deployment", "ci/cd"]
+      - name: "documentation"
+        patterns: ["docs", "readme", "documentation", "example"]
+
+  # Apply labels
+  - name: apply-labels
+    agent: github
+    action: add_labels
+    input:
+      repo: "\{\{ event.repository.full_name \}\}"
+      issue_number: "\{\{ event.issue.number \}\}"
+      labels:
+        - "\{\{ steps.analyze-issue.output.label \}\}"
+        - "priority/\{\{ steps.analyze-issue.output.priority \}\}"
+        - "component/\{\{ steps.determine-component.output.category \}\}"
+        - "needs-triage"
+
+  # Assign to team
+  - name: assign-team
+    agent: github
+    action: add_assignees
+    input:
+      repo: "\{\{ event.repository.full_name \}\}"
+      issue_number: "\{\{ event.issue.number \}\}"
+      assignees: "\{\{ team_mapping[steps.determine-component.output.category] \}\}"
+    variables:
+      team_mapping:
+        api: ["backend-team"]
+        frontend: ["frontend-team"]
+        database: ["dba-team"]
+        infrastructure: ["platform-team"]
+        documentation: ["docs-team"]
+
+  # Post welcome comment
+  - name: welcome-comment
+    agent: github
+    action: post_comment
+    input:
+      repo: "\{\{ event.repository.full_name \}\}"
+      issue_number: "\{\{ event.issue.number \}\}"
+      body: |
+        Thanks for opening this issue, @\{\{ event.issue.user.login \}\}! 👋
+
+        I've automatically classified this as:
+        - **Type**: \{\{ steps.analyze-issue.output.label \}\}
+        - **Priority**: \{\{ steps.analyze-issue.output.priority \}\}
+        - **Component**: \{\{ steps.determine-component.output.category \}\}
+
+        {% if steps.analyze-issue.output.label == 'bug' %}
+        To help us investigate, please ensure you've provided:
+        - [ ] Steps to reproduce
+        - [ ] Expected behavior
+        - [ ] Actual behavior
+        - [ ] Environment details (OS, version, etc.)
+        {% elif steps.analyze-issue.output.label == 'enhancement' %}
+        To help us understand your request:
+        - [ ] Use case / problem being solved
+        - [ ] Proposed solution
+        - [ ] Alternatives considered
+        {% endif %}
+
+        A team member will review this shortly.
+
+  # Notify on critical issues
+  - name: notify-critical
+    condition: "\{\{ steps.analyze-issue.output.priority == 'critical' \}\}"
+    agent: multi-channel
+    action: notify
+    input:
+      channels:
+        - "slack:#security-alerts"
+        - "pagerduty:security-team"
+      message: |
+        🚨 Critical security issue opened
+
+        Repo: \{\{ event.repository.full_name \}\}
+        Issue: #\{\{ event.issue.number \}\} - \{\{ event.issue.title \}\}
+        Author: @\{\{ event.issue.user.login \}\}
+
+        \{\{ event.issue.html_url \}\}
diff --git a/flows/github/pr-labeler-flow.yaml b/flows/github/pr-labeler-flow.yaml
new file mode 100644
index 0000000..ffd675f
--- /dev/null
+++ b/flows/github/pr-labeler-flow.yaml
@@ -0,0 +1,73 @@
+apiVersion: aof.sh/v1alpha1
+kind: AgentFlow
+metadata:
+  name: pr-labeler
+  description: Auto-label PRs by size and risk
+
+triggers:
+  - platform: github
+    events:
+      - pull_request.opened
+      - pull_request.synchronize
+
+steps:
+  - name: analyze-size
+    agent: pr-analyzer
+    action: analyze_size
+    input:
+      additions: "\{\{ event.pull_request.additions \}\}"
+      deletions: "\{\{ event.pull_request.deletions \}\}"
+      files_changed: "\{\{ event.pull_request.changed_files \}\}"
+    rules:
+      - condition: "\{\{ additions + deletions < 50 \}\}"
+        label: "size/XS"
+      - condition: "\{\{ additions + deletions < 200 \}\}"
+        label: "size/S"
+      - condition: "\{\{ additions + deletions < 500 \}\}"
+        label: "size/M"
+      - condition: "\{\{ additions + deletions < 1000 \}\}"
+        label: "size/L"
+      - default:
+        label: "size/XL"
+
+  - name: analyze-risk
+    agent: pr-analyzer
+    action: analyze_risk
+    input:
+      files: "\{\{ event.pull_request.files \}\}"
+    rules:
+      # High risk areas
+      - pattern: "^.*/(auth|security|crypto)/"
+        label: "risk/high"
+        requires_review_from: ["security-team"]
+      - pattern: "^.*/migrations/"
+        label: "risk/high"
+        requires_review_from: ["dba-team"]
+      - pattern: "^Dockerfile|docker-compose|k8s/"
+        label: "infrastructure"
+        requires_review_from: ["platform-team"]
+      # Documentation
+      - pattern: "^docs/|README|CHANGELOG"
+        label: "documentation"
+      # Tests
+      - pattern: "^.*_test\\.go|^.*\\.test\\.(js|ts)|^test/"
+        label: "tests"
+
+  - name: apply-labels
+    agent: github
+    action: add_labels
+    input:
+      repo: "\{\{ event.repository.full_name \}\}"
+      issue_number: "\{\{ event.pull_request.number \}\}"
+      labels:
+        - "\{\{ steps.analyze-size.output.label \}\}"
+        - "\{\{ steps.analyze-risk.output.labels \}\}"
+
+  - name: request-reviewers
+    condition: "\{\{ steps.analyze-risk.output.required_reviewers | length > 0 \}\}"
+    agent: github
+    action: request_reviewers
+    input:
+      repo: "\{\{ event.repository.full_name \}\}"
+      pr_number: "\{\{ event.pull_request.number \}\}"
+      teams: "\{\{ steps.analyze-risk.output.required_reviewers \}\}"
diff --git a/flows/github/pr-review-flow.yaml b/flows/github/pr-review-flow.yaml
new file mode 100644
index 0000000..eece551
--- /dev/null
+++ b/flows/github/pr-review-flow.yaml
@@ -0,0 +1,215 @@
+apiVersion: aof.sh/v1alpha1
+kind: AgentFlow
+metadata:
+  name: pr-review-flow
+  description: Automated PR review for security, performance, and best practices
+
+triggers:
+  - platform: github
+    events:
+      - pull_request.opened
+      - pull_request.synchronize
+
+input:
+  from_event:
+    pr_number: "\{\{ event.pull_request.number \}\}"
+    repo: "\{\{ event.repository.full_name \}\}"
+    head_sha: "\{\{ event.pull_request.head.sha \}\}"
+    base_branch: "\{\{ event.pull_request.base.ref \}\}"
+    author: "\{\{ event.pull_request.user.login \}\}"
+    files_changed: "\{\{ event.pull_request.changed_files \}\}"
+
+# Skip if PR is from bot or draft
+conditions:
+  - "\{\{ not event.pull_request.draft \}\}"
+  - "\{\{ event.pull_request.user.type != 'Bot' \}\}"
+
+steps:
+  # Create initial check run
+  - name: create-check
+    agent: github
+    action: create_check_run
+    input:
+      repo: "\{\{ input.repo \}\}"
+      head_sha: "\{\{ input.head_sha \}\}"
+      name: "AOF Code Review"
+      status: "in_progress"
+      output:
+        title: "Reviewing PR..."
+        summary: "Automated code review in progress"
+
+  # Get changed files
+  - name: get-files
+    agent: github
+    action: get_pr_files
+    input:
+      repo: "\{\{ input.repo \}\}"
+      pr_number: "\{\{ input.pr_number \}\}"
+
+  # Parallel analysis
+  - name: analyze
+    parallel: true
+    steps:
+      # Security scan
+      - name: security-scan
+        agent: security-scanner
+        action: scan
+        input:
+          repo: "\{\{ input.repo \}\}"
+          ref: "\{\{ input.head_sha \}\}"
+          files: "\{\{ steps.get-files.output.files \}\}"
+        checks:
+          - type: secrets
+            severity: critical
+          - type: sql_injection
+            severity: high
+          - type: xss
+            severity: high
+          - type: dependencies
+            severity: medium
+
+      # Performance analysis
+      - name: perf-analysis
+        agent: perf-analyzer
+        action: analyze
+        input:
+          files: "\{\{ steps.get-files.output.files \}\}"
+        checks:
+          - type: n_plus_one
+          - type: missing_indexes
+          - type: large_payloads
+          - type: inefficient_loops
+
+      # Code quality
+      - name: quality-check
+        agent: code-quality
+        action: check
+        input:
+          files: "\{\{ steps.get-files.output.files \}\}"
+          config:
+            max_complexity: 10
+            max_file_length: 500
+            require_tests: true
+            coverage_threshold: 80
+
+      # Kubernetes manifest validation (if applicable)
+      - name: k8s-validation
+        agent: kubernetes-validator
+        condition: "\{\{ steps.get-files.output.files | selectattr('filename', 'match', '.*\\.ya?ml$') | list | length > 0 \}\}"
+        action: validate
+        input:
+          files: "\{\{ steps.get-files.output.files | selectattr('filename', 'match', '.*\\.ya?ml$') | list \}\}"
+        checks:
+          - type: schema
+          - type: security_context
+          - type: resource_limits
+          - type: best_practices
+
+  # Aggregate results
+  - name: aggregate-results
+    agent: review-aggregator
+    action: aggregate
+    input:
+      security: "\{\{ steps.analyze.security-scan.output \}\}"
+      performance: "\{\{ steps.analyze.perf-analysis.output \}\}"
+      quality: "\{\{ steps.analyze.quality-check.output \}\}"
+      k8s: "\{\{ steps.analyze.k8s-validation.output | default({}) \}\}"
+
+  # Determine approval status
+  - name: determine-status
+    agent: decision-maker
+    action: evaluate
+    input:
+      results: "\{\{ steps.aggregate-results.output \}\}"
+    rules:
+      - condition: "\{\{ results.security.critical_count > 0 \}\}"
+        status: "failure"
+        message: "Critical security issues found"
+      - condition: "\{\{ results.security.high_count > 0 \}\}"
+        status: "failure"
+        message: "High severity security issues found"
+      - condition: "\{\{ results.quality.coverage < 80 \}\}"
+        status: "failure"
+        message: "Test coverage below 80%"
+      - condition: "\{\{ results.performance.issues | length > 5 \}\}"
+        status: "warning"
+        message: "Multiple performance issues detected"
+      - default:
+        status: "success"
+        message: "All checks passed"
+
+  # Post review comment
+  - name: post-review
+    agent: github
+    action: post_review
+    input:
+      repo: "\{\{ input.repo \}\}"
+      pr_number: "\{\{ input.pr_number \}\}"
+      commit_id: "\{\{ input.head_sha \}\}"
+      event: "\{\{ 'APPROVE' if steps.determine-status.output.status == 'success' else 'REQUEST_CHANGES' \}\}"
+      body: |
+        ## 🤖 Automated Code Review
+
+        \{\{ '✅' if steps.determine-status.output.status == 'success' else '❌' \}\} **\{\{ steps.determine-status.output.message \}\}**
+
+        ### Security Scan
+        \{\{ '✅' if steps.aggregate-results.output.security.passed else '❌' \}\} \{\{ steps.aggregate-results.output.security.summary \}\}
+        {% if steps.aggregate-results.output.security.issues %}
+        <details>
+        <summary>Security Issues (\{\{ steps.aggregate-results.output.security.issues | length \}\})</summary>
+
+        {% for issue in steps.aggregate-results.output.security.issues %}
+        - **\{\{ issue.severity \}\}**: \{\{ issue.message \}\} (`\{\{ issue.file \}\}:\{\{ issue.line \}\}`)
+        {% endfor %}
+        </details>
+        {% endif %}
+
+        ### Performance Analysis
+        \{\{ '✅' if steps.aggregate-results.output.performance.passed else '⚠️' \}\} \{\{ steps.aggregate-results.output.performance.summary \}\}
+        {% if steps.aggregate-results.output.performance.issues %}
+        <details>
+        <summary>Performance Issues (\{\{ steps.aggregate-results.output.performance.issues | length \}\})</summary>
+
+        {% for issue in steps.aggregate-results.output.performance.issues %}
+        - **\{\{ issue.type \}\}**: \{\{ issue.message \}\} (`\{\{ issue.file \}\}:\{\{ issue.line \}\}`)
+        {% endfor %}
+        </details>
+        {% endif %}
+
+        ### Code Quality
+        - Complexity: \{\{ '✅' if steps.aggregate-results.output.quality.complexity_ok else '❌' \}\} (max: \{\{ steps.aggregate-results.output.quality.max_complexity \}\})
+        - Test Coverage: \{\{ '✅' if steps.aggregate-results.output.quality.coverage >= 80 else '❌' \}\} \{\{ steps.aggregate-results.output.quality.coverage \}\}%
+        - Lint: \{\{ '✅' if steps.aggregate-results.output.quality.lint_passed else '❌' \}\}
+
+        {% if steps.aggregate-results.output.k8s %}
+        ### Kubernetes Validation
+        \{\{ '✅' if steps.aggregate-results.output.k8s.passed else '❌' \}\} \{\{ steps.aggregate-results.output.k8s.summary \}\}
+        {% endif %}
+
+        ---
+        <sub>🤖 Review by [AOF](https://docs.aof.sh) | [Re-run review](\{\{ trigger.event.pull_request.html_url \}\}/checks)</sub>
+      comments: "\{\{ steps.aggregate-results.output.inline_comments \}\}"
+
+  # Update check run
+  - name: update-check
+    agent: github
+    action: update_check_run
+    input:
+      repo: "\{\{ input.repo \}\}"
+      check_run_id: "\{\{ steps.create-check.output.id \}\}"
+      conclusion: "\{\{ steps.determine-status.output.status \}\}"
+      output:
+        title: "\{\{ steps.determine-status.output.message \}\}"
+        summary: |
+          **Security**: \{\{ steps.aggregate-results.output.security.summary \}\}
+          **Performance**: \{\{ steps.aggregate-results.output.performance.summary \}\}
+          **Quality**: Coverage \{\{ steps.aggregate-results.output.quality.coverage \}\}%
+
+  # Add labels based on content
+  - name: add-labels
+    agent: github
+    action: add_labels
+    input:
+      repo: "\{\{ input.repo \}\}"
+      issue_number: "\{\{ input.pr_number \}\}"
+      labels: "\{\{ steps.aggregate-results.output.suggested_labels \}\}"
diff --git a/flows/github/pr-review.yaml b/flows/github/pr-review.yaml
new file mode 100644
index 0000000..2e5fd10
--- /dev/null
+++ b/flows/github/pr-review.yaml
@@ -0,0 +1,25 @@
+apiVersion: aof.dev/v1
+kind: AgentFlow
+metadata:
+  name: github-pr-review
+  labels:
+    platform: github
+    event: pull_request
+
+spec:
+  description: "Automated AI code review for GitHub PRs"
+
+  nodes:
+    - id: run-review
+      type: Script
+      config:
+        scriptConfig:
+          command: ./scripts/flow-pr-review.sh
+          timeout_seconds: 180
+          fail_on_error: true
+          env:
+            AOF_TRIGGER_DATA: "${event}"
+
+  connections:
+    - from: start
+      to: run-review
diff --git a/scripts/flow-pr-review.sh b/scripts/flow-pr-review.sh
new file mode 100755
index 0000000..0e98184
--- /dev/null
+++ b/scripts/flow-pr-review.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# Flow PR Review Script - Called by AgentFlow
+# Parses trigger data and calls the actual review script
+
+set -e
+
+echo "=== AOF PR Review Flow ==="
+echo ""
+
+# Parse trigger data JSON to extract repo and PR number
+# The AOF_TRIGGER_DATA contains the full trigger data JSON like:
+# {"event":{"channel_id":"owner/repo#42","metadata":{"repo_full_name":"owner/repo","pr_number":42},...},...}
+
+if [ -z "$AOF_TRIGGER_DATA" ]; then
+  echo "Error: AOF_TRIGGER_DATA not set"
+  exit 1
+fi
+
+# Use Python to parse the JSON (more reliable than jq)
+eval "$(python3 << 'PYEOF'
+import json
+import os
+import sys
+import re
+
+try:
+    data = json.loads(os.environ.get('AOF_TRIGGER_DATA', '{}'))
+    event = data.get('event', {})
+    meta = event.get('metadata', {})
+
+    # Method 1: Get from metadata (most reliable for GitHub events)
+    repo = meta.get('repo_full_name', '')
+    pr_num = meta.get('pr_number', '')
+
+    # Method 2: Parse from channel_id (format: "owner/repo#number")
+    if not repo or not pr_num:
+        channel_id = event.get('channel_id', '')
+        if '#' in str(channel_id):
+            parts = str(channel_id).split('#')
+            if not repo:
+                repo = parts[0]
+            if not pr_num and len(parts) > 1:
+                pr_num = parts[1]
+
+    # Method 3: Parse from text (format: "pr:opened:base:head #42 Title - owner/repo")
+    if not repo or not pr_num:
+        text = event.get('text', '')
+        if not pr_num:
+            pr_match = re.search(r'#(\d+)', text)
+            if pr_match:
+                pr_num = pr_match.group(1)
+        if not repo:
+            repo_match = re.search(r' - ([a-zA-Z0-9_-]+/[a-zA-Z0-9_.-]+)\s*$', text)
+            if repo_match:
+                repo = repo_match.group(1)
+
+    # Convert pr_num to string if it's an int
+    if isinstance(pr_num, int):
+        pr_num = str(pr_num)
+
+    print(f"export REPO='{repo}'")
+    print(f"export PR_NUM='{pr_num}'")
+
+except Exception as e:
+    print(f"echo 'Parse error: {e}'", file=sys.stderr)
+    print("export REPO=''")
+    print("export PR_NUM=''")
+PYEOF
+)"
+
+if [ -z "$REPO" ] || [ -z "$PR_NUM" ]; then
+  echo "Error: Could not determine repository and PR number"
+  echo ""
+  echo "Trigger data preview:"
+  echo "${AOF_TRIGGER_DATA:0:500}..."
+  exit 1
+fi
+
+echo "Repository: $REPO"
+echo "PR Number: $PR_NUM"
+echo ""
+
+# Run the actual review script
+exec ./scripts/pr-review.sh "$REPO" "$PR_NUM"
diff --git a/scripts/pr-review.sh b/scripts/pr-review.sh
new file mode 100755
index 0000000..f65e5fa
--- /dev/null
+++ b/scripts/pr-review.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+# PR Review Script - Fetches PR, generates AI review, posts to GitHub
+# Usage: ./scripts/pr-review.sh <owner/repo> <pr_number>
+
+set -e
+
+REPO="${1:-$GITHUB_REPO}"
+PR_NUM="${2:-$GITHUB_PR_NUMBER}"
+
+if [ -z "$REPO" ] || [ -z "$PR_NUM" ]; then
+  echo "Usage: $0 <owner/repo> <pr_number>"
+  echo "Or set GITHUB_REPO and GITHUB_PR_NUMBER environment variables"
+  exit 1
+fi
+
+echo "=== AOF PR Review ==="
+echo "Repository: $REPO"
+echo "PR: #$PR_NUM"
+
+# Authenticate with GitHub
+echo "$GITHUB_TOKEN" | gh auth login --with-token 2>/dev/null || true
+
+# Get PR info
+echo "Fetching PR info..."
+TITLE=$(gh pr view "$PR_NUM" --repo "$REPO" --json title -q '.title')
+AUTHOR=$(gh pr view "$PR_NUM" --repo "$REPO" --json author -q '.author.login')
+ADDITIONS=$(gh pr view "$PR_NUM" --repo "$REPO" --json additions -q '.additions')
+DELETIONS=$(gh pr view "$PR_NUM" --repo "$REPO" --json deletions -q '.deletions')
+CHANGED=$(gh pr view "$PR_NUM" --repo "$REPO" --json changedFiles -q '.changedFiles')
+BODY=$(gh pr view "$PR_NUM" --repo "$REPO" --json body -q '.body // "No description"')
+
+echo "Title: $TITLE"
+echo "Author: @$AUTHOR"
+echo "Changes: +$ADDITIONS/-$DELETIONS in $CHANGED files"
+
+# Get the diff
+echo "Fetching diff..."
+DIFF=$(gh pr diff "$PR_NUM" --repo "$REPO" 2>/dev/null || echo "Could not fetch diff")
+
+# Build prompt
+cat > /tmp/pr-review-prompt.txt << EOF
+You are an expert code reviewer. Review this pull request thoroughly.
+
+## PR Information
+- Repository: $REPO
+- PR #$PR_NUM: $TITLE
+- Author: @$AUTHOR
+- Changes: +$ADDITIONS/-$DELETIONS in $CHANGED files
+
+## Description
+$BODY
+
+## Code Changes
+\`\`\`diff
+$DIFF
+\`\`\`
+
+Provide a comprehensive code review (under 500 words) with:
+1. **Overall Assessment**: Approve ✅ / Request Changes ⚠️ / Comment 💬
+2. **Summary**: Brief overview of the changes
+3. **Analysis**: What the changes do and their impact
+4. **Concerns**: Any issues, security risks, or improvements needed
+5. **Verdict**: Final recommendation
+
+Use markdown formatting. Be constructive and specific.
+EOF
+
+echo "Generating review with Gemini..."
+
+# Call Gemini API using Python (handles JSON properly)
+python3 << 'PYEOF'
+import json
+import urllib.request
+import os
+import sys
+
+api_key = os.environ.get('GOOGLE_API_KEY')
+if not api_key:
+    print("Error: GOOGLE_API_KEY not set")
+    sys.exit(1)
+
+with open('/tmp/pr-review-prompt.txt', 'r') as f:
+    prompt = f.read()
+
+data = {"contents": [{"parts": [{"text": prompt}]}]}
+url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key={api_key}"
+
+req = urllib.request.Request(url,
+    data=json.dumps(data).encode('utf-8'),
+    headers={'Content-Type': 'application/json'})
+
+try:
+    with urllib.request.urlopen(req, timeout=60) as response:
+        result = json.loads(response.read().decode('utf-8'))
+        review = result['candidates'][0]['content']['parts'][0]['text']
+
+        review_with_sig = review + "\n\n---\n_🤖 Automated review by [AOF Bot](https://docs.aof.sh) | Powered by Google Gemini 2.5 Flash_"
+
+        with open('/tmp/pr-review.md', 'w') as f:
+            f.write(review_with_sig)
+
+        print("Review generated!")
+except Exception as e:
+    print(f"API Error: {e}")
+    sys.exit(1)
+PYEOF
+
+# Post to GitHub
+if [ -f /tmp/pr-review.md ]; then
+  echo "Posting review to GitHub..."
+  gh pr comment "$PR_NUM" --repo "$REPO" --body-file /tmp/pr-review.md
+  echo ""
+  echo "✅ Review posted successfully!"
+  echo "View at: https://github.com/$REPO/pull/$PR_NUM"
+else
+  echo "❌ Failed to generate review"
+  exit 1
+fi
diff --git a/scripts/test-github-webhook.sh b/scripts/test-github-webhook.sh
new file mode 100755
index 0000000..9e1c08e
--- /dev/null
+++ b/scripts/test-github-webhook.sh
@@ -0,0 +1,146 @@
+#!/bin/bash
+# Test GitHub webhook locally
+
+set -e
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+echo -e "${GREEN}Testing GitHub Webhook Integration${NC}"
+echo ""
+
+# Check if server is running
+echo -e "${YELLOW}1. Checking if AOF server is running...${NC}"
+if curl -s http://localhost:8080/health > /dev/null 2>&1; then
+    echo -e "${GREEN}✓ Server is running${NC}"
+else
+    echo -e "${RED}✗ Server is not running${NC}"
+    echo "Start the server with: ./target/release/aofctl serve --config config/aof/daemon.yaml"
+    exit 1
+fi
+
+# Check environment variables
+echo -e "\n${YELLOW}2. Checking environment variables...${NC}"
+if [ -z "$GITHUB_WEBHOOK_SECRET" ]; then
+    echo -e "${RED}✗ GITHUB_WEBHOOK_SECRET not set${NC}"
+    echo "Set it with: export GITHUB_WEBHOOK_SECRET=\$(openssl rand -hex 32)"
+    exit 1
+else
+    echo -e "${GREEN}✓ GITHUB_WEBHOOK_SECRET is set${NC}"
+fi
+
+if [ -z "$GITHUB_TOKEN" ]; then
+    echo -e "${YELLOW}⚠ GITHUB_TOKEN not set (API features will be disabled)${NC}"
+else
+    echo -e "${GREEN}✓ GITHUB_TOKEN is set${NC}"
+fi
+
+# Test ping event
+echo -e "\n${YELLOW}3. Testing GitHub ping event...${NC}"
+
+# Create HMAC signature
+PAYLOAD='{"zen":"AOF test webhook","hook_id":12345678,"hook":{"type":"Repository","id":12345678,"active":true},"repository":{"id":123456,"name":"test-repo","full_name":"test/test-repo","private":false},"sender":{"id":1,"login":"testuser","type":"User"}}'
+
+# Calculate signature (GitHub uses HMAC-SHA256)
+SIGNATURE="sha256=$(echo -n "$PAYLOAD" | openssl dgst -sha256 -hmac "$GITHUB_WEBHOOK_SECRET" | sed 's/^.* //')"
+
+echo "Payload: $PAYLOAD"
+echo "Signature: ${SIGNATURE:0:20}..."
+
+# Send webhook
+HTTP_CODE=$(curl -s -w "%{http_code}" -o /tmp/webhook_response.txt -X POST http://localhost:8080/webhook/github \
+  -H "Content-Type: application/json" \
+  -H "X-GitHub-Event: ping" \
+  -H "X-Hub-Signature-256: $SIGNATURE" \
+  -H "X-GitHub-Delivery: $(uuidgen)" \
+  -d "$PAYLOAD")
+
+BODY=$(cat /tmp/webhook_response.txt)
+
+if [ "$HTTP_CODE" = "200" ]; then
+    echo -e "${GREEN}✓ Ping event successful (HTTP $HTTP_CODE)${NC}"
+    echo "Response: $BODY"
+else
+    echo -e "${RED}✗ Ping event failed (HTTP $HTTP_CODE)${NC}"
+    echo "Response: $BODY"
+    exit 1
+fi
+
+# Test pull request event
+echo -e "\n${YELLOW}4. Testing GitHub pull_request event...${NC}"
+
+PR_PAYLOAD=$(cat <<'EOF'
+{
+  "action": "opened",
+  "number": 1,
+  "pull_request": {
+    "id": 1,
+    "number": 1,
+    "title": "Test PR",
+    "body": "This is a test pull request",
+    "state": "open",
+    "draft": false,
+    "merged": false,
+    "html_url": "https://github.com/test/repo/pull/1",
+    "user": {
+      "id": 12345,
+      "login": "testuser",
+      "type": "User"
+    },
+    "base": {
+      "ref": "main",
+      "sha": "abc123"
+    },
+    "head": {
+      "ref": "feature-branch",
+      "sha": "def456"
+    },
+    "additions": 10,
+    "deletions": 5,
+    "changed_files": 2
+  },
+  "repository": {
+    "id": 123456,
+    "name": "repo",
+    "full_name": "test/repo",
+    "private": false
+  },
+  "sender": {
+    "id": 12345,
+    "login": "testuser",
+    "type": "User"
+  }
+}
+EOF
+)
+
+PR_SIGNATURE="sha256=$(echo -n "$PR_PAYLOAD" | openssl dgst -sha256 -hmac "$GITHUB_WEBHOOK_SECRET" | sed 's/^.* //')"
+
+HTTP_CODE=$(curl -s -w "%{http_code}" -o /tmp/webhook_response.txt -X POST http://localhost:8080/webhook/github \
+  -H "Content-Type: application/json" \
+  -H "X-GitHub-Event: pull_request" \
+  -H "X-Hub-Signature-256: $PR_SIGNATURE" \
+  -H "X-GitHub-Delivery: $(uuidgen)" \
+  -d "$PR_PAYLOAD")
+
+BODY=$(cat /tmp/webhook_response.txt)
+
+if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "202" ]; then
+    echo -e "${GREEN}✓ Pull request event successful (HTTP $HTTP_CODE)${NC}"
+    echo "Response: $BODY"
+else
+    echo -e "${RED}✗ Pull request event failed (HTTP $HTTP_CODE)${NC}"
+    echo "Response: $BODY"
+    exit 1
+fi
+
+echo -e "\n${GREEN}All tests passed! ✓${NC}"
+echo ""
+echo "Next steps:"
+echo "1. Configure GitHub webhook: https://github.com/<owner>/<repo>/settings/hooks"
+echo "2. Use ngrok for local testing: ngrok http 8080"
+echo "3. Set Payload URL to: https://your-ngrok-url.ngrok.io/webhook/github"
+echo "4. Use the same GITHUB_WEBHOOK_SECRET in GitHub webhook configuration"

From d16285035303d006e099321de23391f3bc298754 Mon Sep 17 00:00:00 2001
From: Gourav Shah <gs@initcron.org>
Date: Fri, 23 Jan 2026 14:25:23 +0530
Subject: [PATCH 11/14] chore: Quick wins - update ROADMAP, fix GitHub test,
 add improvement plan

- Update ROADMAP.md to reflect current v0.3.2-beta state
- Mark GitHub, Jira, Telegram, WhatsApp, PagerDuty, Opsgenie as complete
- Fix GitHub platform test (channel_id includes PR number)
- Add docs/internal/improvement-plan.md with strategic roadmap
- Closed stale GitHub issues #78, #79, #80, #81, #82, #98

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 ROADMAP.md                                  | 146 ++++++++++++-------
 crates/aof-triggers/src/platforms/github.rs |   2 +-
 docs/internal/improvement-plan.md           | 154 ++++++++++++++++++++
 3 files changed, 248 insertions(+), 54 deletions(-)
 create mode 100644 docs/internal/improvement-plan.md

diff --git a/ROADMAP.md b/ROADMAP.md
index 79910c1..da01713 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -19,11 +19,11 @@ AOF is designed as a **modular, pluggable framework** where components are reusa
 │  │ WhatsApp    │          │ Agent        │      │ Discord    │  │
 │  │ Telegram    │          │ Conditional  │      │ HTTP       │  │
 │  │ GitHub      │          │ Parallel     │      │ Email      │  │
-│  │ HTTP        │          │ Join         │      │ File       │  │
-│  │ Schedule    │          │ Wait         │      │ ...        │  │
-│  │ PagerDuty   │          │ Approval     │      └────────────┘  │
-│  │ Kafka       │          │ Loop         │                       │
-│  │ ...         │          │ ...          │                       │
+│  │ Jira        │          │ Join         │      │ File       │  │
+│  │ HTTP        │          │ Wait         │      │ ...        │  │
+│  │ Schedule    │          │ Approval     │      └────────────┘  │
+│  │ PagerDuty   │          │ Loop         │                       │
+│  │ Opsgenie    │          │ ...          │                       │
 │  └─────────────┘          └──────────────┘                       │
 │                                                                  │
 │  AGENTS                    MEMORY                TOOLS           │
@@ -33,6 +33,8 @@ AOF is designed as a **modular, pluggable framework** where components are reusa
 │  │ Context     │          │ SQLite       │      │ FileSystem │  │
 │  │ Tools       │          │ Redis        │      │ MCP        │  │
 │  └─────────────┘          └──────────────┘      │ kubectl    │  │
+│                                                  │ Grafana    │  │
+│                                                  │ Datadog    │  │
 │                                                  └────────────┘  │
 ├─────────────────────────────────────────────────────────────────┤
 │                    ORCHESTRATION                                 │
@@ -72,7 +74,7 @@ trait NodeExecutor {
 
 ---
 
-## Current Release: v0.1.15
+## Current Release: v0.3.2-beta
 
 ### Implemented Features
 
@@ -85,21 +87,41 @@ trait NodeExecutor {
 | Built-in tools (Shell, HTTP, FileSystem) | ✅ Complete | |
 | AgentFleet multi-agent coordination | ✅ Complete | |
 | AgentFlow workflow orchestration | ✅ Complete | v1 schema with nodes/connections |
+| Agent Library (30 pre-built agents) | ✅ Complete | K8s, Observability, Incident, CI/CD, Security, Cloud |
 
 #### Trigger Types
-| Trigger | Status | Priority | Notes |
-|---------|--------|----------|-------|
-| Slack | ✅ Complete | - | app_mention, message, slash_command |
-| HTTP/Webhook | ✅ Complete | - | POST/GET with variable access |
-| Schedule (Cron) | ✅ Complete | - | With timezone support |
-| Manual (CLI) | ✅ Complete | - | `aofctl run` |
-| Discord | ⚠️ Partial | P2 | message_create events only |
-| Telegram | 🔄 Planned | P1 | [Issue #24](https://github.com/agenticdevops/aof/issues/24) |
-| WhatsApp | 🔄 Planned | P1 | [Issue #23](https://github.com/agenticdevops/aof/issues/23) |
-| GitHub | 🔄 Planned | P1 | [Issue #25](https://github.com/agenticdevops/aof/issues/25) |
-| PagerDuty | 🔄 Planned | P3 | [Issue #26](https://github.com/agenticdevops/aof/issues/26) |
-| Kafka | 🔄 Planned | P3 | Issue TBD |
-| SQS | 🔄 Planned | P3 | Issue TBD |
+| Trigger | Status | Notes |
+|---------|--------|-------|
+| Slack | ✅ Complete | app_mention, message, slash_command |
+| HTTP/Webhook | ✅ Complete | POST/GET with variable access |
+| Schedule (Cron) | ✅ Complete | With timezone support |
+| Manual (CLI) | ✅ Complete | `aofctl run` |
+| Telegram | ✅ Complete | Messages, inline keyboards |
+| WhatsApp | ✅ Complete | Messages, interactive buttons |
+| GitHub | ✅ Complete | PR, Issues, Push, Reviews |
+| Jira | ✅ Complete | Issues, Comments, Automation webhooks |
+| GitLab | ✅ Complete | MR, Issues, Push |
+| Bitbucket | ✅ Complete | PR, Push |
+| PagerDuty | ✅ Complete | Incidents, alerts |
+| Opsgenie | ✅ Complete | Alerts, on-call |
+| Discord | ⚠️ Partial | message_create events only |
+| ServiceNow | 🔄 Planned | [Issue #48] |
+| Kafka | 🔄 Planned | Event streaming |
+| SQS | 🔄 Planned | AWS queue integration |
+
+#### Tools
+| Tool | Status | Notes |
+|------|--------|-------|
+| Shell | ✅ Complete | Command execution |
+| HTTP | ✅ Complete | REST API calls |
+| FileSystem | ✅ Complete | File operations |
+| MCP | ✅ Complete | Model Context Protocol |
+| Grafana | ✅ Complete | Dashboards, alerts |
+| Datadog | ✅ Complete | Metrics, monitors |
+| Prometheus | ✅ Complete | Metrics queries |
+| Loki | ⚠️ Partial | Basic log queries |
+| Jaeger | 🔄 Planned | [Issue #50] |
+| Jenkins | 🔄 Planned | [Issue #55] |
 
 #### Node Types (AgentFlow)
 | Node | Status | Notes |
@@ -125,35 +147,41 @@ trait NodeExecutor {
 | Bot self-approval prevention | ✅ Complete | Auto-detects bot_user_id |
 | Conversation memory | ✅ Complete | Per-channel/thread isolation |
 | Multi-tenant routing | ✅ Complete | FlowRouter with priorities |
+| Built-in commands | ✅ Complete | /help, /agent, /fleet menus |
+| Stale message filtering | ✅ Complete | Drops old queued messages |
 | Config hot-reload | 🔄 Planned | [Issue #22] |
 
 ---
 
 ## Roadmap by Priority
 
-### P0 - Critical (Current Sprint)
+### P0 - Current Focus (v0.3.3)
 - [x] Slack approval workflow
 - [x] Conversation memory
 - [x] Multi-tenant routing
-- [ ] Fix/organize flow examples
-
-### P1 - High Priority (Individual Users)
-- [ ] **WhatsApp trigger** - Interactive buttons for approval
-- [ ] **Telegram trigger** - Inline keyboards for bots
-- [ ] **GitHub trigger** - PR/Issue webhooks
-- [ ] Tutorial documentation for individual users
-
-### P2 - Medium Priority
-- [ ] Discord full implementation
-- [ ] Loop node for batch operations
-- [ ] HTTP node full implementation
-- [ ] State persistence (checkpointing)
-
-### P3 - Lower Priority (Enterprise/SRE)
-- [ ] PagerDuty trigger
+- [x] GitHub/Jira triggers
+- [ ] **Structured I/O Schemas** - Standardize agent outputs ([#74], [#75], [#76])
+- [ ] **MCP Server Catalog** - Document available integrations ([#71])
+
+### P1 - Developer Experience
+- [ ] Structured output schemas for agents
+- [ ] MCP server catalog documentation
+- [ ] More real-world flow examples
+- [ ] Improved error messages (serde_path_to_error)
+
+### P2 - Enterprise Features
+- [ ] **Horizontal scaling** - Redis/NATS message queue ([#47])
+- [ ] **Multi-org support** - Per-org credentials ([#46])
+- [ ] **Config hot-reload** - No restart updates ([#22])
+- [ ] **ServiceNow trigger** - Enterprise ITSM ([#48])
+
+### P3 - Additional Integrations
 - [ ] Kafka trigger
 - [ ] SQS trigger
-- [ ] AgentFleet integration in flows (v1alpha1 syntax)
+- [ ] Jaeger tool ([#50])
+- [ ] Jenkins tool ([#55])
+- [ ] Loki enhancement ([#49])
+- [ ] Loop node for batch operations
 
 ---
 
@@ -205,18 +233,15 @@ spec:
 | `multi-tenant/slack-prod-k8s-bot.yaml` | Slack | Channel filtering |
 | `multi-tenant/slack-staging-k8s-bot.yaml` | Slack | Environment context |
 | `multi-tenant/slack-dev-local-bot.yaml` | Slack | Local development |
+| `flows/github/pr-review-flow.yaml` | GitHub | PR review automation |
+| `flows/github/issue-triage-flow.yaml` | GitHub | Issue labeling |
 
-### Planned Examples (v1alpha1 schema)
-These examples demonstrate future syntax and require additional implementation:
-
+### Planned Examples
 | Example | Requires | Status |
 |---------|----------|--------|
-| `planned/incident-auto-remediation-flow.yaml` | PagerDuty, Fleet | Planned |
-| `planned/pr-review-flow.yaml` | GitHub, Fleet | Planned |
-| `planned/daily-standup-report-flow.yaml` | Cron, Fleet, Jira | Planned |
-| `planned/slack-qa-bot-flow.yaml` | Inline agent spec | Planned |
-| `planned/cost-optimization-flow.yaml` | Schedule, Fleet | Planned |
-| `planned/deploy-notification-flow.yaml` | GitHub, Fleet | Planned |
+| `incident-auto-remediation-flow.yaml` | PagerDuty, Fleet | Planned |
+| `daily-standup-report-flow.yaml` | Cron, Fleet, Jira | Planned |
+| `cost-optimization-flow.yaml` | Schedule, Fleet | Planned |
 
 ---
 
@@ -224,13 +249,28 @@ These examples demonstrate future syntax and require additional implementation:
 
 Track progress on GitHub: https://github.com/agenticdevops/aof/issues
 
-| Issue | Title | Priority | Labels |
-|-------|-------|----------|--------|
-| [#22](https://github.com/agenticdevops/aof/issues/22) | Config hot-reload | P2 | enhancement |
-| [#23](https://github.com/agenticdevops/aof/issues/23) | WhatsApp trigger support | P1 | enhancement |
-| [#24](https://github.com/agenticdevops/aof/issues/24) | Telegram trigger support | P1 | enhancement |
-| [#25](https://github.com/agenticdevops/aof/issues/25) | GitHub webhook trigger | P1 | enhancement |
-| [#26](https://github.com/agenticdevops/aof/issues/26) | PagerDuty trigger | P3 | enhancement |
+### Open Issues
+| Issue | Title | Priority |
+|-------|-------|----------|
+| [#22](https://github.com/agenticdevops/aof/issues/22) | Config hot-reload | P2 |
+| [#46](https://github.com/agenticdevops/aof/issues/46) | Multi-org support | P1 |
+| [#47](https://github.com/agenticdevops/aof/issues/47) | Horizontal scaling | P1 |
+| [#48](https://github.com/agenticdevops/aof/issues/48) | ServiceNow trigger | P2 |
+| [#49](https://github.com/agenticdevops/aof/issues/49) | Loki enhancement | P1 |
+| [#50](https://github.com/agenticdevops/aof/issues/50) | Jaeger tool | P2 |
+| [#55](https://github.com/agenticdevops/aof/issues/55) | Jenkins tool | P2 |
+| [#71](https://github.com/agenticdevops/aof/issues/71) | MCP Server Catalog | P0 |
+| [#74](https://github.com/agenticdevops/aof/issues/74) | Structured I/O | P0 |
+
+### Recently Closed
+| Issue | Title | Release |
+|-------|-------|---------|
+| [#78](https://github.com/agenticdevops/aof/issues/78) | Grafana tool | v0.3.0 |
+| [#79](https://github.com/agenticdevops/aof/issues/79) | PagerDuty trigger | v0.3.0 |
+| [#80](https://github.com/agenticdevops/aof/issues/80) | Datadog tool | v0.3.0 |
+| [#81](https://github.com/agenticdevops/aof/issues/81) | Incident agents | v0.3.0 |
+| [#82](https://github.com/agenticdevops/aof/issues/82) | Opsgenie trigger | v0.3.0 |
+| [#98](https://github.com/agenticdevops/aof/issues/98) | Jira Automation | v0.3.3 |
 
 ---
 
diff --git a/crates/aof-triggers/src/platforms/github.rs b/crates/aof-triggers/src/platforms/github.rs
index cedca79..0e21bb2 100644
--- a/crates/aof-triggers/src/platforms/github.rs
+++ b/crates/aof-triggers/src/platforms/github.rs
@@ -1401,7 +1401,7 @@ mod tests {
 
         let message = result.unwrap();
         assert_eq!(message.platform, "github");
-        assert_eq!(message.channel_id, "owner/repo");
+        assert_eq!(message.channel_id, "owner/repo#42"); // Includes PR number for comment posting
         assert!(message.text.contains("pr:opened"));
         assert_eq!(message.user.id, "456");
         assert_eq!(message.user.username, Some("testuser".to_string()));
diff --git a/docs/internal/improvement-plan.md b/docs/internal/improvement-plan.md
new file mode 100644
index 0000000..d7001fd
--- /dev/null
+++ b/docs/internal/improvement-plan.md
@@ -0,0 +1,154 @@
+# AOF Improvement Plan
+
+> Last Updated: 2026-01-23
+
+## Current State
+
+- **Version**: v0.3.2-beta
+- **Tests**: 139 passing
+- **Platforms**: Slack, Telegram, Discord, GitHub, Jira, WhatsApp, Bitbucket, GitLab
+- **Agents**: 30 pre-built in library
+- **Open Issues**: ~20
+
+## Strategic Priorities
+
+### Phase 1: Stabilization & Polish (v0.3.3)
+
+| Priority | Task | Issue | Status |
+|----------|------|-------|--------|
+| P0 | Close stale issues marked `[DONE]` | #82, #81, #80, #79, #78 | Pending |
+| P0 | Update ROADMAP.md (GitHub/Jira implemented) | - | Pending |
+| P1 | **Structured I/O Schemas** | #74, #75, #76 | In Progress |
+| P1 | **MCP Server Catalog** | #71 | In Progress |
+| P2 | Improve error messages with serde_path_to_error | - | Pending |
+
+### Phase 2: Enterprise Features (v0.4.0)
+
+| Priority | Task | Issue | Effort |
+|----------|------|-------|--------|
+| P1 | **Horizontal scaling** - Redis/NATS message queue | #47 | 1 week |
+| P1 | **Multi-org support** - per-org credentials | #46 | 3 days |
+| P2 | **ServiceNow trigger** - enterprise ITSM | #48 | 3 days |
+| P2 | **Config hot-reload** - no restart updates | #22 | 2 days |
+
+### Phase 3: Observability & Tools (v0.4.x)
+
+| Priority | Task | Issue | Effort |
+|----------|------|-------|--------|
+| P1 | **Loki tool enhancement** - better log queries | #49 | 2 days |
+| P2 | **Jaeger tool** - trace analysis | #50 | 2 days |
+| P2 | **Jenkins tool** - CI/CD integration | #55 | 2 days |
+| P3 | **NewRelic integration** | - | 3 days |
+
+### Phase 4: Agent Intelligence (v0.5.0)
+
+| Task | Description | Effort |
+|------|-------------|--------|
+| **Loop node** | Iterate over collections in flows | 2 days |
+| **State checkpointing** | Persist flow state for recovery | 3 days |
+| **AgentFleet v2** | Better multi-agent coordination | 1 week |
+| **Learning/feedback** | Agents learn from outcomes | 2 weeks |
+
+## Developer Experience Focus (Current Priority)
+
+### 1. Structured I/O Schemas
+
+**Goal**: Standardize agent inputs/outputs for better composability.
+
+**Design**:
+```yaml
+# Agent with structured output
+apiVersion: aof.dev/v1
+kind: Agent
+metadata:
+  name: pod-analyzer
+spec:
+  output_schema:
+    type: object
+    properties:
+      status:
+        type: string
+        enum: [healthy, degraded, critical]
+      issues:
+        type: array
+        items:
+          type: object
+          properties:
+            severity: { type: string }
+            message: { type: string }
+            recommendation: { type: string }
+```
+
+**Benefits**:
+- Type-safe flow connections
+- Better error handling
+- Auto-generated documentation
+- IDE autocomplete support
+
+### 2. MCP Server Catalog
+
+**Goal**: Document all available MCP servers and their capabilities.
+
+**Structure**:
+```
+docs/mcp-catalog/
+├── index.md           # Overview and quick reference
+├── kubernetes.md      # kubectl, helm, k9s
+├── observability.md   # prometheus, grafana, datadog
+├── cloud.md           # aws, gcp, azure
+├── databases.md       # postgres, redis, mongodb
+└── development.md     # git, github, filesystem
+```
+
+**Each entry includes**:
+- Installation instructions
+- Available tools
+- Example usage
+- Configuration options
+
+## Quick Wins Checklist
+
+- [ ] Close GitHub issues #78, #79, #80, #81, #82
+- [ ] Update ROADMAP.md with current status
+- [ ] Add `serde_path_to_error` to remaining YAML parsers
+- [ ] Add more real-world flow examples
+- [ ] Fix GitHub platform test (channel_id format)
+
+## Architecture Decisions
+
+### ADR-001: Structured I/O Schema Format
+
+**Decision**: Use JSON Schema for output definitions, embedded in YAML.
+
+**Rationale**:
+- Industry standard
+- Tool support (validation, generation)
+- Compatible with OpenAPI
+
+### ADR-002: MCP Catalog Organization
+
+**Decision**: Organize by domain (k8s, observability, cloud) not by server.
+
+**Rationale**:
+- Users think in terms of what they want to do
+- Easier to find relevant tools
+- Supports multiple servers per domain
+
+## Success Metrics
+
+| Metric | Current | Target |
+|--------|---------|--------|
+| Test coverage | ~60% | 80% |
+| Doc pages | 40+ | 60+ |
+| Example flows | 10 | 25 |
+| MCP servers documented | 5 | 15 |
+| GitHub stars | - | 500 |
+
+## Timeline
+
+```
+Week 1-2: Quick wins + Structured I/O design
+Week 3-4: Structured I/O implementation + MCP Catalog
+Week 5-6: Testing, documentation, v0.3.3 release
+Week 7+:  Enterprise features (Phase 2)
+```

From 8bd9d7e3fb13cc96908c4ac90bdc19bc1c4d6ff0 Mon Sep 17 00:00:00 2001
From: Gourav Shah <gs@initcron.org>
Date: Fri, 23 Jan 2026 14:29:21 +0530
Subject: [PATCH 12/14] docs: Expand MCP Server Catalog with Kubernetes,
 observability, and cloud servers

Added comprehensive documentation for:
- Kubernetes MCP server (kubectl, pods, logs, describe)
- Prometheus MCP server (PromQL, alerts, rules)
- Grafana MCP server (dashboards, annotations, data sources)
- Datadog MCP server (metrics, monitors, logs)
- AWS MCP server (EC2, S3, Lambda, CloudWatch)

Updated index with:
- Organized catalog by category
- Configuration patterns (agent-level, daemon-level)
- Security best practices
- Custom MCP server template
- Troubleshooting guide

Addresses #71 MCP Server Catalog

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 docs/mcp-catalog/aws.md        | 384 +++++++++++++++++++++++++++++++++
 docs/mcp-catalog/datadog.md    | 343 +++++++++++++++++++++++++++++
 docs/mcp-catalog/grafana.md    | 333 ++++++++++++++++++++++++++++
 docs/mcp-catalog/index.md      | 214 ++++++++++++++----
 docs/mcp-catalog/kubernetes.md | 279 ++++++++++++++++++++++++
 docs/mcp-catalog/prometheus.md | 285 ++++++++++++++++++++++++
 6 files changed, 1797 insertions(+), 41 deletions(-)
 create mode 100644 docs/mcp-catalog/aws.md
 create mode 100644 docs/mcp-catalog/datadog.md
 create mode 100644 docs/mcp-catalog/grafana.md
 create mode 100644 docs/mcp-catalog/kubernetes.md
 create mode 100644 docs/mcp-catalog/prometheus.md

diff --git a/docs/mcp-catalog/aws.md b/docs/mcp-catalog/aws.md
new file mode 100644
index 0000000..54e8ae9
--- /dev/null
+++ b/docs/mcp-catalog/aws.md
@@ -0,0 +1,384 @@
+---
+sidebar_position: 13
+sidebar_label: AWS
+---
+
+# AWS MCP Server
+
+Interact with AWS services including EC2, S3, Lambda, CloudWatch, and more.
+
+## Installation
+
+```bash
+# Using npx
+npx -y @anthropic/mcp-server-aws
+
+# Or via npm
+npm install -g @anthropic/mcp-server-aws
+```
+
+## Configuration
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: Agent
+metadata:
+  name: aws-agent
+spec:
+  model: google:gemini-2.5-flash
+  mcp_servers:
+    - name: aws
+      command: npx
+      args: ["-y", "@anthropic/mcp-server-aws"]
+      env:
+        AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID}
+        AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY}
+        AWS_REGION: us-east-1
+```
+
+### With IAM Role (EKS/EC2)
+
+```yaml
+mcp_servers:
+  - name: aws
+    command: npx
+    args: ["-y", "@anthropic/mcp-server-aws"]
+    env:
+      AWS_REGION: us-east-1
+      # Uses IAM role attached to pod/instance
+```
+
+### With Assumed Role
+
+```yaml
+mcp_servers:
+  - name: aws
+    command: npx
+    args: ["-y", "@anthropic/mcp-server-aws"]
+    env:
+      AWS_REGION: us-east-1
+      AWS_ROLE_ARN: arn:aws:iam::123456789:role/aof-agent-role
+```
+
+## Available Tools
+
+### EC2
+
+#### list_instances
+
+List EC2 instances with filters.
+
+```json
+{
+  "name": "list_instances",
+  "arguments": {
+    "filters": [
+      {"Name": "tag:Environment", "Values": ["production"]},
+      {"Name": "instance-state-name", "Values": ["running"]}
+    ],
+    "max_results": 50
+  }
+}
+```
+
+#### describe_instance
+
+Get detailed instance information.
+
+```json
+{
+  "name": "describe_instance",
+  "arguments": {
+    "instance_id": "i-1234567890abcdef0"
+  }
+}
+```
+
+#### get_instance_status
+
+Get instance status checks.
+
+```json
+{
+  "name": "get_instance_status",
+  "arguments": {
+    "instance_ids": ["i-1234567890abcdef0"]
+  }
+}
+```
+
+### S3
+
+#### list_buckets
+
+List S3 buckets.
+
+```json
+{
+  "name": "list_buckets",
+  "arguments": {}
+}
+```
+
+#### list_objects
+
+List objects in a bucket.
+
+```json
+{
+  "name": "list_objects",
+  "arguments": {
+    "bucket": "my-bucket",
+    "prefix": "logs/2024/",
+    "max_keys": 100
+  }
+}
+```
+
+#### get_object
+
+Get object content (text files only).
+
+```json
+{
+  "name": "get_object",
+  "arguments": {
+    "bucket": "my-bucket",
+    "key": "config/settings.json"
+  }
+}
+```
+
+### CloudWatch
+
+#### get_metrics
+
+Query CloudWatch metrics.
+
+```json
+{
+  "name": "get_metrics",
+  "arguments": {
+    "namespace": "AWS/EC2",
+    "metric_name": "CPUUtilization",
+    "dimensions": [
+      {"Name": "InstanceId", "Value": "i-1234567890abcdef0"}
+    ],
+    "start_time": "2024-01-15T00:00:00Z",
+    "end_time": "2024-01-15T12:00:00Z",
+    "period": 300,
+    "statistic": "Average"
+  }
+}
+```
+
+#### get_alarms
+
+List CloudWatch alarms.
+
+```json
+{
+  "name": "get_alarms",
+  "arguments": {
+    "state_value": "ALARM",
+    "alarm_name_prefix": "Production-"
+  }
+}
+```
+
+#### get_log_events
+
+Get CloudWatch log events.
+
+```json
+{
+  "name": "get_log_events",
+  "arguments": {
+    "log_group": "/aws/lambda/my-function",
+    "log_stream": "2024/01/15/[$LATEST]abc123",
+    "start_time": "2024-01-15T11:00:00Z",
+    "end_time": "2024-01-15T12:00:00Z",
+    "limit": 100
+  }
+}
+```
+
+### Lambda
+
+#### list_functions
+
+List Lambda functions.
+
+```json
+{
+  "name": "list_functions",
+  "arguments": {
+    "max_items": 50
+  }
+}
+```
+
+#### get_function
+
+Get function configuration.
+
+```json
+{
+  "name": "get_function",
+  "arguments": {
+    "function_name": "my-function"
+  }
+}
+```
+
+#### invoke_function
+
+Invoke a Lambda function.
+
+```json
+{
+  "name": "invoke_function",
+  "arguments": {
+    "function_name": "my-function",
+    "payload": {"key": "value"},
+    "invocation_type": "RequestResponse"
+  }
+}
+```
+
+### Cost Explorer
+
+#### get_cost_and_usage
+
+Get cost and usage data.
+
+```json
+{
+  "name": "get_cost_and_usage",
+  "arguments": {
+    "start": "2024-01-01",
+    "end": "2024-01-31",
+    "granularity": "DAILY",
+    "metrics": ["UnblendedCost"],
+    "group_by": [
+      {"Type": "DIMENSION", "Key": "SERVICE"}
+    ]
+  }
+}
+```
+
+## Use Cases
+
+### Cost Optimizer Agent
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: Agent
+metadata:
+  name: cost-optimizer
+spec:
+  model: google:gemini-2.5-flash
+  instructions: |
+    Analyze AWS costs and recommend optimizations.
+
+    Check for:
+    - Unused/idle EC2 instances
+    - Unattached EBS volumes
+    - Old snapshots
+    - Right-sizing opportunities
+    - Reserved instance recommendations
+  mcp_servers:
+    - name: aws
+      command: npx
+      args: ["-y", "@anthropic/mcp-server-aws"]
+      env:
+        AWS_REGION: us-east-1
+```
+
+### Infrastructure Monitor Agent
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: Agent
+metadata:
+  name: infra-monitor
+spec:
+  model: google:gemini-2.5-flash
+  instructions: |
+    Monitor AWS infrastructure health.
+
+    When alerted:
+    1. Check CloudWatch alarms
+    2. Get relevant metrics
+    3. Check instance status
+    4. Review recent logs
+    5. Suggest remediation
+  mcp_servers:
+    - name: aws
+      command: npx
+      args: ["-y", "@anthropic/mcp-server-aws"]
+      env:
+        AWS_REGION: us-east-1
+```
+
+## Security Considerations
+
+1. **IAM Policies**: Use least-privilege IAM policies
+2. **Role Assumption**: Prefer IAM roles over static credentials
+3. **Resource Tagging**: Restrict access by resource tags
+4. **Audit Logging**: Enable CloudTrail for API auditing
+
+### Example IAM Policy
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Effect": "Allow",
+      "Action": [
+        "ec2:Describe*",
+        "cloudwatch:GetMetricData",
+        "cloudwatch:DescribeAlarms",
+        "logs:GetLogEvents",
+        "s3:GetObject",
+        "s3:ListBucket"
+      ],
+      "Resource": "*",
+      "Condition": {
+        "StringEquals": {
+          "aws:ResourceTag/Environment": "production"
+        }
+      }
+    }
+  ]
+}
+```
+
+## Troubleshooting
+
+### Authentication Issues
+
+```bash
+# Verify credentials
+aws sts get-caller-identity
+
+# Test specific permission
+aws ec2 describe-instances --dry-run
+```
+
+### Region Issues
+
+```bash
+# Check configured region
+aws configure get region
+
+# List available regions
+aws ec2 describe-regions --output table
+```
+
+## Related
+
+- [Cost Optimizer Agent](/docs/agent-library/cloud/cost-optimizer)
+- [Capacity Planner Agent](/docs/agent-library/cloud/capacity-planner)
+- [AWS Triggers](/docs/triggers/aws)
diff --git a/docs/mcp-catalog/datadog.md b/docs/mcp-catalog/datadog.md
new file mode 100644
index 0000000..950078e
--- /dev/null
+++ b/docs/mcp-catalog/datadog.md
@@ -0,0 +1,343 @@
+---
+sidebar_position: 14
+sidebar_label: Datadog
+---
+
+# Datadog MCP Server
+
+Query metrics, monitors, and events from Datadog for observability automation.
+
+## Installation
+
+```bash
+# Using npx
+npx -y @anthropic/mcp-server-datadog
+
+# Or via npm
+npm install -g @anthropic/mcp-server-datadog
+```
+
+## Configuration
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: Agent
+metadata:
+  name: datadog-agent
+spec:
+  model: google:gemini-2.5-flash
+  mcp_servers:
+    - name: datadog
+      command: npx
+      args: ["-y", "@anthropic/mcp-server-datadog"]
+      env:
+        DD_API_KEY: ${DD_API_KEY}
+        DD_APP_KEY: ${DD_APP_KEY}
+        DD_SITE: datadoghq.com  # or datadoghq.eu, us3.datadoghq.com, etc.
+```
+
+## Available Tools
+
+### query_metrics
+
+Query timeseries metrics.
+
+```json
+{
+  "name": "query_metrics",
+  "arguments": {
+    "query": "avg:system.cpu.user{env:production} by {host}",
+    "from": 1705276800,
+    "to": 1705320000
+  }
+}
+```
+
+**Parameters**:
+- `query` (required): Datadog metrics query
+- `from` (required): Start timestamp (epoch seconds)
+- `to` (required): End timestamp (epoch seconds)
+
+### get_monitors
+
+List monitors with filters.
+
+```json
+{
+  "name": "get_monitors",
+  "arguments": {
+    "tags": ["env:production", "team:platform"],
+    "monitor_tags": ["service:api"],
+    "group_states": ["Alert", "Warn"]
+  }
+}
+```
+
+**Parameters**:
+- `tags` (optional): Filter by tags
+- `monitor_tags` (optional): Filter by monitor tags
+- `group_states` (optional): Filter by states (Alert, Warn, No Data, OK)
+
+### get_monitor
+
+Get specific monitor details.
+
+```json
+{
+  "name": "get_monitor",
+  "arguments": {
+    "monitor_id": 12345678
+  }
+}
+```
+
+### get_events
+
+Get events from event stream.
+
+```json
+{
+  "name": "get_events",
+  "arguments": {
+    "start": 1705276800,
+    "end": 1705320000,
+    "tags": ["env:production"],
+    "priority": "normal",
+    "sources": ["kubernetes", "cloudwatch"]
+  }
+}
+```
+
+**Parameters**:
+- `start` (required): Start timestamp
+- `end` (required): End timestamp
+- `tags` (optional): Filter by tags
+- `priority` (optional): Filter by priority (low, normal)
+- `sources` (optional): Filter by source
+
+### post_event
+
+Create an event.
+
+```json
+{
+  "name": "post_event",
+  "arguments": {
+    "title": "Deployment: api-v1.2.3",
+    "text": "Deployed new version of API service",
+    "tags": ["env:production", "service:api"],
+    "alert_type": "info",
+    "source_type_name": "aof"
+  }
+}
+```
+
+**Parameters**:
+- `title` (required): Event title
+- `text` (required): Event body (supports markdown)
+- `tags` (optional): Event tags
+- `alert_type` (optional): error, warning, info, success
+- `source_type_name` (optional): Source name
+
+### get_dashboards
+
+List dashboards.
+
+```json
+{
+  "name": "get_dashboards",
+  "arguments": {
+    "filter_shared": false,
+    "filter_deleted": false
+  }
+}
+```
+
+### get_hosts
+
+Get host information.
+
+```json
+{
+  "name": "get_hosts",
+  "arguments": {
+    "filter": "env:production",
+    "sort_field": "cpu",
+    "sort_dir": "desc",
+    "count": 100
+  }
+}
+```
+
+**Parameters**:
+- `filter` (optional): Tag filter string
+- `sort_field` (optional): Sort by field (cpu, iowait, load)
+- `sort_dir` (optional): Sort direction (asc, desc)
+- `count` (optional): Max results
+
+### search_logs
+
+Search log data.
+
+```json
+{
+  "name": "search_logs",
+  "arguments": {
+    "query": "service:api status:error",
+    "from": "now-1h",
+    "to": "now",
+    "limit": 100,
+    "sort": "desc"
+  }
+}
+```
+
+**Parameters**:
+- `query` (required): Log search query
+- `from` (required): Start time (relative or absolute)
+- `to` (required): End time
+- `limit` (optional): Max results
+- `sort` (optional): Sort direction
+
+## Common Query Patterns
+
+### Infrastructure Metrics
+
+```
+# CPU by host
+avg:system.cpu.user{env:production} by {host}
+
+# Memory usage
+avg:system.mem.used{*} / avg:system.mem.total{*} * 100
+
+# Disk usage
+max:system.disk.in_use{*} by {device,host}
+```
+
+### Application Metrics
+
+```
+# Request rate
+sum:trace.servlet.request.hits{env:production}.as_rate()
+
+# Error rate
+sum:trace.servlet.request.errors{env:production}.as_rate()
+  / sum:trace.servlet.request.hits{env:production}.as_rate() * 100
+
+# P99 latency
+p99:trace.servlet.request{env:production}
+```
+
+### Container Metrics
+
+```
+# Container CPU
+avg:docker.cpu.usage{*} by {container_name}
+
+# Kubernetes pod restarts
+sum:kubernetes.containers.restarts{*} by {pod_name}
+```
+
+## Use Cases
+
+### Datadog Monitor Agent
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: Agent
+metadata:
+  name: dd-monitor
+spec:
+  model: google:gemini-2.5-flash
+  instructions: |
+    Monitor Datadog alerts and investigate issues.
+
+    When asked about an alert:
+    1. Get monitor details and history
+    2. Query related metrics
+    3. Search for correlated events
+    4. Check affected hosts
+    5. Provide root cause analysis
+  mcp_servers:
+    - name: datadog
+      command: npx
+      args: ["-y", "@anthropic/mcp-server-datadog"]
+      env:
+        DD_API_KEY: ${DD_API_KEY}
+        DD_APP_KEY: ${DD_APP_KEY}
+```
+
+### Log Analyzer Agent
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: Agent
+metadata:
+  name: log-analyzer
+spec:
+  model: google:gemini-2.5-flash
+  instructions: |
+    Analyze logs from Datadog to identify issues.
+
+    Focus on:
+    - Error patterns
+    - Anomalies
+    - Performance degradation
+    - Security events
+  mcp_servers:
+    - name: datadog
+      command: npx
+      args: ["-y", "@anthropic/mcp-server-datadog"]
+      env:
+        DD_API_KEY: ${DD_API_KEY}
+        DD_APP_KEY: ${DD_APP_KEY}
+```
+
+## Security Considerations
+
+1. **API Keys**: Use application keys with limited scope
+2. **Scoped Access**: Create keys with specific permissions
+3. **Key Rotation**: Rotate application keys regularly
+4. **Audit Logs**: Monitor API key usage in Datadog
+
+### Key Permissions
+
+| Use Case | Required Permissions |
+|----------|---------------------|
+| Read metrics | `metrics_read` |
+| Read monitors | `monitors_read` |
+| Post events | `events_write` |
+| Read logs | `logs_read` |
+
+## Troubleshooting
+
+### Authentication Issues
+
+```bash
+# Test API key
+curl -X GET "https://api.datadoghq.com/api/v1/validate" \
+  -H "DD-API-KEY: ${DD_API_KEY}"
+
+# Test app key
+curl -X GET "https://api.datadoghq.com/api/v1/dashboard" \
+  -H "DD-API-KEY: ${DD_API_KEY}" \
+  -H "DD-APPLICATION-KEY: ${DD_APP_KEY}"
+```
+
+### Site Configuration
+
+Different Datadog sites require different endpoints:
+
+| Site | DD_SITE |
+|------|---------|
+| US1 | datadoghq.com |
+| US3 | us3.datadoghq.com |
+| US5 | us5.datadoghq.com |
+| EU | datadoghq.eu |
+| AP1 | ap1.datadoghq.com |
+
+## Related
+
+- [Prometheus MCP Server](./prometheus.md)
+- [Grafana MCP Server](./grafana.md)
+- [Alert Manager Agent](/docs/agent-library/observability/alert-manager)
diff --git a/docs/mcp-catalog/grafana.md b/docs/mcp-catalog/grafana.md
new file mode 100644
index 0000000..5cb5aba
--- /dev/null
+++ b/docs/mcp-catalog/grafana.md
@@ -0,0 +1,333 @@
+---
+sidebar_position: 12
+sidebar_label: Grafana
+---
+
+# Grafana MCP Server
+
+Interact with Grafana dashboards, alerts, and data sources for observability automation.
+
+## Installation
+
+```bash
+# Using npx
+npx -y @anthropic/mcp-server-grafana
+
+# Or via npm
+npm install -g @anthropic/mcp-server-grafana
+```
+
+## Configuration
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: Agent
+metadata:
+  name: grafana-agent
+spec:
+  model: google:gemini-2.5-flash
+  mcp_servers:
+    - name: grafana
+      command: npx
+      args: ["-y", "@anthropic/mcp-server-grafana"]
+      env:
+        GRAFANA_URL: https://grafana.example.com
+        GRAFANA_API_KEY: ${GRAFANA_API_KEY}
+```
+
+### With Service Account Token
+
+```yaml
+mcp_servers:
+  - name: grafana
+    command: npx
+    args: ["-y", "@anthropic/mcp-server-grafana"]
+    env:
+      GRAFANA_URL: https://grafana.example.com
+      GRAFANA_SERVICE_ACCOUNT_TOKEN: ${GRAFANA_SA_TOKEN}
+```
+
+## Available Tools
+
+### search_dashboards
+
+Search for dashboards by query or tags.
+
+```json
+{
+  "name": "search_dashboards",
+  "arguments": {
+    "query": "kubernetes",
+    "tags": ["production", "k8s"],
+    "type": "dash-db"
+  }
+}
+```
+
+**Parameters**:
+- `query` (optional): Search query string
+- `tags` (optional): Filter by tags
+- `type` (optional): Type filter (dash-db, dash-folder)
+- `limit` (optional): Max results (default: 100)
+
+### get_dashboard
+
+Get dashboard by UID.
+
+```json
+{
+  "name": "get_dashboard",
+  "arguments": {
+    "uid": "k8s-cluster-overview"
+  }
+}
+```
+
+**Parameters**:
+- `uid` (required): Dashboard UID
+
+### get_dashboard_panels
+
+Get panel definitions from a dashboard.
+
+```json
+{
+  "name": "get_dashboard_panels",
+  "arguments": {
+    "uid": "k8s-cluster-overview"
+  }
+}
+```
+
+### query_data_source
+
+Query a Grafana data source directly.
+
+```json
+{
+  "name": "query_data_source",
+  "arguments": {
+    "datasource_uid": "prometheus",
+    "query": "up{job='kubernetes-pods'}",
+    "from": "now-1h",
+    "to": "now"
+  }
+}
+```
+
+**Parameters**:
+- `datasource_uid` (required): Data source UID
+- `query` (required): Query string (format depends on data source type)
+- `from` (optional): Start time (default: now-6h)
+- `to` (optional): End time (default: now)
+
+### get_alerts
+
+Get firing alerts from Grafana Alerting.
+
+```json
+{
+  "name": "get_alerts",
+  "arguments": {
+    "state": "firing",
+    "labels": {"severity": "critical"}
+  }
+}
+```
+
+**Parameters**:
+- `state` (optional): Alert state filter (firing, pending, normal)
+- `labels` (optional): Label filters
+
+### get_alert_rules
+
+List alert rules.
+
+```json
+{
+  "name": "get_alert_rules",
+  "arguments": {
+    "folder_uid": "production-alerts"
+  }
+}
+```
+
+**Parameters**:
+- `folder_uid` (optional): Filter by folder
+- `dashboard_uid` (optional): Filter by dashboard
+
+### create_annotation
+
+Create an annotation on a dashboard.
+
+```json
+{
+  "name": "create_annotation",
+  "arguments": {
+    "dashboard_uid": "k8s-overview",
+    "text": "Deployment: v1.2.3 rolled out",
+    "tags": ["deployment", "production"],
+    "time": 1705320000000
+  }
+}
+```
+
+**Parameters**:
+- `text` (required): Annotation text
+- `dashboard_uid` (optional): Dashboard to annotate
+- `panel_id` (optional): Specific panel
+- `tags` (optional): Annotation tags
+- `time` (optional): Timestamp (epoch ms, default: now)
+
+### get_annotations
+
+Get annotations for time range.
+
+```json
+{
+  "name": "get_annotations",
+  "arguments": {
+    "dashboard_uid": "k8s-overview",
+    "from": "now-24h",
+    "to": "now",
+    "tags": ["deployment"]
+  }
+}
+```
+
+### list_data_sources
+
+List configured data sources.
+
+```json
+{
+  "name": "list_data_sources",
+  "arguments": {}
+}
+```
+
+## Use Cases
+
+### Dashboard Navigator Agent
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: Agent
+metadata:
+  name: dashboard-navigator
+spec:
+  model: google:gemini-2.5-flash
+  instructions: |
+    Help users find and understand Grafana dashboards.
+
+    When asked about metrics or dashboards:
+    1. Search for relevant dashboards
+    2. Explain what each dashboard shows
+    3. Provide direct links to dashboards
+    4. Query specific metrics if needed
+  mcp_servers:
+    - name: grafana
+      command: npx
+      args: ["-y", "@anthropic/mcp-server-grafana"]
+      env:
+        GRAFANA_URL: ${GRAFANA_URL}
+        GRAFANA_API_KEY: ${GRAFANA_API_KEY}
+```
+
+### Alert Manager Agent
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: Agent
+metadata:
+  name: alert-manager
+spec:
+  model: google:gemini-2.5-flash
+  instructions: |
+    Monitor and manage Grafana alerts.
+
+    Responsibilities:
+    - Check firing alerts regularly
+    - Correlate alerts with metrics
+    - Create annotations for incidents
+    - Provide runbook guidance
+  mcp_servers:
+    - name: grafana
+      command: npx
+      args: ["-y", "@anthropic/mcp-server-grafana"]
+      env:
+        GRAFANA_URL: ${GRAFANA_URL}
+        GRAFANA_API_KEY: ${GRAFANA_API_KEY}
+```
+
+### Incident Annotator
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: AgentFlow
+metadata:
+  name: incident-annotator
+spec:
+  trigger:
+    type: PagerDuty
+    config:
+      events: [incident.triggered]
+  nodes:
+    - id: annotate
+      type: Agent
+      config:
+        agent: annotator
+        prompt: |
+          Create a Grafana annotation for this incident:
+          Title: {{trigger.incident.title}}
+          Service: {{trigger.incident.service.name}}
+
+          Tag with: incident, {{trigger.incident.urgency}}
+```
+
+## Security Considerations
+
+1. **API Key Scope**: Use minimal permissions (Viewer for read-only agents)
+2. **Service Accounts**: Prefer service accounts over user API keys
+3. **Folder Permissions**: Restrict access to sensitive dashboards
+4. **Audit Trail**: Grafana logs all API access
+
+### Permission Levels
+
+| Use Case | Required Permission |
+|----------|-------------------|
+| Read dashboards/alerts | Viewer |
+| Create annotations | Editor |
+| Modify alert rules | Admin |
+
+## Troubleshooting
+
+### Authentication Issues
+
+```bash
+# Test API key
+curl -H "Authorization: Bearer ${GRAFANA_API_KEY}" \
+  ${GRAFANA_URL}/api/org
+
+# Check key permissions
+curl -H "Authorization: Bearer ${GRAFANA_API_KEY}" \
+  ${GRAFANA_URL}/api/user/permissions
+```
+
+### Data Source Queries
+
+```bash
+# List data sources
+curl -H "Authorization: Bearer ${GRAFANA_API_KEY}" \
+  ${GRAFANA_URL}/api/datasources
+
+# Test data source
+curl -H "Authorization: Bearer ${GRAFANA_API_KEY}" \
+  ${GRAFANA_URL}/api/datasources/uid/prometheus/health
+```
+
+## Related
+
+- [Prometheus MCP Server](./prometheus.md)
+- [Alert Manager Agent](/docs/agent-library/observability/alert-manager)
+- [SLO Guardian Agent](/docs/agent-library/observability/slo-guardian)
diff --git a/docs/mcp-catalog/index.md b/docs/mcp-catalog/index.md
index 177bdf2..007900d 100644
--- a/docs/mcp-catalog/index.md
+++ b/docs/mcp-catalog/index.md
@@ -15,11 +15,7 @@ The [Model Context Protocol](https://modelcontextprotocol.io/) is an open standa
 - **Resources**: Data the agent can read (e.g., files, database schemas)
 - **Prompts**: Pre-defined prompt templates
 
-## Using MCP Servers with AOF
-
-### Configuration
-
-Add MCP servers to your agent or daemon configuration:
+## Quick Start
 
 ```yaml
 apiVersion: aof.sh/v1alpha1
@@ -39,81 +35,217 @@ spec:
         GITHUB_TOKEN: ${GITHUB_TOKEN}
 ```
 
-### Daemon-Level Configuration
+## Catalog by Category
 
-For shared MCP servers across all agents:
-
-```yaml
-# daemon.yaml
-mcp_servers:
-  - name: postgres
-    command: npx
-    args: ["-y", "@modelcontextprotocol/server-postgres"]
-    env:
-      DATABASE_URL: ${DATABASE_URL}
-```
+### Infrastructure & Kubernetes
 
-## Catalog Overview
+| Server | Description | Key Tools |
+|--------|-------------|-----------|
+| [Kubernetes](./kubernetes.md) | Query and manage K8s clusters | kubectl, get_pods, get_logs, describe_resource |
+| [AWS](./aws.md) | EC2, S3, Lambda, CloudWatch | list_instances, get_metrics, invoke_function |
 
-### Core Servers
+### Observability
 
 | Server | Description | Key Tools |
 |--------|-------------|-----------|
-| [Filesystem](./filesystem.md) | Read/write files on the local filesystem | read_file, write_file, list_directory, search_files |
-| [Fetch](./fetch.md) | Make HTTP requests and fetch web content | fetch (GET with auto markdown conversion) |
-| [Puppeteer](./puppeteer.md) | Browser automation for scraping and testing | navigate, screenshot, click, fill, evaluate |
+| [Prometheus](./prometheus.md) | PromQL queries and alerts | query, query_range, get_alerts |
+| [Grafana](./grafana.md) | Dashboards and annotations | search_dashboards, query_data_source, create_annotation |
+| [Datadog](./datadog.md) | Metrics, monitors, logs | query_metrics, get_monitors, search_logs |
 
-### Development
+### Development & Git
 
 | Server | Description | Key Tools |
 |--------|-------------|-----------|
-| [GitHub](./github.md) | GitHub repos, issues, PRs | create_issue, create_pull_request, get_file_contents, search_code |
-| [GitLab](./gitlab.md) | GitLab projects, MRs, CI/CD | create_issue, create_merge_request, get_file_contents |
+| [GitHub](./github.md) | Repos, issues, PRs | create_issue, create_pull_request, get_file_contents, search_code |
+| [GitLab](./gitlab.md) | Projects, MRs, CI/CD | create_issue, create_merge_request, get_file_contents |
+| [Filesystem](./filesystem.md) | Read/write local files | read_file, write_file, list_directory, search_files |
 
 ### Databases
 
 | Server | Description | Key Tools |
 |--------|-------------|-----------|
-| [PostgreSQL](./postgres.md) | Query PostgreSQL databases (read-only) | query |
-| [SQLite](./sqlite.md) | Query and modify SQLite databases | read_query, write_query, create_table, list_tables |
+| [PostgreSQL](./postgres.md) | Query PostgreSQL (read-only) | query |
+| [SQLite](./sqlite.md) | Query and modify SQLite | read_query, write_query, create_table, list_tables |
 
 ### Communication
 
 | Server | Description | Key Tools |
 |--------|-------------|-----------|
-| [Slack](./slack.md) | Send messages and interact with Slack | slack_post_message, slack_list_channels, slack_add_reaction |
+| [Slack](./slack.md) | Send messages, interact | slack_post_message, slack_list_channels, slack_add_reaction |
 
-### Search
+### Web & Search
 
 | Server | Description | Key Tools |
 |--------|-------------|-----------|
-| [Brave Search](./brave-search.md) | Web search using Brave Search API | brave_web_search, brave_local_search |
+| [Fetch](./fetch.md) | Make HTTP requests | fetch (GET with auto markdown conversion) |
+| [Puppeteer](./puppeteer.md) | Browser automation | navigate, screenshot, click, fill, evaluate |
+| [Brave Search](./brave-search.md) | Web search | brave_web_search, brave_local_search |
+
+## Configuration Patterns
+
+### Agent-Level Configuration
+
+Add MCP servers to individual agents:
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: Agent
+metadata:
+  name: k8s-debugger
+spec:
+  model: google:gemini-2.5-flash
+  mcp_servers:
+    - name: kubernetes
+      command: npx
+      args: ["-y", "@anthropic/mcp-server-kubernetes"]
+    - name: prometheus
+      command: npx
+      args: ["-y", "@anthropic/mcp-server-prometheus"]
+      env:
+        PROMETHEUS_URL: ${PROMETHEUS_URL}
+```
+
+### Daemon-Level Configuration
+
+Share MCP servers across all agents:
+
+```yaml
+# daemon.yaml
+spec:
+  mcp_servers:
+    - name: postgres
+      command: npx
+      args: ["-y", "@modelcontextprotocol/server-postgres"]
+      env:
+        DATABASE_URL: ${DATABASE_URL}
+    - name: github
+      command: npx
+      args: ["-y", "@modelcontextprotocol/server-github"]
+      env:
+        GITHUB_TOKEN: ${GITHUB_TOKEN}
+```
+
+### Environment Variables
+
+Always use environment variables for secrets:
+
+```yaml
+mcp_servers:
+  - name: aws
+    command: npx
+    args: ["-y", "@anthropic/mcp-server-aws"]
+    env:
+      AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID}
+      AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY}
+      AWS_REGION: us-east-1
+```
 
 ## Installation
 
-All official MCP servers can be installed via npx:
+All MCP servers can be installed via npx (no pre-installation needed):
 
 ```bash
-# No installation needed - npx downloads on first use
+# npx downloads and runs on first use
 npx -y @modelcontextprotocol/server-filesystem /path
 
-# Or install globally
+# Or install globally for faster startup
 npm install -g @modelcontextprotocol/server-filesystem
 ```
 
-## Security Considerations
+## Security Best Practices
+
+### 1. Credential Management
 
-1. **Credential Management**: Use environment variables for secrets
-2. **Scope Limitation**: Restrict filesystem access to specific directories
-3. **Network Access**: Use firewalls to limit puppeteer/fetch targets
-4. **Audit Logging**: AOF logs all MCP tool invocations
+- Use environment variables for all secrets
+- Rotate API keys regularly
+- Use service accounts where possible
+
+### 2. Scope Limitation
+
+- Restrict filesystem access to specific directories
+- Use read-only database connections when possible
+- Apply least-privilege IAM policies
+
+### 3. Network Security
+
+- Use firewalls to limit outbound connections
+- Restrict puppeteer/fetch to allowed domains
+- Use VPC endpoints for cloud services
+
+### 4. Audit Logging
+
+- AOF logs all MCP tool invocations
+- Enable cloud provider audit logs (CloudTrail, etc.)
+- Monitor for unusual access patterns
 
 ## Creating Custom MCP Servers
 
-See the [MCP Integration Guide](../tools/mcp-integration.md) for building custom servers.
+For custom integrations, see the [MCP Integration Guide](../guides/mcp-integration.md).
+
+### Basic Server Template
+
+```typescript
+import { Server } from "@modelcontextprotocol/sdk/server/index.js";
+import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
+
+const server = new Server(
+  { name: "my-server", version: "1.0.0" },
+  { capabilities: { tools: {} } }
+);
+
+server.setRequestHandler("tools/list", async () => ({
+  tools: [{
+    name: "my_tool",
+    description: "Does something useful",
+    inputSchema: {
+      type: "object",
+      properties: {
+        param: { type: "string", description: "Parameter" }
+      },
+      required: ["param"]
+    }
+  }]
+}));
+
+server.setRequestHandler("tools/call", async (request) => {
+  if (request.params.name === "my_tool") {
+    return { content: [{ type: "text", text: "Result" }] };
+  }
+  throw new Error("Unknown tool");
+});
+
+const transport = new StdioServerTransport();
+await server.connect(transport);
+```
+
+## Troubleshooting
+
+### MCP Server Not Starting
+
+```bash
+# Test server directly
+npx -y @modelcontextprotocol/server-filesystem /tmp
+
+# Check for errors
+DEBUG=* npx -y @modelcontextprotocol/server-github
+```
+
+### Tool Calls Failing
+
+1. Check environment variables are set
+2. Verify credentials have required permissions
+3. Check network connectivity
+4. Review AOF logs for detailed errors
+
+### Performance Issues
+
+- Use daemon-level MCP config to share server instances
+- Install servers globally to avoid npx download time
+- Use connection pooling for database servers
 
 ## Next Steps
 
-- [Filesystem Server](./filesystem.md) - File operations
+- [Kubernetes Server](./kubernetes.md) - K8s cluster management
 - [GitHub Server](./github.md) - Repository automation
-- [PostgreSQL Server](./postgres.md) - Database queries
+- [Prometheus Server](./prometheus.md) - Metrics queries
+- [AWS Server](./aws.md) - Cloud infrastructure
diff --git a/docs/mcp-catalog/kubernetes.md b/docs/mcp-catalog/kubernetes.md
new file mode 100644
index 0000000..f224488
--- /dev/null
+++ b/docs/mcp-catalog/kubernetes.md
@@ -0,0 +1,279 @@
+---
+sidebar_position: 10
+sidebar_label: Kubernetes
+---
+
+# Kubernetes MCP Server
+
+Query and manage Kubernetes clusters through kubectl and the Kubernetes API.
+
+## Installation
+
+```bash
+# Using npx (recommended)
+npx -y @anthropic/mcp-server-kubernetes
+
+# Or install globally
+npm install -g @anthropic/mcp-server-kubernetes
+```
+
+## Configuration
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: Agent
+metadata:
+  name: k8s-agent
+spec:
+  model: google:gemini-2.5-flash
+  mcp_servers:
+    - name: kubernetes
+      command: npx
+      args: ["-y", "@anthropic/mcp-server-kubernetes"]
+      env:
+        KUBECONFIG: ${KUBECONFIG}  # Optional, uses default if not set
+```
+
+### Multi-Cluster Configuration
+
+```yaml
+mcp_servers:
+  - name: k8s-prod
+    command: npx
+    args: ["-y", "@anthropic/mcp-server-kubernetes"]
+    env:
+      KUBECONFIG: /path/to/prod-kubeconfig
+  - name: k8s-staging
+    command: npx
+    args: ["-y", "@anthropic/mcp-server-kubernetes"]
+    env:
+      KUBECONFIG: /path/to/staging-kubeconfig
+```
+
+## Available Tools
+
+### kubectl
+
+Execute any kubectl command with full output parsing.
+
+```json
+{
+  "name": "kubectl",
+  "arguments": {
+    "command": "get pods -n default -o json"
+  }
+}
+```
+
+**Parameters**:
+- `command` (required): kubectl command to execute (without `kubectl` prefix)
+
+**Examples**:
+```yaml
+# Get all pods in a namespace
+command: "get pods -n production"
+
+# Describe a deployment
+command: "describe deployment/nginx -n default"
+
+# Get events sorted by time
+command: "get events --sort-by='.lastTimestamp'"
+
+# Apply a manifest
+command: "apply -f /path/to/manifest.yaml"
+
+# Scale a deployment
+command: "scale deployment/web --replicas=3"
+```
+
+### get_namespaces
+
+List all namespaces in the cluster.
+
+```json
+{
+  "name": "get_namespaces",
+  "arguments": {}
+}
+```
+
+### get_pods
+
+List pods with filtering options.
+
+```json
+{
+  "name": "get_pods",
+  "arguments": {
+    "namespace": "default",
+    "label_selector": "app=nginx",
+    "field_selector": "status.phase=Running"
+  }
+}
+```
+
+**Parameters**:
+- `namespace` (optional): Namespace to query (default: all namespaces)
+- `label_selector` (optional): Label selector (e.g., `app=nginx,env=prod`)
+- `field_selector` (optional): Field selector (e.g., `status.phase=Running`)
+
+### get_logs
+
+Get logs from a pod or container.
+
+```json
+{
+  "name": "get_logs",
+  "arguments": {
+    "pod": "nginx-abc123",
+    "namespace": "default",
+    "container": "nginx",
+    "tail": 100,
+    "since": "1h"
+  }
+}
+```
+
+**Parameters**:
+- `pod` (required): Pod name
+- `namespace` (optional): Namespace (default: default)
+- `container` (optional): Container name (for multi-container pods)
+- `tail` (optional): Number of lines from end
+- `since` (optional): Duration (e.g., `1h`, `30m`, `2h30m`)
+- `previous` (optional): Get logs from previous container instance
+
+### describe_resource
+
+Get detailed information about a resource.
+
+```json
+{
+  "name": "describe_resource",
+  "arguments": {
+    "kind": "deployment",
+    "name": "nginx",
+    "namespace": "default"
+  }
+}
+```
+
+**Parameters**:
+- `kind` (required): Resource kind (pod, deployment, service, etc.)
+- `name` (required): Resource name
+- `namespace` (optional): Namespace
+
+### get_events
+
+Get events for troubleshooting.
+
+```json
+{
+  "name": "get_events",
+  "arguments": {
+    "namespace": "default",
+    "involved_object": "pod/nginx-abc123",
+    "types": ["Warning"]
+  }
+}
+```
+
+**Parameters**:
+- `namespace` (optional): Namespace filter
+- `involved_object` (optional): Filter by involved object
+- `types` (optional): Event types (`Normal`, `Warning`)
+
+## Use Cases
+
+### Pod Debugging Agent
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: Agent
+metadata:
+  name: pod-debugger
+spec:
+  model: google:gemini-2.5-flash
+  instructions: |
+    You are a Kubernetes pod debugging specialist.
+
+    When asked about pod issues:
+    1. Get pod status and events
+    2. Check logs for errors
+    3. Describe the pod for configuration issues
+    4. Suggest remediation steps
+  mcp_servers:
+    - name: kubernetes
+      command: npx
+      args: ["-y", "@anthropic/mcp-server-kubernetes"]
+```
+
+### Deployment Status Checker
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: Agent
+metadata:
+  name: deploy-checker
+spec:
+  model: google:gemini-2.5-flash
+  instructions: |
+    Check deployment status and report:
+    - Replica status
+    - Pod health
+    - Recent events
+    - Resource usage
+  mcp_servers:
+    - name: kubernetes
+      command: npx
+      args: ["-y", "@anthropic/mcp-server-kubernetes"]
+```
+
+## Security Considerations
+
+1. **RBAC**: Use service accounts with minimal required permissions
+2. **Namespace Isolation**: Restrict agents to specific namespaces
+3. **Audit Logging**: Enable Kubernetes audit logs for MCP actions
+4. **Read-Only Mode**: Use read-only service accounts for monitoring agents
+
+### Example RBAC
+
+```yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: aof-agent-readonly
+  namespace: production
+rules:
+  - apiGroups: [""]
+    resources: ["pods", "pods/log", "services", "events"]
+    verbs: ["get", "list", "watch"]
+  - apiGroups: ["apps"]
+    resources: ["deployments", "replicasets"]
+    verbs: ["get", "list", "watch"]
+```
+
+## Troubleshooting
+
+### Connection Issues
+
+```bash
+# Verify kubeconfig
+kubectl config current-context
+kubectl cluster-info
+
+# Test MCP server directly
+npx -y @anthropic/mcp-server-kubernetes
+```
+
+### Permission Errors
+
+Check service account permissions:
+```bash
+kubectl auth can-i get pods --as=system:serviceaccount:default:aof-agent
+```
+
+## Related
+
+- [Pod Debugger Agent](/docs/agent-library/kubernetes/pod-debugger)
+- [Deployment Guardian Agent](/docs/agent-library/kubernetes/deploy-guardian)
+- [Kubernetes Triggers](/docs/triggers/kubernetes)
diff --git a/docs/mcp-catalog/prometheus.md b/docs/mcp-catalog/prometheus.md
new file mode 100644
index 0000000..952894d
--- /dev/null
+++ b/docs/mcp-catalog/prometheus.md
@@ -0,0 +1,285 @@
+---
+sidebar_position: 11
+sidebar_label: Prometheus
+---
+
+# Prometheus MCP Server
+
+Query Prometheus metrics and alerts for observability automation.
+
+## Installation
+
+```bash
+# Using npx
+npx -y @anthropic/mcp-server-prometheus
+
+# Or via npm
+npm install -g @anthropic/mcp-server-prometheus
+```
+
+## Configuration
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: Agent
+metadata:
+  name: metrics-agent
+spec:
+  model: google:gemini-2.5-flash
+  mcp_servers:
+    - name: prometheus
+      command: npx
+      args: ["-y", "@anthropic/mcp-server-prometheus"]
+      env:
+        PROMETHEUS_URL: http://prometheus:9090
+        # Optional: For authentication
+        PROMETHEUS_USERNAME: ${PROM_USER}
+        PROMETHEUS_PASSWORD: ${PROM_PASS}
+```
+
+### With Thanos/Cortex
+
+```yaml
+mcp_servers:
+  - name: prometheus
+    command: npx
+    args: ["-y", "@anthropic/mcp-server-prometheus"]
+    env:
+      PROMETHEUS_URL: http://thanos-query:9090
+      PROMETHEUS_TIMEOUT: "60s"  # Longer timeout for Thanos
+```
+
+## Available Tools
+
+### query
+
+Execute instant PromQL queries.
+
+```json
+{
+  "name": "query",
+  "arguments": {
+    "query": "up{job='kubernetes-pods'}",
+    "time": "2024-01-15T12:00:00Z"
+  }
+}
+```
+
+**Parameters**:
+- `query` (required): PromQL query string
+- `time` (optional): Evaluation timestamp (default: now)
+
+### query_range
+
+Execute range queries for time series data.
+
+```json
+{
+  "name": "query_range",
+  "arguments": {
+    "query": "rate(http_requests_total[5m])",
+    "start": "2024-01-15T11:00:00Z",
+    "end": "2024-01-15T12:00:00Z",
+    "step": "1m"
+  }
+}
+```
+
+**Parameters**:
+- `query` (required): PromQL query string
+- `start` (required): Start timestamp
+- `end` (required): End timestamp
+- `step` (optional): Query resolution step (default: 15s)
+
+### get_alerts
+
+Get current alerts from Prometheus.
+
+```json
+{
+  "name": "get_alerts",
+  "arguments": {
+    "state": "firing",
+    "labels": {"severity": "critical"}
+  }
+}
+```
+
+**Parameters**:
+- `state` (optional): Filter by state (firing, pending, inactive)
+- `labels` (optional): Filter by labels
+
+### get_rules
+
+List alerting and recording rules.
+
+```json
+{
+  "name": "get_rules",
+  "arguments": {
+    "type": "alert"
+  }
+}
+```
+
+**Parameters**:
+- `type` (optional): Rule type (alert, record)
+
+### get_targets
+
+Get scrape target status.
+
+```json
+{
+  "name": "get_targets",
+  "arguments": {
+    "state": "active"
+  }
+}
+```
+
+**Parameters**:
+- `state` (optional): Filter by state (active, dropped, any)
+
+### get_labels
+
+Get all label names or values.
+
+```json
+{
+  "name": "get_labels",
+  "arguments": {
+    "label": "job"
+  }
+}
+```
+
+**Parameters**:
+- `label` (optional): Get values for specific label
+
+## Common PromQL Patterns
+
+### Resource Usage
+
+```promql
+# CPU usage by pod
+sum(rate(container_cpu_usage_seconds_total{namespace="production"}[5m])) by (pod)
+
+# Memory usage percentage
+100 * sum(container_memory_usage_bytes{namespace="production"})
+  / sum(machine_memory_bytes)
+
+# Disk usage
+100 - (node_filesystem_avail_bytes / node_filesystem_size_bytes * 100)
+```
+
+### Request Metrics
+
+```promql
+# Request rate
+sum(rate(http_requests_total[5m])) by (service)
+
+# Error rate
+sum(rate(http_requests_total{status=~"5.."}[5m]))
+  / sum(rate(http_requests_total[5m])) * 100
+
+# P99 latency
+histogram_quantile(0.99,
+  sum(rate(http_request_duration_seconds_bucket[5m])) by (le, service))
+```
+
+### SLO Calculations
+
+```promql
+# Availability (uptime)
+avg_over_time(up{job="api"}[30d]) * 100
+
+# Error budget consumed
+1 - (
+  sum(rate(http_requests_total{status!~"5.."}[30d]))
+  / sum(rate(http_requests_total[30d]))
+) / 0.001  # 99.9% SLO
+```
+
+## Use Cases
+
+### SLO Guardian Agent
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: Agent
+metadata:
+  name: slo-guardian
+spec:
+  model: google:gemini-2.5-flash
+  instructions: |
+    Monitor SLO compliance using Prometheus.
+
+    Track these SLIs:
+    - Availability: 99.9% uptime
+    - Latency: P99 < 200ms
+    - Error rate: < 0.1%
+
+    Report on error budget consumption and burn rate.
+  mcp_servers:
+    - name: prometheus
+      command: npx
+      args: ["-y", "@anthropic/mcp-server-prometheus"]
+      env:
+        PROMETHEUS_URL: ${PROMETHEUS_URL}
+```
+
+### Alert Investigator Agent
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: Agent
+metadata:
+  name: alert-investigator
+spec:
+  model: google:gemini-2.5-flash
+  instructions: |
+    When investigating alerts:
+    1. Query current alert status
+    2. Check related metrics history
+    3. Identify anomalies and patterns
+    4. Correlate with recent deployments
+    5. Suggest remediation steps
+  mcp_servers:
+    - name: prometheus
+      command: npx
+      args: ["-y", "@anthropic/mcp-server-prometheus"]
+      env:
+        PROMETHEUS_URL: ${PROMETHEUS_URL}
+```
+
+## Security Considerations
+
+1. **Authentication**: Use basic auth or bearer tokens for secured Prometheus
+2. **Query Limits**: Set timeout and max query length
+3. **Read-Only**: MCP server only supports read operations
+4. **Network**: Restrict access to internal Prometheus instances
+
+## Troubleshooting
+
+### Connection Issues
+
+```bash
+# Test Prometheus connectivity
+curl ${PROMETHEUS_URL}/api/v1/status/config
+
+# Verify MCP server
+PROMETHEUS_URL=http://localhost:9090 npx -y @anthropic/mcp-server-prometheus
+```
+
+### Query Performance
+
+- Use `step` parameter for range queries
+- Limit time ranges for historical queries
+- Use recording rules for complex queries
+
+## Related
+
+- [Grafana MCP Server](./grafana.md)
+- [SLO Guardian Agent](/docs/agent-library/observability/slo-guardian)
+- [Metrics Explorer Agent](/docs/agent-library/observability/metrics-explorer)

From ff276f3e80887c44b1571dc4c0b384339a3c5b28 Mon Sep 17 00:00:00 2001
From: Gourav Shah <gs@initcron.org>
Date: Fri, 23 Jan 2026 14:44:06 +0530
Subject: [PATCH 13/14] feat: Add Structured I/O (Output Schemas) support

Implements structured output validation for agents using JSON Schema:
- Add OutputSchemaSpec type for YAML config parsing
- Add output_schema field to AgentConfig, AgentSpec, FlatAgentConfig
- Add conversion from OutputSchemaSpec to schema::OutputSchema for runtime
- Update all AgentConfig initializations across crates
- Add comprehensive documentation in docs/reference/structured-io.md

Features:
- JSON Schema-based output validation (type, properties, required, enum)
- Validation modes: strict (default), lenient, coerce
- Error handling: fail (default), retry, passthrough
- Pre-built schemas in schema::schemas module

Closes #74, #75, #76

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 crates/aof-core/src/agent.rs                  | 144 ++++++
 crates/aof-core/src/lib.rs                    |   5 +-
 crates/aof-core/src/registry.rs               |   1 +
 .../src/executor/agent_executor.rs            |   2 +
 .../src/executor/agentflow_executor.rs        |   1 +
 crates/aof-runtime/src/executor/runtime.rs    |   2 +
 crates/aof-runtime/src/fleet/mod.rs           |   1 +
 crates/aof-runtime/tests/executor_tests.rs    |  20 +-
 crates/aof-triggers/src/handler/mod.rs        |   1 +
 docs/reference/structured-io.md               | 456 ++++++++++++++++++
 10 files changed, 627 insertions(+), 6 deletions(-)
 create mode 100644 docs/reference/structured-io.md

diff --git a/crates/aof-core/src/agent.rs b/crates/aof-core/src/agent.rs
index 1228dae..d8364d5 100644
--- a/crates/aof-core/src/agent.rs
+++ b/crates/aof-core/src/agent.rs
@@ -7,6 +7,141 @@ use std::sync::Arc;
 use crate::mcp::McpServerConfig;
 use crate::AofResult;
 
+/// Output schema specification using JSON Schema format
+/// Enables structured, validated agent responses
+///
+/// This is the YAML-friendly version for config files. It gets converted
+/// to `crate::schema::OutputSchema` for runtime use.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct OutputSchemaSpec {
+    /// JSON Schema type (object, array, string, number, boolean)
+    #[serde(rename = "type")]
+    pub schema_type: String,
+
+    /// Properties for object type
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub properties: Option<HashMap<String, serde_json::Value>>,
+
+    /// Required properties for object type
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub required: Option<Vec<String>>,
+
+    /// Items schema for array type
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub items: Option<Box<serde_json::Value>>,
+
+    /// Enum values for string type
+    #[serde(rename = "enum", skip_serializing_if = "Option::is_none")]
+    pub enum_values: Option<Vec<String>>,
+
+    /// Description of the schema
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub description: Option<String>,
+
+    /// Allow additional properties (default: false for strict validation)
+    #[serde(default, rename = "additionalProperties")]
+    pub additional_properties: Option<bool>,
+
+    /// Validation mode: strict (default), lenient, coerce
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub validation_mode: Option<String>,
+
+    /// Behavior on validation error: fail (default), retry, passthrough
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub on_validation_error: Option<String>,
+
+    /// Max retries if on_validation_error is "retry"
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub max_retries: Option<u32>,
+
+    /// Additional JSON Schema properties (oneOf, anyOf, etc.)
+    #[serde(flatten)]
+    pub extra: HashMap<String, serde_json::Value>,
+}
+
+impl OutputSchemaSpec {
+    /// Convert to JSON Schema Value for validation
+    pub fn to_json_schema(&self) -> serde_json::Value {
+        let mut schema = serde_json::json!({
+            "type": self.schema_type
+        });
+
+        if let Some(props) = &self.properties {
+            schema["properties"] = serde_json::json!(props);
+        }
+
+        if let Some(req) = &self.required {
+            schema["required"] = serde_json::json!(req);
+        }
+
+        if let Some(items) = &self.items {
+            schema["items"] = serde_json::json!(items);
+        }
+
+        if let Some(enum_vals) = &self.enum_values {
+            schema["enum"] = serde_json::json!(enum_vals);
+        }
+
+        if let Some(desc) = &self.description {
+            schema["description"] = serde_json::json!(desc);
+        }
+
+        if let Some(additional) = &self.additional_properties {
+            schema["additionalProperties"] = serde_json::json!(additional);
+        }
+
+        // Merge extra fields (oneOf, anyOf, etc.)
+        if let serde_json::Value::Object(ref mut map) = schema {
+            for (key, value) in &self.extra {
+                map.insert(key.clone(), value.clone());
+            }
+        }
+
+        schema
+    }
+
+    /// Get validation mode (defaults to "strict")
+    pub fn get_validation_mode(&self) -> &str {
+        self.validation_mode.as_deref().unwrap_or("strict")
+    }
+
+    /// Get error handling behavior (defaults to "fail")
+    pub fn get_error_behavior(&self) -> &str {
+        self.on_validation_error.as_deref().unwrap_or("fail")
+    }
+
+    /// Generate schema instructions for the LLM
+    pub fn to_instructions(&self) -> String {
+        let schema = self.to_json_schema();
+        format!(
+            "You MUST respond with valid JSON matching this schema:\n```json\n{}\n```\nDo not include any text outside the JSON object.",
+            serde_json::to_string_pretty(&schema).unwrap_or_default()
+        )
+    }
+}
+
+/// Convert YAML-friendly OutputSchemaSpec to runtime OutputSchema
+impl From<OutputSchemaSpec> for crate::schema::OutputSchema {
+    fn from(spec: OutputSchemaSpec) -> Self {
+        // Get validation mode before moving spec
+        let strict = spec.get_validation_mode() == "strict";
+        let description = spec.description.clone();
+
+        let schema = spec.to_json_schema();
+        let mut output = crate::schema::OutputSchema::from_json_schema(schema);
+
+        // Transfer description if present
+        if let Some(desc) = description {
+            output = output.with_description(desc);
+        }
+
+        // Set strict mode based on validation_mode
+        output = output.with_strict(strict);
+
+        output
+    }
+}
+
 /// Memory specification - unified way to configure memory backends
 ///
 /// Supports multiple formats:
@@ -526,6 +661,11 @@ pub struct AgentConfig {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub max_tokens: Option<usize>,
 
+    /// Output schema for structured responses (JSON Schema format)
+    /// When specified, agent responses will be validated against this schema
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub output_schema: Option<OutputSchemaSpec>,
+
     /// Custom configuration
     #[serde(flatten)]
     pub extra: HashMap<String, serde_json::Value>,
@@ -768,6 +908,7 @@ struct AgentSpec {
     #[serde(default = "default_temperature")]
     temperature: f32,
     max_tokens: Option<usize>,
+    output_schema: Option<OutputSchemaSpec>,
     #[serde(flatten)]
     extra: HashMap<String, serde_json::Value>,
 }
@@ -791,6 +932,7 @@ struct FlatAgentConfig {
     #[serde(default = "default_temperature")]
     temperature: f32,
     max_tokens: Option<usize>,
+    output_schema: Option<OutputSchemaSpec>,
     #[serde(flatten)]
     extra: HashMap<String, serde_json::Value>,
 }
@@ -810,6 +952,7 @@ impl From<AgentConfigInput> for AgentConfig {
                 max_iterations: flat.max_iterations,
                 temperature: flat.temperature,
                 max_tokens: flat.max_tokens,
+                output_schema: flat.output_schema,
                 extra: flat.extra,
             },
             AgentConfigInput::Kubernetes(k8s) => {
@@ -825,6 +968,7 @@ impl From<AgentConfigInput> for AgentConfig {
                     max_iterations: k8s.spec.max_iterations,
                     temperature: k8s.spec.temperature,
                     max_tokens: k8s.spec.max_tokens,
+                    output_schema: k8s.spec.output_schema,
                     extra: k8s.spec.extra,
                 }
             }
diff --git a/crates/aof-core/src/lib.rs b/crates/aof-core/src/lib.rs
index af654bc..862653a 100644
--- a/crates/aof-core/src/lib.rs
+++ b/crates/aof-core/src/lib.rs
@@ -22,8 +22,9 @@ pub mod workflow;
 // Re-export core types
 pub use agent::{
     Agent, AgentConfig, AgentContext, AgentMetadata, ExecutionMetadata, HttpToolConfig,
-    MemorySpec, Message, MessageRole, QualifiedToolSpec, ShellToolConfig, StructuredMemoryConfig,
-    ToolResult as AgentToolResult, ToolSource, ToolSpec, TypeBasedToolSpec, TypeBasedToolType,
+    MemorySpec, Message, MessageRole, OutputSchemaSpec, QualifiedToolSpec, ShellToolConfig,
+    StructuredMemoryConfig, ToolResult as AgentToolResult, ToolSource, ToolSpec, TypeBasedToolSpec,
+    TypeBasedToolType,
 };
 pub use error::{AofError, AofResult};
 pub use error_tracker::{ErrorKnowledgeBase, ErrorRecord, ErrorStats};
diff --git a/crates/aof-core/src/registry.rs b/crates/aof-core/src/registry.rs
index 60231cb..18aed0a 100644
--- a/crates/aof-core/src/registry.rs
+++ b/crates/aof-core/src/registry.rs
@@ -711,6 +711,7 @@ mod tests {
             max_iterations: 10,
             temperature: 0.7,
             max_tokens: None,
+            output_schema: None,
             extra: HashMap::new(),
         };
 
diff --git a/crates/aof-runtime/src/executor/agent_executor.rs b/crates/aof-runtime/src/executor/agent_executor.rs
index 1b77915..83c48f1 100644
--- a/crates/aof-runtime/src/executor/agent_executor.rs
+++ b/crates/aof-runtime/src/executor/agent_executor.rs
@@ -1549,6 +1549,7 @@ mod tests {
             max_iterations: 10,
             temperature: 0.7,
             max_tokens: Some(1000),
+            output_schema: None,
             extra: HashMap::new(),
         };
 
@@ -1587,6 +1588,7 @@ mod tests {
             max_iterations: 2,
             temperature: 0.7,
             max_tokens: None,
+            output_schema: None,
             extra: HashMap::new(),
         };
 
diff --git a/crates/aof-runtime/src/executor/agentflow_executor.rs b/crates/aof-runtime/src/executor/agentflow_executor.rs
index 9d1dddc..afb92d4 100644
--- a/crates/aof-runtime/src/executor/agentflow_executor.rs
+++ b/crates/aof-runtime/src/executor/agentflow_executor.rs
@@ -430,6 +430,7 @@ impl AgentFlowExecutor {
             max_iterations: 10,
             temperature: inline.temperature.unwrap_or(0.7),
             max_tokens: inline.max_tokens,
+            output_schema: None,
             extra: std::collections::HashMap::new(),
         };
 
diff --git a/crates/aof-runtime/src/executor/runtime.rs b/crates/aof-runtime/src/executor/runtime.rs
index 2d3999f..d79628d 100644
--- a/crates/aof-runtime/src/executor/runtime.rs
+++ b/crates/aof-runtime/src/executor/runtime.rs
@@ -1342,6 +1342,7 @@ mod tests {
             max_iterations: 10,
             temperature: 0.7,
             max_tokens: None,
+            output_schema: None,
             extra: HashMap::new(),
         };
 
@@ -1367,6 +1368,7 @@ mod tests {
             max_iterations: 10,
             temperature: 0.7,
             max_tokens: None,
+            output_schema: None,
             extra: HashMap::new(),
         };
 
diff --git a/crates/aof-runtime/src/fleet/mod.rs b/crates/aof-runtime/src/fleet/mod.rs
index ae63431..210ca33 100644
--- a/crates/aof-runtime/src/fleet/mod.rs
+++ b/crates/aof-runtime/src/fleet/mod.rs
@@ -275,6 +275,7 @@ impl FleetCoordinator {
                 max_iterations: spec.max_iterations.map(|v| v as usize).unwrap_or(10),
                 temperature: spec.temperature.unwrap_or(0.7),
                 max_tokens: None,
+                output_schema: None,
                 extra: std::collections::HashMap::new(),
             })
         } else {
diff --git a/crates/aof-runtime/tests/executor_tests.rs b/crates/aof-runtime/tests/executor_tests.rs
index 05acce0..87806f6 100644
--- a/crates/aof-runtime/tests/executor_tests.rs
+++ b/crates/aof-runtime/tests/executor_tests.rs
@@ -3,7 +3,7 @@
 use aof_core::{
     AgentConfig, AgentContext, AofResult, Model, ModelConfig, ModelProvider, ModelRequest,
     ModelResponse, RequestMessage, StopReason, StreamChunk, ToolCall, ToolDefinition,
-    ToolExecutor, ToolInput, ToolResult, Usage,
+    ToolExecutor, ToolInput, ToolResult, ToolSpec, Usage,
 };
 use aof_runtime::executor::AgentExecutor;
 use async_trait::async_trait;
@@ -120,9 +120,11 @@ async fn test_executor_simple_execution() {
         tools: vec![],
         mcp_servers: vec![],
         memory: None,
+        max_context_messages: 10,
         max_iterations: 10,
         temperature: 0.7,
         max_tokens: Some(1000),
+        output_schema: None,
         extra: HashMap::new(),
     };
 
@@ -155,12 +157,14 @@ async fn test_executor_with_tool_calls() {
         system_prompt: None,
         model: "test-model".to_string(),
         provider: None,
-        tools: vec!["test_tool".to_string()],
+        tools: vec![ToolSpec::Simple("test_tool".to_string())],
         mcp_servers: vec![],
         memory: None,
+        max_context_messages: 10,
         max_iterations: 10,
         temperature: 0.7,
         max_tokens: None,
+        output_schema: None,
         extra: HashMap::new(),
     };
 
@@ -213,12 +217,14 @@ async fn test_executor_max_iterations() {
         system_prompt: None,
         model: "test-model".to_string(),
         provider: None,
-        tools: vec!["test_tool".to_string()],
+        tools: vec![ToolSpec::Simple("test_tool".to_string())],
         mcp_servers: vec![],
         memory: None,
+        max_context_messages: 10,
         max_iterations: 2,
         temperature: 0.7,
         max_tokens: None,
+        output_schema: None,
         extra: HashMap::new(),
     };
 
@@ -278,12 +284,14 @@ async fn test_executor_tool_failure() {
         system_prompt: None,
         model: "test-model".to_string(),
         provider: None,
-        tools: vec!["test_tool".to_string()],
+        tools: vec![ToolSpec::Simple("test_tool".to_string())],
         mcp_servers: vec![],
         memory: None,
+        max_context_messages: 10,
         max_iterations: 10,
         temperature: 0.7,
         max_tokens: None,
+        output_schema: None,
         extra: HashMap::new(),
     };
 
@@ -338,9 +346,11 @@ async fn test_executor_stop_reasons() {
             tools: vec![],
             mcp_servers: vec![],
             memory: None,
+            max_context_messages: 10,
             max_iterations: 10,
             temperature: 0.7,
             max_tokens: None,
+            output_schema: None,
             extra: HashMap::new(),
         };
 
@@ -372,9 +382,11 @@ async fn test_executor_content_filter() {
         tools: vec![],
         mcp_servers: vec![],
         memory: None,
+        max_context_messages: 10,
         max_iterations: 10,
         temperature: 0.7,
         max_tokens: None,
+        output_schema: None,
         extra: HashMap::new(),
     };
 
diff --git a/crates/aof-triggers/src/handler/mod.rs b/crates/aof-triggers/src/handler/mod.rs
index a0b23bc..b77edb8 100644
--- a/crates/aof-triggers/src/handler/mod.rs
+++ b/crates/aof-triggers/src/handler/mod.rs
@@ -2289,6 +2289,7 @@ impl TriggerHandler {
             max_iterations: 5,
             temperature: 0.7,
             max_tokens: Some(2000),
+            output_schema: None,
             extra: std::collections::HashMap::new(),
         };
 
diff --git a/docs/reference/structured-io.md b/docs/reference/structured-io.md
new file mode 100644
index 0000000..d241714
--- /dev/null
+++ b/docs/reference/structured-io.md
@@ -0,0 +1,456 @@
+---
+sidebar_position: 15
+sidebar_label: Structured I/O
+---
+
+# Structured I/O (Output Schemas)
+
+Structured I/O allows you to define expected output formats for agents, enabling type-safe workflows and better composability.
+
+## Overview
+
+By default, agents return free-form text responses. With Structured I/O, you can:
+
+- Define expected output structure using JSON Schema
+- Get validated, parseable responses
+- Chain agents with type-safe data flow
+- Auto-generate documentation from schemas
+
+## Basic Usage
+
+### Defining Output Schema
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: Agent
+metadata:
+  name: pod-analyzer
+spec:
+  model: google:gemini-2.5-flash
+  instructions: |
+    Analyze Kubernetes pods and report their status.
+    Always respond in the specified JSON format.
+  output_schema:
+    type: object
+    properties:
+      status:
+        type: string
+        enum: [healthy, degraded, critical, unknown]
+        description: Overall pod health status
+      pods_checked:
+        type: integer
+        description: Number of pods analyzed
+      issues:
+        type: array
+        items:
+          type: object
+          properties:
+            pod_name:
+              type: string
+            namespace:
+              type: string
+            severity:
+              type: string
+              enum: [low, medium, high, critical]
+            message:
+              type: string
+            recommendation:
+              type: string
+          required: [pod_name, severity, message]
+      summary:
+        type: string
+        description: Human-readable summary
+    required: [status, pods_checked, issues, summary]
+```
+
+### Agent Response
+
+When an agent has an `output_schema`, its response will be validated JSON:
+
+```json
+{
+  "status": "degraded",
+  "pods_checked": 12,
+  "issues": [
+    {
+      "pod_name": "api-server-abc123",
+      "namespace": "production",
+      "severity": "high",
+      "message": "Container restarted 5 times in the last hour",
+      "recommendation": "Check application logs for OOM or crash errors"
+    }
+  ],
+  "summary": "12 pods checked, 1 high-severity issue found in production namespace"
+}
+```
+
+## Schema Types
+
+### Simple Types
+
+```yaml
+output_schema:
+  type: string
+  description: A simple text response
+```
+
+```yaml
+output_schema:
+  type: number
+  minimum: 0
+  maximum: 100
+  description: A percentage value
+```
+
+```yaml
+output_schema:
+  type: boolean
+  description: Success indicator
+```
+
+### Object Types
+
+```yaml
+output_schema:
+  type: object
+  properties:
+    name:
+      type: string
+    count:
+      type: integer
+    enabled:
+      type: boolean
+  required: [name, count]
+  additionalProperties: false
+```
+
+### Array Types
+
+```yaml
+output_schema:
+  type: array
+  items:
+    type: object
+    properties:
+      id: { type: string }
+      value: { type: number }
+  minItems: 1
+  maxItems: 100
+```
+
+### Enum Types
+
+```yaml
+output_schema:
+  type: string
+  enum: [approved, rejected, pending, needs_review]
+```
+
+### Union Types (oneOf)
+
+```yaml
+output_schema:
+  oneOf:
+    - type: object
+      properties:
+        success: { type: boolean, const: true }
+        data: { type: object }
+      required: [success, data]
+    - type: object
+      properties:
+        success: { type: boolean, const: false }
+        error: { type: string }
+      required: [success, error]
+```
+
+## Use Cases
+
+### Incident Classification
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: Agent
+metadata:
+  name: incident-classifier
+spec:
+  model: google:gemini-2.5-flash
+  instructions: |
+    Classify incidents by severity and category.
+  output_schema:
+    type: object
+    properties:
+      severity:
+        type: string
+        enum: [P1, P2, P3, P4]
+      category:
+        type: string
+        enum: [infrastructure, application, security, network, database]
+      affected_services:
+        type: array
+        items: { type: string }
+      estimated_impact:
+        type: string
+      recommended_runbook:
+        type: string
+    required: [severity, category, affected_services]
+```
+
+### Code Review
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: Agent
+metadata:
+  name: code-reviewer
+spec:
+  model: google:gemini-2.5-flash
+  instructions: |
+    Review code changes and provide structured feedback.
+  output_schema:
+    type: object
+    properties:
+      verdict:
+        type: string
+        enum: [approve, request_changes, comment]
+      score:
+        type: integer
+        minimum: 1
+        maximum: 10
+      findings:
+        type: array
+        items:
+          type: object
+          properties:
+            file:
+              type: string
+            line:
+              type: integer
+            type:
+              type: string
+              enum: [bug, style, performance, security, suggestion]
+            message:
+              type: string
+          required: [file, type, message]
+      summary:
+        type: string
+    required: [verdict, score, findings, summary]
+```
+
+### Cost Analysis
+
+```yaml
+apiVersion: aof.sh/v1alpha1
+kind: Agent
+metadata:
+  name: cost-analyzer
+spec:
+  model: google:gemini-2.5-flash
+  instructions: |
+    Analyze cloud costs and provide optimization recommendations.
+  output_schema:
+    type: object
+    properties:
+      total_cost:
+        type: number
+        description: Total cost in USD
+      period:
+        type: string
+        description: Analysis period (e.g., "last 30 days")
+      breakdown:
+        type: array
+        items:
+          type: object
+          properties:
+            service:
+              type: string
+            cost:
+              type: number
+            percentage:
+              type: number
+            trend:
+              type: string
+              enum: [increasing, stable, decreasing]
+          required: [service, cost]
+      recommendations:
+        type: array
+        items:
+          type: object
+          properties:
+            action:
+              type: string
+            estimated_savings:
+              type: number
+            effort:
+              type: string
+              enum: [low, medium, high]
+            priority:
+              type: integer
+              minimum: 1
+              maximum: 5
+          required: [action, estimated_savings]
+    required: [total_cost, period, breakdown, recommendations]
+```
+
+## Schema in Flows
+
+When using agents with output schemas in flows, the structured output is available in variables:
+
+```yaml
+apiVersion: aof.sh/v1
+kind: AgentFlow
+spec:
+  nodes:
+    - id: analyze
+      type: Agent
+      config:
+        agent: pod-analyzer
+        prompt: "Analyze pods in namespace {{namespace}}"
+    - id: route
+      type: Conditional
+      config:
+        conditions:
+          - condition: "{{analyze.output.status}} == 'critical'"
+            target: alert
+          - condition: "{{analyze.output.status}} == 'degraded'"
+            target: investigate
+          - condition: "true"
+            target: log
+    - id: alert
+      type: Slack
+      config:
+        channel: "#incidents"
+        text: |
+          🚨 Critical: {{analyze.output.summary}}
+          Issues: {{analyze.output.issues | length}}
+```
+
+## Validation Behavior
+
+### Strict Mode (Default)
+
+By default, responses that don't match the schema will fail:
+
+```yaml
+output_schema:
+  type: object
+  properties:
+    status: { type: string }
+  required: [status]
+  # additionalProperties: false  # Implicit in strict mode
+```
+
+### Lenient Mode
+
+Allow additional properties and partial matches:
+
+```yaml
+output_schema:
+  type: object
+  properties:
+    status: { type: string }
+  additionalProperties: true
+  validation_mode: lenient  # Allows missing optional fields
+```
+
+### Coercion Mode
+
+Attempt to coerce response into schema:
+
+```yaml
+output_schema:
+  type: object
+  properties:
+    count: { type: integer }
+  validation_mode: coerce  # Will parse "42" as 42
+```
+
+## Error Handling
+
+When validation fails, the agent will:
+
+1. Log the validation error
+2. Return the raw response with `_validation_error` field
+3. Optionally retry with schema instructions (if configured)
+
+```yaml
+output_schema:
+  type: object
+  properties:
+    status: { type: string }
+  on_validation_error: retry  # Options: fail, retry, passthrough
+  max_retries: 2
+```
+
+## Best Practices
+
+### 1. Provide Clear Instructions
+
+Include schema expectations in agent instructions:
+
+```yaml
+instructions: |
+  Analyze the given data and respond in JSON format with:
+  - status: one of "healthy", "degraded", "critical"
+  - issues: array of found problems
+  - summary: brief text summary
+
+  Always respond with valid JSON matching the output schema.
+```
+
+### 2. Use Descriptive Field Names
+
+```yaml
+# Good
+properties:
+  estimated_completion_time: { type: string }
+  risk_assessment_score: { type: number }
+
+# Avoid
+properties:
+  ect: { type: string }
+  ras: { type: number }
+```
+
+### 3. Add Descriptions
+
+```yaml
+properties:
+  severity:
+    type: string
+    enum: [P1, P2, P3, P4]
+    description: |
+      Incident priority level:
+      - P1: Critical, immediate response required
+      - P2: High, respond within 1 hour
+      - P3: Medium, respond within 4 hours
+      - P4: Low, respond within 24 hours
+```
+
+### 4. Start Simple
+
+Begin with minimal schemas and expand as needed:
+
+```yaml
+# Start simple
+output_schema:
+  type: object
+  properties:
+    success: { type: boolean }
+    message: { type: string }
+  required: [success]
+
+# Expand later
+output_schema:
+  type: object
+  properties:
+    success: { type: boolean }
+    message: { type: string }
+    data: { ... }
+    metadata: { ... }
+```
+
+## Related
+
+- [Agent Configuration](/docs/reference/agent-spec)
+- [AgentFlow Variables](/docs/agentflow/variables)
+- [JSON Schema Reference](https://json-schema.org/)

From 774907ed628ab910094051baebb22b1768d2387a Mon Sep 17 00:00:00 2001
From: Gourav Shah <gs@initcron.org>
Date: Fri, 23 Jan 2026 19:48:47 +0530
Subject: [PATCH 14/14] feat: Interactive TUI mode with session persistence

Major enhancements to aofctl agent experience:

## Interactive TUI Mode
- Launch with `aofctl run agent <config.yaml>` (no --input flag)
- Chat panel with syntax-highlighted conversation history
- Activity log showing real-time agent events (thinking, tool use, LLM calls)
- Context gauge displaying token usage and execution time
- Help overlay with keyboard shortcuts (press ?)
- LazyGit-inspired styling with clear visual hierarchy

## Agent Cancellation
- ESC key to stop running agents gracefully
- Clean abort of LLM calls and tool executions
- Status indicator shows "Cancelling..." during abort

## Session Persistence
- Conversations saved automatically to ~/.aof/sessions/
- JSON format with message history, tokens, activity logs

## Session Resume
- `--resume` flag to continue latest session
- `--session <id>` flag for specific session
- Restored sessions maintain previous context

## Session Management
- `aofctl get sessions` - List all saved sessions
- `aofctl get sessions <agent>` - Filter by agent
- Supports JSON/YAML output formats

## Activity Event System
- New ActivityEvent enum in aof-core
- Real-time tracking: Thinking, Analyzing, LlmCall, ToolUse, ToolComplete
- ActivitySender/Receiver for runtime-TUI communication

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 CHANGELOG.md                          |  52 +++
 Cargo.toml                            |   2 +-
 crates/aof-core/src/activity.rs       | 536 ++++++++++++++++++++++++
 crates/aof-core/src/lib.rs            |   4 +
 crates/aofctl/Cargo.toml              |   3 +
 crates/aofctl/src/cli.rs              |  12 +
 crates/aofctl/src/commands/get.rs     | 172 ++++++++
 crates/aofctl/src/commands/run.rs     | 572 +++++++++++++++++++++++---
 crates/aofctl/src/main.rs             |   1 +
 crates/aofctl/src/resources.rs        |  11 +-
 crates/aofctl/src/session.rs          | 485 ++++++++++++++++++++++
 docs/getting-started.md               |  40 +-
 docs/internal/tui-enhancement-plan.md | 185 +++++++++
 docs/reference/agent-spec.md          | 165 ++++++++
 docs/reference/aofctl.md              |  86 +++-
 docs/reference/structured-io.md       |   4 +-
 docs/user/CLI_REFERENCE.md            |  80 ++++
 docusaurus-site/docusaurus.config.ts  |   4 +-
 docusaurus-site/sidebars.ts           |   1 +
 19 files changed, 2354 insertions(+), 61 deletions(-)
 create mode 100644 crates/aof-core/src/activity.rs
 create mode 100644 crates/aofctl/src/session.rs
 create mode 100644 docs/internal/tui-enhancement-plan.md

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1c7103d..0783188 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,58 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.4.0-beta] - 2026-01-23
+
+### Added
+- **Interactive TUI Mode** - Full-featured terminal user interface for agent conversations
+  - Launch with `aofctl run agent <config.yaml>` (no `--input` flag)
+  - Chat panel with syntax-highlighted conversation history
+  - Activity log showing real-time agent events (thinking, analyzing, tool use, LLM calls)
+  - Context gauge displaying token usage and execution time
+  - Help overlay with keyboard shortcuts (press `?`)
+  - LazyGit-inspired styling with clear visual hierarchy
+
+- **Agent Cancellation** - Stop running agents with ESC key
+  - Graceful cancellation using tokio CancellationToken
+  - Clean abort of LLM calls and tool executions
+  - Status indicator shows "Cancelling..." during abort
+
+- **Session Persistence** - Conversation history saved automatically
+  - Sessions stored in `~/.aof/sessions/<agent-name>/`
+  - Includes complete message history, token usage, activity logs
+  - JSON format for easy inspection and backup
+
+- **Session Resume** - Continue previous conversations
+  - `--resume` flag to continue latest session: `aofctl run agent config.yaml --resume`
+  - `--session <id>` flag to resume specific session
+  - Restored sessions show previous context to the agent
+
+- **Session Management Commands**
+  - `aofctl get sessions` - List all saved sessions across agents
+  - `aofctl get sessions <agent>` - List sessions for specific agent
+  - Output shows session ID, agent, model, message count, tokens, age
+  - Supports `-o json` and `-o yaml` output formats
+
+- **Activity Event System** - Real-time agent activity tracking
+  - New `ActivityEvent` enum in aof-core with event types:
+    - Thinking, Analyzing, LlmCall, ToolUse, ToolComplete, Warning, Error
+  - `ActivitySender` for emitting events from runtime
+  - `ActivityReceiver` for consuming events in TUI
+
+### Changed
+- TUI keyboard shortcuts updated:
+  - `ESC` now cancels running agent (was: do nothing)
+  - `Ctrl+S` saves session manually
+  - `Ctrl+L` clears chat and starts new session
+  - `Shift+↑/↓` scrolls chat history
+  - `PageUp/Down` scrolls 5 lines
+
+### Documentation
+- Updated getting-started guide with interactive mode examples
+- Added TUI keyboard shortcuts to CLI reference
+- Added session management documentation
+- Updated aofctl reference with --resume and --session flags
+
 ## [0.3.2-beta] - 2026-01-02
 
 ### Added
diff --git a/Cargo.toml b/Cargo.toml
index f2b7cb9..a34f037 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,7 +15,7 @@ members = [
 ]
 
 [workspace.package]
-version = "0.3.2-beta"
+version = "0.4.0-beta"
 edition = "2021"
 rust-version = "1.75"
 license = "Apache-2.0"
diff --git a/crates/aof-core/src/activity.rs b/crates/aof-core/src/activity.rs
new file mode 100644
index 0000000..f979056
--- /dev/null
+++ b/crates/aof-core/src/activity.rs
@@ -0,0 +1,536 @@
+//! Agent Activity Events for TUI logging
+//!
+//! This module provides activity event types that agents emit during execution,
+//! allowing the TUI to display real-time agent thinking, analyzing, and tool usage.
+
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use std::sync::mpsc::Sender;
+
+/// Activity event types for agent execution
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub enum ActivityType {
+    /// Agent is processing/thinking
+    Thinking,
+    /// Agent is analyzing context or data
+    Analyzing,
+    /// Agent is calling an LLM
+    LlmCall,
+    /// Agent is waiting for LLM response
+    LlmWaiting,
+    /// LLM response received
+    LlmResponse,
+    /// Agent is discovering/loading tools
+    ToolDiscovery,
+    /// Agent is executing a tool
+    ToolExecuting,
+    /// Tool execution completed
+    ToolComplete,
+    /// Tool execution failed
+    ToolFailed,
+    /// Memory operation (read/write)
+    Memory,
+    /// MCP server communication
+    McpCall,
+    /// Validation (schema, output)
+    Validation,
+    /// Warning condition
+    Warning,
+    /// Error condition
+    Error,
+    /// Information message
+    Info,
+    /// Debug message
+    Debug,
+    /// Execution started
+    Started,
+    /// Execution completed
+    Completed,
+    /// Execution cancelled
+    Cancelled,
+}
+
+impl ActivityType {
+    /// Get emoji/icon for this activity type
+    pub fn icon(&self) -> &'static str {
+        match self {
+            ActivityType::Thinking => "🧠",
+            ActivityType::Analyzing => "🔍",
+            ActivityType::LlmCall => "📤",
+            ActivityType::LlmWaiting => "⏳",
+            ActivityType::LlmResponse => "📥",
+            ActivityType::ToolDiscovery => "🔧",
+            ActivityType::ToolExecuting => "⚙️",
+            ActivityType::ToolComplete => "✓",
+            ActivityType::ToolFailed => "✗",
+            ActivityType::Memory => "💾",
+            ActivityType::McpCall => "🔌",
+            ActivityType::Validation => "📋",
+            ActivityType::Warning => "⚠️",
+            ActivityType::Error => "❌",
+            ActivityType::Info => "ℹ️",
+            ActivityType::Debug => "🐛",
+            ActivityType::Started => "▶",
+            ActivityType::Completed => "●",
+            ActivityType::Cancelled => "⏹",
+        }
+    }
+
+    /// Get ANSI color code for TUI display
+    pub fn color(&self) -> &'static str {
+        match self {
+            ActivityType::Thinking | ActivityType::Analyzing => "cyan",
+            ActivityType::LlmCall | ActivityType::LlmWaiting | ActivityType::LlmResponse => "blue",
+            ActivityType::ToolDiscovery => "magenta",
+            ActivityType::ToolExecuting => "yellow",
+            ActivityType::ToolComplete | ActivityType::Completed => "green",
+            ActivityType::ToolFailed | ActivityType::Error => "red",
+            ActivityType::Memory => "cyan",
+            ActivityType::McpCall => "magenta",
+            ActivityType::Validation => "blue",
+            ActivityType::Warning => "yellow",
+            ActivityType::Info | ActivityType::Debug => "gray",
+            ActivityType::Started => "green",
+            ActivityType::Cancelled => "yellow",
+        }
+    }
+
+    /// Get short label for this activity type
+    pub fn label(&self) -> &'static str {
+        match self {
+            ActivityType::Thinking => "THINK",
+            ActivityType::Analyzing => "ANALYZE",
+            ActivityType::LlmCall => "LLM→",
+            ActivityType::LlmWaiting => "WAIT",
+            ActivityType::LlmResponse => "LLM←",
+            ActivityType::ToolDiscovery => "TOOLS",
+            ActivityType::ToolExecuting => "EXEC",
+            ActivityType::ToolComplete => "DONE",
+            ActivityType::ToolFailed => "FAIL",
+            ActivityType::Memory => "MEM",
+            ActivityType::McpCall => "MCP",
+            ActivityType::Validation => "VALID",
+            ActivityType::Warning => "WARN",
+            ActivityType::Error => "ERROR",
+            ActivityType::Info => "INFO",
+            ActivityType::Debug => "DEBUG",
+            ActivityType::Started => "START",
+            ActivityType::Completed => "DONE",
+            ActivityType::Cancelled => "CANCEL",
+        }
+    }
+}
+
+/// An activity event emitted during agent execution
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ActivityEvent {
+    /// Type of activity
+    pub activity_type: ActivityType,
+    /// Human-readable message
+    pub message: String,
+    /// Timestamp when the activity occurred
+    pub timestamp: DateTime<Utc>,
+    /// Optional additional details (e.g., tool name, duration)
+    pub details: Option<ActivityDetails>,
+}
+
+/// Additional details for an activity event
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ActivityDetails {
+    /// Tool name (for tool-related activities)
+    pub tool_name: Option<String>,
+    /// Tool arguments (for tool execution)
+    pub tool_args: Option<String>,
+    /// Duration in milliseconds
+    pub duration_ms: Option<u64>,
+    /// Token counts
+    pub tokens: Option<TokenCount>,
+    /// Error message
+    pub error: Option<String>,
+    /// Additional key-value metadata
+    pub metadata: Option<std::collections::HashMap<String, String>>,
+}
+
+/// Token count details
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct TokenCount {
+    pub input: u32,
+    pub output: u32,
+}
+
+impl ActivityEvent {
+    /// Create a new activity event
+    pub fn new(activity_type: ActivityType, message: impl Into<String>) -> Self {
+        Self {
+            activity_type,
+            message: message.into(),
+            timestamp: Utc::now(),
+            details: None,
+        }
+    }
+
+    /// Add details to this event
+    pub fn with_details(mut self, details: ActivityDetails) -> Self {
+        self.details = Some(details);
+        self
+    }
+
+    /// Add tool name
+    pub fn with_tool(mut self, tool_name: impl Into<String>) -> Self {
+        let details = self.details.get_or_insert(ActivityDetails {
+            tool_name: None,
+            tool_args: None,
+            duration_ms: None,
+            tokens: None,
+            error: None,
+            metadata: None,
+        });
+        details.tool_name = Some(tool_name.into());
+        self
+    }
+
+    /// Add tool arguments
+    pub fn with_args(mut self, args: impl Into<String>) -> Self {
+        let details = self.details.get_or_insert(ActivityDetails {
+            tool_name: None,
+            tool_args: None,
+            duration_ms: None,
+            tokens: None,
+            error: None,
+            metadata: None,
+        });
+        details.tool_args = Some(args.into());
+        self
+    }
+
+    /// Add duration
+    pub fn with_duration(mut self, duration_ms: u64) -> Self {
+        let details = self.details.get_or_insert(ActivityDetails {
+            tool_name: None,
+            tool_args: None,
+            duration_ms: None,
+            tokens: None,
+            error: None,
+            metadata: None,
+        });
+        details.duration_ms = Some(duration_ms);
+        self
+    }
+
+    /// Add token counts
+    pub fn with_tokens(mut self, input: u32, output: u32) -> Self {
+        let details = self.details.get_or_insert(ActivityDetails {
+            tool_name: None,
+            tool_args: None,
+            duration_ms: None,
+            tokens: None,
+            error: None,
+            metadata: None,
+        });
+        details.tokens = Some(TokenCount { input, output });
+        self
+    }
+
+    /// Add error message
+    pub fn with_error(mut self, error: impl Into<String>) -> Self {
+        let details = self.details.get_or_insert(ActivityDetails {
+            tool_name: None,
+            tool_args: None,
+            duration_ms: None,
+            tokens: None,
+            error: None,
+            metadata: None,
+        });
+        details.error = Some(error.into());
+        self
+    }
+
+    /// Format for display in TUI
+    pub fn format_display(&self) -> String {
+        let icon = self.activity_type.icon();
+        let label = self.activity_type.label();
+        let time = self.timestamp.format("%H:%M:%S");
+
+        let mut output = format!("[{}] {} {}: {}", time, icon, label, self.message);
+
+        if let Some(ref details) = self.details {
+            if let Some(ref tool) = details.tool_name {
+                output.push_str(&format!(" [{}]", tool));
+            }
+            if let Some(duration) = details.duration_ms {
+                output.push_str(&format!(" ({}ms)", duration));
+            }
+            if let Some(ref tokens) = details.tokens {
+                output.push_str(&format!(" [{}→{}]", tokens.input, tokens.output));
+            }
+        }
+
+        output
+    }
+
+    /// Format for display without timestamp (compact)
+    pub fn format_compact(&self) -> String {
+        let icon = self.activity_type.icon();
+
+        let mut output = format!("{} {}", icon, self.message);
+
+        if let Some(ref details) = self.details {
+            if let Some(ref tool) = details.tool_name {
+                output.push_str(&format!(" [{}]", tool));
+            }
+            if let Some(duration) = details.duration_ms {
+                output.push_str(&format!(" ({}ms)", duration));
+            }
+        }
+
+        output
+    }
+}
+
+// Convenience constructors for common activity types
+impl ActivityEvent {
+    pub fn thinking(message: impl Into<String>) -> Self {
+        Self::new(ActivityType::Thinking, message)
+    }
+
+    pub fn analyzing(message: impl Into<String>) -> Self {
+        Self::new(ActivityType::Analyzing, message)
+    }
+
+    pub fn llm_call(message: impl Into<String>) -> Self {
+        Self::new(ActivityType::LlmCall, message)
+    }
+
+    pub fn llm_waiting() -> Self {
+        Self::new(ActivityType::LlmWaiting, "Waiting for LLM response...")
+    }
+
+    pub fn llm_response(input_tokens: u32, output_tokens: u32) -> Self {
+        Self::new(ActivityType::LlmResponse, "Received LLM response")
+            .with_tokens(input_tokens, output_tokens)
+    }
+
+    pub fn tool_discovery(count: usize) -> Self {
+        Self::new(
+            ActivityType::ToolDiscovery,
+            format!("Discovered {} available tools", count),
+        )
+    }
+
+    pub fn tool_executing(tool_name: impl Into<String>, args: Option<String>) -> Self {
+        let name = tool_name.into();
+        let msg = format!("Executing tool: {}", name);
+        let mut event = Self::new(ActivityType::ToolExecuting, msg).with_tool(&name);
+        if let Some(a) = args {
+            // Truncate args for display
+            let truncated = if a.len() > 100 {
+                format!("{}...", &a[..100])
+            } else {
+                a
+            };
+            event = event.with_args(truncated);
+        }
+        event
+    }
+
+    pub fn tool_complete(tool_name: impl Into<String>, duration_ms: u64) -> Self {
+        let name = tool_name.into();
+        Self::new(ActivityType::ToolComplete, format!("Tool completed: {}", name))
+            .with_tool(name)
+            .with_duration(duration_ms)
+    }
+
+    pub fn tool_failed(tool_name: impl Into<String>, error: impl Into<String>) -> Self {
+        let name = tool_name.into();
+        Self::new(ActivityType::ToolFailed, format!("Tool failed: {}", name))
+            .with_tool(name)
+            .with_error(error)
+    }
+
+    pub fn memory(operation: &str, key: &str) -> Self {
+        Self::new(
+            ActivityType::Memory,
+            format!("Memory {}: {}", operation, key),
+        )
+    }
+
+    pub fn mcp_call(server: &str, method: &str) -> Self {
+        Self::new(ActivityType::McpCall, format!("MCP {} → {}", server, method))
+    }
+
+    pub fn warning(message: impl Into<String>) -> Self {
+        Self::new(ActivityType::Warning, message)
+    }
+
+    pub fn error(message: impl Into<String>) -> Self {
+        Self::new(ActivityType::Error, message)
+    }
+
+    pub fn info(message: impl Into<String>) -> Self {
+        Self::new(ActivityType::Info, message)
+    }
+
+    pub fn started(agent_name: &str) -> Self {
+        Self::new(
+            ActivityType::Started,
+            format!("Starting execution for agent: {}", agent_name),
+        )
+    }
+
+    pub fn completed(duration_ms: u64) -> Self {
+        Self::new(
+            ActivityType::Completed,
+            format!("Execution completed in {}ms", duration_ms),
+        )
+        .with_duration(duration_ms)
+    }
+
+    pub fn cancelled() -> Self {
+        Self::new(ActivityType::Cancelled, "Execution cancelled by user")
+    }
+}
+
+/// Activity logger that can be passed to executors
+#[derive(Clone)]
+pub struct ActivityLogger {
+    sender: Sender<ActivityEvent>,
+}
+
+impl ActivityLogger {
+    /// Create a new activity logger with a channel sender
+    pub fn new(sender: Sender<ActivityEvent>) -> Self {
+        Self { sender }
+    }
+
+    /// Log an activity event
+    pub fn log(&self, event: ActivityEvent) {
+        // Ignore send errors (receiver may be dropped)
+        let _ = self.sender.send(event);
+    }
+
+    /// Log a thinking activity
+    pub fn thinking(&self, message: impl Into<String>) {
+        self.log(ActivityEvent::thinking(message));
+    }
+
+    /// Log an analyzing activity
+    pub fn analyzing(&self, message: impl Into<String>) {
+        self.log(ActivityEvent::analyzing(message));
+    }
+
+    /// Log an LLM call
+    pub fn llm_call(&self, message: impl Into<String>) {
+        self.log(ActivityEvent::llm_call(message));
+    }
+
+    /// Log LLM waiting
+    pub fn llm_waiting(&self) {
+        self.log(ActivityEvent::llm_waiting());
+    }
+
+    /// Log LLM response
+    pub fn llm_response(&self, input_tokens: u32, output_tokens: u32) {
+        self.log(ActivityEvent::llm_response(input_tokens, output_tokens));
+    }
+
+    /// Log tool execution start
+    pub fn tool_executing(&self, tool_name: impl Into<String>, args: Option<String>) {
+        self.log(ActivityEvent::tool_executing(tool_name, args));
+    }
+
+    /// Log tool completion
+    pub fn tool_complete(&self, tool_name: impl Into<String>, duration_ms: u64) {
+        self.log(ActivityEvent::tool_complete(tool_name, duration_ms));
+    }
+
+    /// Log tool failure
+    pub fn tool_failed(&self, tool_name: impl Into<String>, error: impl Into<String>) {
+        self.log(ActivityEvent::tool_failed(tool_name, error));
+    }
+
+    /// Log warning
+    pub fn warning(&self, message: impl Into<String>) {
+        self.log(ActivityEvent::warning(message));
+    }
+
+    /// Log error
+    pub fn error(&self, message: impl Into<String>) {
+        self.log(ActivityEvent::error(message));
+    }
+
+    /// Log info
+    pub fn info(&self, message: impl Into<String>) {
+        self.log(ActivityEvent::info(message));
+    }
+
+    /// Log execution started
+    pub fn started(&self, agent_name: &str) {
+        self.log(ActivityEvent::started(agent_name));
+    }
+
+    /// Log execution completed
+    pub fn completed(&self, duration_ms: u64) {
+        self.log(ActivityEvent::completed(duration_ms));
+    }
+
+    /// Log execution cancelled
+    pub fn cancelled(&self) {
+        self.log(ActivityEvent::cancelled());
+    }
+}
+
+/// No-op activity logger for when activity logging is disabled
+pub struct NoopActivityLogger;
+
+impl NoopActivityLogger {
+    pub fn log(&self, _event: ActivityEvent) {}
+    pub fn thinking(&self, _message: impl Into<String>) {}
+    pub fn analyzing(&self, _message: impl Into<String>) {}
+    pub fn llm_call(&self, _message: impl Into<String>) {}
+    pub fn llm_waiting(&self) {}
+    pub fn llm_response(&self, _input_tokens: u32, _output_tokens: u32) {}
+    pub fn tool_executing(&self, _tool_name: impl Into<String>, _args: Option<String>) {}
+    pub fn tool_complete(&self, _tool_name: impl Into<String>, _duration_ms: u64) {}
+    pub fn tool_failed(&self, _tool_name: impl Into<String>, _error: impl Into<String>) {}
+    pub fn warning(&self, _message: impl Into<String>) {}
+    pub fn error(&self, _message: impl Into<String>) {}
+    pub fn info(&self, _message: impl Into<String>) {}
+    pub fn started(&self, _agent_name: &str) {}
+    pub fn completed(&self, _duration_ms: u64) {}
+    pub fn cancelled(&self) {}
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_activity_event_creation() {
+        let event = ActivityEvent::thinking("Processing user request");
+        assert_eq!(event.activity_type, ActivityType::Thinking);
+        assert_eq!(event.message, "Processing user request");
+    }
+
+    #[test]
+    fn test_activity_event_with_details() {
+        let event = ActivityEvent::tool_executing("kubectl", Some("get pods".to_string()));
+        assert!(event.details.is_some());
+        let details = event.details.unwrap();
+        assert_eq!(details.tool_name, Some("kubectl".to_string()));
+    }
+
+    #[test]
+    fn test_activity_event_formatting() {
+        let event = ActivityEvent::tool_complete("kubectl", 234);
+        let formatted = event.format_compact();
+        assert!(formatted.contains("✓"));
+        assert!(formatted.contains("234ms"));
+    }
+
+    #[test]
+    fn test_activity_type_icons() {
+        assert_eq!(ActivityType::Thinking.icon(), "🧠");
+        assert_eq!(ActivityType::ToolExecuting.icon(), "⚙️");
+        assert_eq!(ActivityType::Error.icon(), "❌");
+    }
+}
diff --git a/crates/aof-core/src/lib.rs b/crates/aof-core/src/lib.rs
index 862653a..3599d56 100644
--- a/crates/aof-core/src/lib.rs
+++ b/crates/aof-core/src/lib.rs
@@ -3,6 +3,7 @@
 // This crate provides zero-cost abstractions for building high-performance
 // agentic systems targeting DevOps and SRE workflows.
 
+pub mod activity;
 pub mod agent;
 pub mod agentflow;
 pub mod binding;
@@ -73,6 +74,9 @@ pub use trigger::{
     CommandBinding, StandaloneTriggerConfig, StandaloneTriggerType, Trigger, TriggerMetadata,
     TriggerSpec,
 };
+pub use activity::{
+    ActivityDetails, ActivityEvent, ActivityLogger, ActivityType, NoopActivityLogger, TokenCount,
+};
 
 /// Version information
 pub const VERSION: &str = env!("CARGO_PKG_VERSION");
diff --git a/crates/aofctl/Cargo.toml b/crates/aofctl/Cargo.toml
index 7b55858..48cab07 100644
--- a/crates/aofctl/Cargo.toml
+++ b/crates/aofctl/Cargo.toml
@@ -41,6 +41,9 @@ ratatui = "0.26"
 atty = "0.2"
 comfy-table = "7.1"
 colored = "2.1"
+uuid = { version = "1.6", features = ["v4"] }
+dirs = "5.0"
+tokio-util = { version = "0.7", features = ["rt"] }
 
 [dev-dependencies]
 tokio = { workspace = true, features = ["test-util"] }
diff --git a/crates/aofctl/src/cli.rs b/crates/aofctl/src/cli.rs
index 6f183da..770df8d 100644
--- a/crates/aofctl/src/cli.rs
+++ b/crates/aofctl/src/cli.rs
@@ -54,6 +54,14 @@ pub enum Commands {
         /// Path to JSON schema file for output validation
         #[arg(long, conflicts_with = "output_schema")]
         output_schema_file: Option<String>,
+
+        /// Resume the latest session for this agent (interactive mode only)
+        #[arg(long)]
+        resume: bool,
+
+        /// Resume a specific session by ID (interactive mode only)
+        #[arg(long, conflicts_with = "resume")]
+        session: Option<String>,
     },
 
     /// Get resources (verb-first: get agents, get agent <name>)
@@ -240,6 +248,8 @@ impl Cli {
                 output,
                 output_schema,
                 output_schema_file,
+                resume,
+                session,
             } => {
                 commands::run::execute(
                     &resource_type,
@@ -249,6 +259,8 @@ impl Cli {
                     output_schema.as_deref(),
                     output_schema_file.as_deref(),
                     context.as_ref(),
+                    resume,
+                    session.as_deref(),
                 )
                 .await
             }
diff --git a/crates/aofctl/src/commands/get.rs b/crates/aofctl/src/commands/get.rs
index 94aa39f..c5e7e42 100644
--- a/crates/aofctl/src/commands/get.rs
+++ b/crates/aofctl/src/commands/get.rs
@@ -13,6 +13,11 @@ pub async fn execute(
     let rt = ResourceType::from_str(resource_type)
         .ok_or_else(|| anyhow::anyhow!("Unknown resource type: {}", resource_type))?;
 
+    // Handle Session resource type specially
+    if matches!(rt, ResourceType::Session) {
+        return list_sessions(name, output).await;
+    }
+
     // Build resource list - either from library or mock data
     let resources = if library {
         get_library_resources(&rt, name)?
@@ -63,6 +68,173 @@ pub async fn execute(
     Ok(())
 }
 
+/// List sessions for agents
+async fn list_sessions(agent_name: Option<&str>, output: &str) -> Result<()> {
+    use crate::session::SessionManager;
+    use chrono::Utc;
+
+    let manager = SessionManager::new()?;
+
+    // If agent name provided, list sessions for that agent
+    // Otherwise, list all agents with sessions
+    let sessions_data: Vec<serde_json::Value> = if let Some(agent) = agent_name {
+        let sessions = manager.list(agent)?;
+        sessions.iter().map(|s| {
+            let age = format_age(s.updated_at);
+            serde_json::json!({
+                "metadata": {
+                    "name": &s.id[..8],
+                    "fullId": &s.id,
+                    "agent": &s.agent_name,
+                    "createdAt": s.created_at.to_rfc3339(),
+                    "updatedAt": s.updated_at.to_rfc3339(),
+                },
+                "spec": {
+                    "model": &s.model,
+                    "messageCount": s.message_count,
+                    "totalTokens": s.total_tokens,
+                },
+                "status": {
+                    "phase": "Saved",
+                    "age": age,
+                }
+            })
+        }).collect()
+    } else {
+        // List all agents with sessions
+        let agents = manager.list_agents()?;
+        let mut all_sessions = Vec::new();
+        for agent in agents {
+            if let Ok(sessions) = manager.list(&agent) {
+                for s in sessions {
+                    let age = format_age(s.updated_at);
+                    all_sessions.push(serde_json::json!({
+                        "metadata": {
+                            "name": &s.id[..8],
+                            "fullId": &s.id,
+                            "agent": &s.agent_name,
+                            "createdAt": s.created_at.to_rfc3339(),
+                            "updatedAt": s.updated_at.to_rfc3339(),
+                        },
+                        "spec": {
+                            "model": &s.model,
+                            "messageCount": s.message_count,
+                            "totalTokens": s.total_tokens,
+                        },
+                        "status": {
+                            "phase": "Saved",
+                            "age": age,
+                        }
+                    }));
+                }
+            }
+        }
+        all_sessions
+    };
+
+    match output {
+        "json" => {
+            let output = serde_json::json!({
+                "apiVersion": "cli/v1",
+                "kind": "SessionList",
+                "items": sessions_data
+            });
+            println!("{}", serde_json::to_string_pretty(&output)?);
+        }
+        "yaml" => {
+            let output = serde_json::json!({
+                "apiVersion": "cli/v1",
+                "kind": "SessionList",
+                "items": sessions_data
+            });
+            println!("{}", serde_yaml::to_string(&output)?);
+        }
+        "name" => {
+            for session in &sessions_data {
+                if let Some(name) = session.get("metadata")
+                    .and_then(|m| m.get("fullId"))
+                    .and_then(|n| n.as_str()) {
+                    let agent = session.get("metadata")
+                        .and_then(|m| m.get("agent"))
+                        .and_then(|a| a.as_str())
+                        .unwrap_or("unknown");
+                    println!("session/{}/{}", agent, name);
+                }
+            }
+        }
+        "wide" | _ => {
+            if sessions_data.is_empty() {
+                println!("\nNo sessions found.");
+                println!("Run an agent interactively to create a session:");
+                println!("  aofctl run agent <config.yaml>");
+                return Ok(());
+            }
+
+            // Table format
+            println!("\n{:<10} {:<18} {:<24} {:>6} {:>8} {:<10}",
+                "ID", "AGENT", "MODEL", "MSGS", "TOKENS", "AGE");
+            println!("{}", "=".repeat(85));
+
+            for session in &sessions_data {
+                let id = session.get("metadata")
+                    .and_then(|m| m.get("name"))
+                    .and_then(|n| n.as_str())
+                    .unwrap_or("unknown");
+                let agent = session.get("metadata")
+                    .and_then(|m| m.get("agent"))
+                    .and_then(|a| a.as_str())
+                    .unwrap_or("unknown");
+                let model = session.get("spec")
+                    .and_then(|s| s.get("model"))
+                    .and_then(|m| m.as_str())
+                    .unwrap_or("unknown");
+                let msgs = session.get("spec")
+                    .and_then(|s| s.get("messageCount"))
+                    .and_then(|m| m.as_u64())
+                    .unwrap_or(0);
+                let tokens = session.get("spec")
+                    .and_then(|s| s.get("totalTokens"))
+                    .and_then(|t| t.as_u64())
+                    .unwrap_or(0);
+                let age = session.get("status")
+                    .and_then(|s| s.get("age"))
+                    .and_then(|a| a.as_str())
+                    .unwrap_or("-");
+
+                // Truncate model if too long
+                let model_display = if model.len() > 24 {
+                    format!("{}...", &model[..21])
+                } else {
+                    model.to_string()
+                };
+
+                println!("{:<10} {:<18} {:<24} {:>6} {:>8} {:<10}",
+                    id, agent, model_display, msgs, tokens, age);
+            }
+
+            println!("\nTo resume a session:");
+            println!("  aofctl run agent <config.yaml> --resume");
+            println!("  aofctl run agent <config.yaml> --session <session-id>");
+        }
+    }
+
+    Ok(())
+}
+
+/// Format age from DateTime to human-readable string
+fn format_age(dt: chrono::DateTime<chrono::Utc>) -> String {
+    let age = chrono::Utc::now().signed_duration_since(dt);
+    if age.num_days() > 0 {
+        format!("{}d", age.num_days())
+    } else if age.num_hours() > 0 {
+        format!("{}h", age.num_hours())
+    } else if age.num_minutes() > 0 {
+        format!("{}m", age.num_minutes())
+    } else {
+        "now".to_string()
+    }
+}
+
 /// Get resources from the built-in library directory
 fn get_library_resources(
     rt: &ResourceType,
diff --git a/crates/aofctl/src/commands/run.rs b/crates/aofctl/src/commands/run.rs
index 56354b0..c8642f1 100644
--- a/crates/aofctl/src/commands/run.rs
+++ b/crates/aofctl/src/commands/run.rs
@@ -1,14 +1,17 @@
 use anyhow::{Context as AnyhowContext, Result, anyhow};
 use aof_core::{AgentConfig, AgentContext, Context as AofContext, OutputSchema};
+use aof_core::{ActivityEvent, ActivityType};
 use aof_runtime::Runtime;
 use std::fs;
 use std::io::{self, IsTerminal, Write};
 use std::sync::{Arc, Mutex};
 use std::sync::mpsc::{channel, Receiver, Sender};
 use tokio::sync::{mpsc as tokio_mpsc, RwLock};
-use tracing::info;
+use tokio_util::sync::CancellationToken;
+use tracing::{info, warn};
 use crate::resources::ResourceType;
 use crate::output::{Spinner, FlowOutput};
+use crate::session::{Session, SessionManager, MessageTokens};
 
 /// Internal struct to try parsing K8s format explicitly for better errors
 #[derive(serde::Deserialize)]
@@ -229,6 +232,8 @@ pub async fn execute(
     output_schema: Option<&str>,
     output_schema_file: Option<&str>,
     context: Option<&AofContext>,
+    resume: bool,
+    session_id: Option<&str>,
 ) -> Result<()> {
     // Log context if provided
     if let Some(ctx) = context {
@@ -249,7 +254,7 @@ pub async fn execute(
     let schema = parse_output_schema(output_schema, output_schema_file)?;
 
     match rt {
-        ResourceType::Agent => run_agent(name_or_config, input, output, schema, context).await,
+        ResourceType::Agent => run_agent(name_or_config, input, output, schema, context, resume, session_id).await,
         ResourceType::Workflow | ResourceType::Flow => run_workflow(name_or_config, input, output).await,
         ResourceType::Fleet => run_fleet(name_or_config, input, output).await,
         ResourceType::Job => run_job(name_or_config, input, output).await,
@@ -391,7 +396,11 @@ async fn run_agent(
     output: &str,
     schema: Option<OutputSchema>,
     context: Option<&AofContext>,
+    resume: bool,
+    session_id: Option<&str>,
 ) -> Result<()> {
+    use crate::session::SessionManager;
+
     // Resolve library:// URIs to actual file paths
     let config_path = if config.starts_with("library://") {
         resolve_library_uri(config)?
@@ -419,8 +428,40 @@ async fn run_agent(
             .await
             .with_context(|| "Failed to load agent")?;
 
+        // Handle session resume
+        let resume_session = if resume || session_id.is_some() {
+            let manager = SessionManager::new()?;
+            if let Some(sid) = session_id {
+                // Resume specific session
+                match manager.load(&agent_name, sid) {
+                    Ok(session) => {
+                        info!("Resuming session: {} ({} messages)", sid, session.messages.len());
+                        Some(session)
+                    }
+                    Err(e) => {
+                        warn!("Failed to load session '{}': {}", sid, e);
+                        None
+                    }
+                }
+            } else {
+                // Resume latest session
+                match manager.load_latest(&agent_name) {
+                    Ok(session) => {
+                        info!("Resuming latest session: {} ({} messages)", session.id, session.messages.len());
+                        Some(session)
+                    }
+                    Err(e) => {
+                        info!("No previous session found: {}", e);
+                        None
+                    }
+                }
+            }
+        } else {
+            None
+        };
+
         // Launch interactive REPL mode with TUI log capture
-        run_agent_interactive(&runtime, &agent_name, output).await?;
+        run_agent_interactive_with_resume(&runtime, &agent_name, output, resume_session).await?;
         return Ok(());
     }
 
@@ -492,6 +533,7 @@ struct AppState {
     chat_history: Vec<(String, String)>, // (role, message)
     current_input: String,
     logs: Vec<String>,
+    activities: Vec<ActivityEvent>, // Agent activity events
     agent_busy: bool,
     last_error: Option<String>,
     execution_start: Option<Instant>,
@@ -499,6 +541,7 @@ struct AppState {
     message_count: usize,
     spinner_state: u8,
     log_receiver: Receiver<String>,
+    activity_receiver: Receiver<ActivityEvent>, // Activity event receiver
     model_name: String,
     tools: Vec<String>,
     execution_result_rx: tokio_mpsc::Receiver<Result<String, String>>,
@@ -506,10 +549,20 @@ struct AppState {
     output_tokens: u32,
     context_window: u32, // Max context window for model
     chat_scroll_offset: u16, // Scroll offset for chat history
+    show_help: bool, // Toggle help panel
+    session: Session, // Current session for persistence
+    cancellation_token: CancellationToken, // For stopping execution
+    agent_name: String, // Agent name for session
 }
 
 impl AppState {
-    fn new(log_receiver: Receiver<String>, model_name: String, tools: Vec<String>) -> Self {
+    fn new(
+        log_receiver: Receiver<String>,
+        activity_receiver: Receiver<ActivityEvent>,
+        model_name: String,
+        tools: Vec<String>,
+        agent_name: String,
+    ) -> Self {
         let (tx, rx) = tokio_mpsc::channel(1);
         let _ = tx; // Drop sender since we only use the receiver
 
@@ -532,15 +585,21 @@ impl AppState {
 ╚═╝ ╚═╝ ╚═══╝ ╚═╝
 
 Agentic Ops Framework
-aof.sh"#;
+aof.sh
+
+Press ? for help │ ESC to cancel │ Ctrl+C to quit"#;
 
         let mut chat_history = Vec::new();
         chat_history.push(("system".to_string(), greeting.to_string()));
 
+        // Create a new session
+        let session = Session::new(&agent_name, &model_name);
+
         Self {
             chat_history,
             current_input: String::new(),
             logs: Vec::new(),
+            activities: Vec::new(),
             agent_busy: false,
             last_error: None,
             execution_start: None,
@@ -548,6 +607,7 @@ aof.sh"#;
             message_count: 0,
             spinner_state: 0,
             log_receiver,
+            activity_receiver,
             model_name,
             tools,
             execution_result_rx: rx,
@@ -555,6 +615,62 @@ aof.sh"#;
             output_tokens: 0,
             context_window,
             chat_scroll_offset: 0,
+            show_help: false,
+            session,
+            cancellation_token: CancellationToken::new(),
+            agent_name,
+        }
+    }
+
+    fn restore_from_session(
+        log_receiver: Receiver<String>,
+        activity_receiver: Receiver<ActivityEvent>,
+        model_name: String,
+        tools: Vec<String>,
+        agent_name: String,
+        session: Session,
+    ) -> Self {
+        let (tx, rx) = tokio_mpsc::channel(1);
+        let _ = tx;
+
+        let context_window = match model_name.as_str() {
+            "google:gemini-2.5-flash" => 1000000,
+            "google:gemini-2.0-flash" => 1000000,
+            "openai:gpt-4-turbo" => 128000,
+            "openai:gpt-4" => 8192,
+            _ => 128000,
+        };
+
+        // Convert session messages to chat history
+        let mut chat_history: Vec<(String, String)> = session.to_chat_history();
+
+        // Add resume indicator
+        chat_history.push(("system".to_string(), "── Session Resumed ──".to_string()));
+
+        Self {
+            chat_history,
+            current_input: String::new(),
+            logs: Vec::new(),
+            activities: Vec::new(),
+            agent_busy: false,
+            last_error: None,
+            execution_start: None,
+            execution_time_ms: 0,
+            message_count: session.message_count(),
+            spinner_state: 0,
+            log_receiver,
+            activity_receiver,
+            model_name,
+            tools,
+            execution_result_rx: rx,
+            input_tokens: session.token_usage.total_input,
+            output_tokens: session.token_usage.total_output,
+            context_window,
+            chat_scroll_offset: 0,
+            show_help: false,
+            session,
+            cancellation_token: CancellationToken::new(),
+            agent_name,
         }
     }
 
@@ -569,6 +685,43 @@ aof.sh"#;
         }
     }
 
+    fn consume_activities(&mut self) {
+        // Drain all available activities from the receiver (non-blocking)
+        while let Ok(activity) = self.activity_receiver.try_recv() {
+            // Add to session activity log
+            self.session.add_activity(
+                activity.activity_type.label(),
+                &activity.message,
+            );
+            // Keep only last 500 activities to avoid memory bloat
+            if self.activities.len() >= 500 {
+                self.activities.remove(0);
+            }
+            self.activities.push(activity);
+        }
+    }
+
+    fn add_activity(&mut self, activity: ActivityEvent) {
+        self.session.add_activity(
+            activity.activity_type.label(),
+            &activity.message,
+        );
+        if self.activities.len() >= 500 {
+            self.activities.remove(0);
+        }
+        self.activities.push(activity);
+    }
+
+    fn toggle_help(&mut self) {
+        self.show_help = !self.show_help;
+    }
+
+    fn save_session(&mut self) -> Result<()> {
+        let manager = SessionManager::new()?;
+        manager.save(&self.session)?;
+        Ok(())
+    }
+
     fn update_execution_time(&mut self) {
         if let Some(start) = self.execution_start {
             self.execution_time_ms = start.elapsed().as_millis();
@@ -610,6 +763,16 @@ aof.sh"#;
 
 /// Run agent in interactive REPL mode with two-column TUI
 async fn run_agent_interactive(runtime: &Runtime, agent_name: &str, _output: &str) -> Result<()> {
+    run_agent_interactive_with_resume(runtime, agent_name, _output, None).await
+}
+
+/// Run agent in interactive REPL mode with optional session resume
+async fn run_agent_interactive_with_resume(
+    runtime: &Runtime,
+    agent_name: &str,
+    _output: &str,
+    resume_session: Option<Session>,
+) -> Result<()> {
     // Extract model and tools from runtime
     let model_name = runtime
         .get_agent(agent_name)
@@ -624,6 +787,9 @@ async fn run_agent_interactive(runtime: &Runtime, agent_name: &str, _output: &st
     // Create log channel
     let (log_tx, log_rx) = channel::<String>();
 
+    // Create activity channel
+    let (activity_tx, activity_rx) = channel::<ActivityEvent>();
+
     // Setup tracing to capture logs into the channel instead of stdout
     let log_tx_clone = Arc::new(Mutex::new(log_tx));
     let fmt_layer = tracing_subscriber::fmt::layer()
@@ -659,13 +825,30 @@ async fn run_agent_interactive(runtime: &Runtime, agent_name: &str, _output: &st
     let backend = CrosstermBackend::new(stdout);
     let mut terminal = Terminal::new(backend)?;
 
-    // Initialize app state with log receiver
-    let mut app_state = AppState::new(log_rx, model_name, tools);
+    // Initialize app state with log and activity receivers
+    let mut app_state = if let Some(session) = resume_session {
+        AppState::restore_from_session(
+            log_rx,
+            activity_rx,
+            model_name,
+            tools,
+            agent_name.to_string(),
+            session,
+        )
+    } else {
+        AppState::new(
+            log_rx,
+            activity_rx,
+            model_name,
+            tools,
+            agent_name.to_string(),
+        )
+    };
+
     let should_quit = Arc::new(Mutex::new(false));
 
-    // Don't add welcome message yet - it will show after greeting is dismissed
-    // app_state.chat_history.push(("system".to_string(),
-    //     format!("Connected to agent: {}\nType your query and press Enter. Commands: help, exit, quit", agent_name)));
+    // Store activity sender for use during execution
+    let activity_sender = Arc::new(Mutex::new(activity_tx));
 
     // Draw initial screen with greeting
     terminal.draw(|f| ui(f, agent_name, &app_state))?;
@@ -684,8 +867,48 @@ async fn run_agent_interactive(runtime: &Runtime, agent_name: &str, _output: &st
                 Event::Key(key) => {
                     match key.code {
                         KeyCode::Char('c') if key.modifiers == crossterm::event::KeyModifiers::CONTROL => {
+                            // Save session before quitting
+                            if let Err(e) = app_state.save_session() {
+                                eprintln!("Failed to save session: {}", e);
+                            }
                             break;
                         }
+                        KeyCode::Esc => {
+                            if app_state.show_help {
+                                // Close help panel
+                                app_state.show_help = false;
+                            } else if app_state.agent_busy {
+                                // Cancel running execution
+                                app_state.cancellation_token.cancel();
+                                app_state.add_activity(ActivityEvent::cancelled());
+                            }
+                        }
+                        KeyCode::Char('?') if !app_state.agent_busy => {
+                            // Toggle help panel
+                            app_state.toggle_help();
+                        }
+                        KeyCode::Char('s') if key.modifiers == crossterm::event::KeyModifiers::CONTROL => {
+                            // Manual save session
+                            if let Err(e) = app_state.save_session() {
+                                app_state.add_activity(ActivityEvent::error(format!("Failed to save: {}", e)));
+                            } else {
+                                app_state.add_activity(ActivityEvent::info("Session saved".to_string()));
+                            }
+                        }
+                        KeyCode::Char('l') if key.modifiers == crossterm::event::KeyModifiers::CONTROL => {
+                            // Clear chat (start new session)
+                            if !app_state.agent_busy {
+                                // Save current session first
+                                let _ = app_state.save_session();
+                                // Create new session
+                                app_state.session = Session::new(&app_state.agent_name, &app_state.model_name);
+                                app_state.chat_history.clear();
+                                app_state.activities.clear();
+                                app_state.input_tokens = 0;
+                                app_state.output_tokens = 0;
+                                app_state.chat_history.push(("system".to_string(), "── New Session ──".to_string()));
+                            }
+                        }
                         KeyCode::PageUp => {
                             app_state.scroll_up(5);
                         }
@@ -698,56 +921,107 @@ async fn run_agent_interactive(runtime: &Runtime, agent_name: &str, _output: &st
                         KeyCode::Down if key.modifiers == crossterm::event::KeyModifiers::SHIFT => {
                             app_state.scroll_down(1);
                         }
+                        KeyCode::Enter if app_state.show_help => {
+                            // Close help with Enter
+                            app_state.show_help = false;
+                        }
                         KeyCode::Enter => {
-                        let trimmed = app_state.current_input.trim();
+                        // Clone input early to avoid borrow issues
+                        let input_str = app_state.current_input.trim().to_string();
 
-                        if trimmed.is_empty() {
+                        if input_str.is_empty() {
                             // Do nothing for empty input
-                        } else if trimmed.to_lowercase() == "exit" || trimmed.to_lowercase() == "quit" {
+                        } else if input_str.to_lowercase() == "exit" || input_str.to_lowercase() == "quit" {
                             break;
-                        } else if trimmed.to_lowercase() == "help" {
+                        } else if input_str.to_lowercase() == "help" {
                             app_state.chat_history.push(("system".to_string(),
                                 "Available: help, exit, quit. Type normally to chat with agent.".to_string()));
                         } else {
                             // Execute agent with timer updates during execution
-                            app_state.chat_history.push(("user".to_string(), trimmed.to_string()));
-                            // Update input tokens based on user query length
-                            let input_tokens_estimate = (trimmed.len() / 4) as u32;
+                            app_state.chat_history.push(("user".to_string(), input_str.clone()));
+
+                            // Add to session
+                            let input_tokens_estimate = (input_str.len() / 4) as u32;
+                            app_state.session.add_message(
+                                "user",
+                                &input_str,
+                                Some(MessageTokens { input: input_tokens_estimate, output: 0 }),
+                            );
+
                             app_state.input_tokens = app_state.input_tokens.saturating_add(input_tokens_estimate);
                             app_state.agent_busy = true;
                             app_state.last_error = None;
                             app_state.execution_start = Some(Instant::now());
                             app_state.message_count = app_state.chat_history.len();
 
+                            // Reset cancellation token for new execution
+                            app_state.cancellation_token = CancellationToken::new();
+
+                            // Emit activity events
+                            app_state.add_activity(ActivityEvent::started(agent_name));
+                            app_state.add_activity(ActivityEvent::thinking("Processing user request..."));
+
                             // Draw busy state before execution
                             terminal.draw(|f| ui(f, agent_name, &app_state))?;
-
-                            // Execute with periodic UI updates using select! for timer
-                            let input_str = trimmed.to_string();
                             let mut exec_future = Box::pin(runtime.execute(agent_name, &input_str));
                             let mut timer_handle = tokio::time::interval(std::time::Duration::from_millis(100));
+                            let cancel_token = app_state.cancellation_token.clone();
 
+                            // Emit LLM call activity
+                            app_state.add_activity(ActivityEvent::llm_call(format!("Calling {}", app_state.model_name)));
+
+                            let mut cancelled = false;
                             loop {
                                 tokio::select! {
+                                    biased;
+
+                                    // Check for cancellation
+                                    _ = cancel_token.cancelled() => {
+                                        cancelled = true;
+                                        app_state.chat_history.push(("system".to_string(), "⏹ Execution cancelled by user".to_string()));
+                                        app_state.session.add_message("system", "Execution cancelled by user", None);
+                                        app_state.agent_busy = false;
+                                        app_state.update_execution_time();
+                                        break;
+                                    }
+
                                     result = &mut exec_future => {
+                                        let duration_ms = app_state.execution_time_ms as u64;
                                         match result {
                                             Ok(response) => {
                                                 if response.is_empty() {
                                                     let error_msg = "Error: Empty response from agent".to_string();
                                                     app_state.chat_history.push(("error".to_string(), error_msg.clone()));
+                                                    app_state.session.add_message("error", &error_msg, None);
                                                     app_state.last_error = Some(error_msg);
+                                                    app_state.add_activity(ActivityEvent::error("Empty response received"));
                                                 } else {
                                                     // Update output tokens based on response length
+                                                    let output_tokens = (response.len() / 4) as u32;
                                                     app_state.update_token_count(&response);
-                                                    app_state.chat_history.push(("assistant".to_string(), response));
+                                                    app_state.chat_history.push(("assistant".to_string(), response.clone()));
+
+                                                    // Add to session
+                                                    app_state.session.add_message(
+                                                        "assistant",
+                                                        &response,
+                                                        Some(MessageTokens { input: 0, output: output_tokens }),
+                                                    );
+
                                                     // Auto-scroll to latest message
                                                     app_state.auto_scroll_to_bottom();
+
+                                                    // Emit completion activity
+                                                    app_state.add_activity(ActivityEvent::llm_response(input_tokens_estimate, output_tokens));
+                                                    app_state.add_activity(ActivityEvent::completed(duration_ms));
                                                 }
                                             }
                                             Err(e) => {
                                                 let error_msg = format!("Error: {}", e);
                                                 app_state.chat_history.push(("error".to_string(), error_msg.clone()));
-                                                app_state.last_error = Some(error_msg);
+                                                app_state.session.add_message("error", &error_msg, None);
+                                                app_state.last_error = Some(error_msg.clone());
+                                                app_state.add_activity(ActivityEvent::error(error_msg));
                                             }
                                         }
                                         app_state.agent_busy = false;
@@ -759,8 +1033,18 @@ async fn run_agent_interactive(runtime: &Runtime, agent_name: &str, _output: &st
                                         app_state.next_spinner();
                                         app_state.update_execution_time();
 
-                                        // Consume any new logs
+                                        // Consume any new logs and activities
                                         app_state.consume_logs();
+                                        app_state.consume_activities();
+
+                                        // Check for key events during execution (for ESC)
+                                        if crossterm::event::poll(std::time::Duration::from_millis(0))? {
+                                            if let Event::Key(key) = event::read()? {
+                                                if key.code == KeyCode::Esc {
+                                                    cancel_token.cancel();
+                                                }
+                                            }
+                                        }
 
                                         // Redraw to show timer updates
                                         terminal.draw(|f| ui(f, agent_name, &app_state))?;
@@ -802,13 +1086,19 @@ async fn run_agent_interactive(runtime: &Runtime, agent_name: &str, _output: &st
             app_state.update_execution_time();
         }
 
-        // Consume any new log messages from the channel
+        // Consume any new log messages and activities from the channels
         app_state.consume_logs();
+        app_state.consume_activities();
 
         // Redraw UI
         terminal.draw(|f| ui(f, agent_name, &app_state))?;
     }
 
+    // Save session before exit
+    if let Err(e) = app_state.save_session() {
+        eprintln!("Warning: Failed to save session: {}", e);
+    }
+
     // Restore terminal
     disable_raw_mode()?;
     execute!(
@@ -818,7 +1108,8 @@ async fn run_agent_interactive(runtime: &Runtime, agent_name: &str, _output: &st
     )?;
     terminal.show_cursor()?;
 
-    println!("\n-- Exiting Agentic Ops Framework --\n");
+    println!("\n-- Exiting Agentic Ops Framework --");
+    println!("Session saved. Use --resume to continue later.\n");
     Ok(())
 }
 
@@ -957,10 +1248,16 @@ fn ui(f: &mut Frame, agent_name: &str, app: &AppState) {
         .constraints([Constraint::Percentage(80), Constraint::Percentage(20)])
         .split(chunks[1]);
 
-    // Top row - System Logs
+    // Top row - Agent Activity Log (replaced System Logs)
+    let activity_title = if app.activities.is_empty() {
+        " AGENT ACTIVITY "
+    } else {
+        " AGENT ACTIVITY "
+    };
+
     let logs_block = Block::default()
         .title(Span::styled(
-            " SYSTEM LOG ",
+            activity_title,
             Style::default().fg(primary_white).add_modifier(Modifier::BOLD),
         ))
         .title_alignment(Alignment::Left)
@@ -969,30 +1266,87 @@ fn ui(f: &mut Frame, agent_name: &str, app: &AppState) {
         .border_style(Style::default().fg(primary_white))
         .padding(ratatui::widgets::Padding::symmetric(1, 0));
 
-    let log_lines: Vec<Line> = app.logs.iter()
-        .map(|log| {
-            let style = if log.contains("ERROR") {
-                Style::default().fg(Color::White).add_modifier(Modifier::BOLD)
-            } else if log.contains("WARN") {
-                Style::default().fg(Color::White)
-            } else if log.contains("DEBUG") {
-                Style::default().fg(Color::Gray).add_modifier(Modifier::DIM)
-            } else if log.contains("INFO") {
-                Style::default().fg(Color::White).add_modifier(Modifier::DIM)
-            } else {
-                Style::default().fg(Color::Gray)
-            };
+    // Render activities with color coding
+    let activity_lines: Vec<Line> = if app.activities.is_empty() {
+        // Show placeholder when no activities
+        vec![
+            Line::from(Span::styled(
+                "Waiting for agent activity...",
+                Style::default().fg(Color::DarkGray).add_modifier(Modifier::ITALIC),
+            )),
+            Line::from(""),
+            Line::from(Span::styled(
+                "Activity types:",
+                Style::default().fg(Color::DarkGray),
+            )),
+            Line::from(vec![
+                Span::styled("  🧠 ", Style::default()),
+                Span::styled("Thinking", Style::default().fg(Color::Cyan)),
+            ]),
+            Line::from(vec![
+                Span::styled("  ⚙️ ", Style::default()),
+                Span::styled("Tool execution", Style::default().fg(Color::Yellow)),
+            ]),
+            Line::from(vec![
+                Span::styled("  📤 ", Style::default()),
+                Span::styled("LLM calls", Style::default().fg(Color::Blue)),
+            ]),
+            Line::from(vec![
+                Span::styled("  ✓ ", Style::default()),
+                Span::styled("Completed", Style::default().fg(Color::Green)),
+            ]),
+        ]
+    } else {
+        app.activities.iter()
+            .map(|activity| {
+                let (icon, color) = match &activity.activity_type {
+                    ActivityType::Thinking | ActivityType::Analyzing => ("🧠", Color::Cyan),
+                    ActivityType::LlmCall | ActivityType::LlmWaiting => ("📤", Color::Blue),
+                    ActivityType::LlmResponse => ("📥", Color::Blue),
+                    ActivityType::ToolDiscovery => ("🔧", Color::Magenta),
+                    ActivityType::ToolExecuting => ("⚙️", Color::Yellow),
+                    ActivityType::ToolComplete => ("✓", Color::Green),
+                    ActivityType::ToolFailed => ("✗", Color::Red),
+                    ActivityType::Memory => ("💾", Color::Cyan),
+                    ActivityType::McpCall => ("🔌", Color::Magenta),
+                    ActivityType::Validation => ("📋", Color::Blue),
+                    ActivityType::Warning => ("⚠", Color::Yellow),
+                    ActivityType::Error => ("❌", Color::Red),
+                    ActivityType::Info | ActivityType::Debug => ("ℹ", Color::Gray),
+                    ActivityType::Started => ("▶", Color::Green),
+                    ActivityType::Completed => ("●", Color::Green),
+                    ActivityType::Cancelled => ("⏹", Color::Yellow),
+                };
 
-            let trimmed = log.chars().take(right_panel[0].width.saturating_sub(4) as usize).collect::<String>();
-            Line::from(Span::styled(trimmed, style))
-        })
-        .collect();
+                let time_str = activity.timestamp.format("%H:%M:%S").to_string();
+                let max_width = right_panel[0].width.saturating_sub(14) as usize;
+                let msg = if activity.message.len() > max_width {
+                    format!("{}...", &activity.message[..max_width.saturating_sub(3)])
+                } else {
+                    activity.message.clone()
+                };
 
-    let logs_para = Paragraph::new(log_lines)
+                // Add duration if available
+                let duration_str = activity.details.as_ref()
+                    .and_then(|d| d.duration_ms)
+                    .map(|ms| format!(" ({}ms)", ms))
+                    .unwrap_or_default();
+
+                Line::from(vec![
+                    Span::styled(format!("{} ", time_str), Style::default().fg(Color::DarkGray)),
+                    Span::styled(format!("{} ", icon), Style::default()),
+                    Span::styled(msg, Style::default().fg(color)),
+                    Span::styled(duration_str, Style::default().fg(Color::DarkGray)),
+                ])
+            })
+            .collect()
+    };
+
+    let logs_para = Paragraph::new(activity_lines)
         .block(logs_block)
         .wrap(Wrap { trim: true })
         .scroll((
-            (app.logs.len() as u16).saturating_sub(right_panel[0].height.saturating_sub(3) / 2),
+            (app.activities.len() as u16).saturating_sub(right_panel[0].height.saturating_sub(3)),
             0,
         ));
 
@@ -1028,23 +1382,22 @@ fn ui(f: &mut Frame, agent_name: &str, app: &AppState) {
 
     f.render_widget(gauge, right_panel[1]);
 
-    // Footer metrics bar
+    // Footer metrics bar with keybinding hints
     let metrics_text = if app.agent_busy {
         format!(
-            "  ⧖ {:>5}ms  │  {} {} messages  │  Model: {}  │  Tools: {}  │  Status: Active",
-            app.execution_time_ms,
+            "  {} {:>5}ms │ {} msgs │ {} │ {} │ ESC:cancel  Ctrl+C:quit",
             app.get_spinner(),
+            app.execution_time_ms,
             app.message_count / 2,
             app.model_name,
             tools_str
         )
     } else {
         format!(
-            "  ✓ Completed  │  {} messages  │  Model: {}  │  Tools: {}  │  Last execution: {}ms",
+            "  ✓ {} msgs │ {} │ {} │ ?:help  Ctrl+S:save  Ctrl+L:new  Ctrl+C:quit",
             app.message_count / 2,
             app.model_name,
-            tools_str,
-            app.execution_time_ms
+            tools_str
         )
     };
 
@@ -1054,9 +1407,122 @@ fn ui(f: &mut Frame, agent_name: &str, app: &AppState) {
 
     let metrics_para = Paragraph::new(metrics_text)
         .block(metrics_block)
-        .style(Style::default().fg(Color::White));
+        .style(Style::default().fg(Color::Green));
 
     f.render_widget(metrics_para, main_layout[1]);
+
+    // Render help overlay if enabled
+    if app.show_help {
+        render_help_overlay(f);
+    }
+}
+
+/// Render the help overlay panel
+fn render_help_overlay(f: &mut Frame) {
+    let area = f.size();
+
+    // Create centered popup area (60% width, 70% height)
+    let popup_width = (area.width as f32 * 0.6) as u16;
+    let popup_height = (area.height as f32 * 0.7) as u16;
+    let popup_x = (area.width - popup_width) / 2;
+    let popup_y = (area.height - popup_height) / 2;
+
+    let popup_area = Rect::new(popup_x, popup_y, popup_width, popup_height);
+
+    // Clear the area with a semi-transparent background
+    let clear_block = Block::default()
+        .style(Style::default().bg(Color::Black));
+    f.render_widget(clear_block, popup_area);
+
+    // Create help content
+    let help_block = Block::default()
+        .title(Span::styled(
+            " ⌨ KEYBOARD SHORTCUTS ",
+            Style::default().fg(Color::Cyan).add_modifier(Modifier::BOLD),
+        ))
+        .title_alignment(Alignment::Center)
+        .borders(Borders::ALL)
+        .border_type(ratatui::widgets::BorderType::Rounded)
+        .border_style(Style::default().fg(Color::Cyan))
+        .padding(ratatui::widgets::Padding::uniform(1));
+
+    let help_lines = vec![
+        Line::from(""),
+        Line::from(vec![
+            Span::styled("  NAVIGATION", Style::default().fg(Color::Yellow).add_modifier(Modifier::BOLD)),
+        ]),
+        Line::from(vec![
+            Span::styled("    Shift+↑/↓    ", Style::default().fg(Color::White)),
+            Span::styled("Scroll chat history", Style::default().fg(Color::Gray)),
+        ]),
+        Line::from(vec![
+            Span::styled("    PageUp/Down  ", Style::default().fg(Color::White)),
+            Span::styled("Scroll 5 lines", Style::default().fg(Color::Gray)),
+        ]),
+        Line::from(vec![
+            Span::styled("    Mouse scroll ", Style::default().fg(Color::White)),
+            Span::styled("Scroll chat history", Style::default().fg(Color::Gray)),
+        ]),
+        Line::from(""),
+        Line::from(vec![
+            Span::styled("  EXECUTION", Style::default().fg(Color::Yellow).add_modifier(Modifier::BOLD)),
+        ]),
+        Line::from(vec![
+            Span::styled("    Enter        ", Style::default().fg(Color::White)),
+            Span::styled("Send message to agent", Style::default().fg(Color::Gray)),
+        ]),
+        Line::from(vec![
+            Span::styled("    ESC          ", Style::default().fg(Color::White)),
+            Span::styled("Cancel running execution", Style::default().fg(Color::Gray)),
+        ]),
+        Line::from(""),
+        Line::from(vec![
+            Span::styled("  SESSION", Style::default().fg(Color::Yellow).add_modifier(Modifier::BOLD)),
+        ]),
+        Line::from(vec![
+            Span::styled("    Ctrl+S       ", Style::default().fg(Color::White)),
+            Span::styled("Save session manually", Style::default().fg(Color::Gray)),
+        ]),
+        Line::from(vec![
+            Span::styled("    Ctrl+L       ", Style::default().fg(Color::White)),
+            Span::styled("Clear chat / new session", Style::default().fg(Color::Gray)),
+        ]),
+        Line::from(""),
+        Line::from(vec![
+            Span::styled("  GENERAL", Style::default().fg(Color::Yellow).add_modifier(Modifier::BOLD)),
+        ]),
+        Line::from(vec![
+            Span::styled("    ?            ", Style::default().fg(Color::White)),
+            Span::styled("Toggle this help panel", Style::default().fg(Color::Gray)),
+        ]),
+        Line::from(vec![
+            Span::styled("    Ctrl+C       ", Style::default().fg(Color::White)),
+            Span::styled("Quit application", Style::default().fg(Color::Gray)),
+        ]),
+        Line::from(""),
+        Line::from(vec![
+            Span::styled("  COMMANDS", Style::default().fg(Color::Yellow).add_modifier(Modifier::BOLD)),
+        ]),
+        Line::from(vec![
+            Span::styled("    help         ", Style::default().fg(Color::White)),
+            Span::styled("Show available commands", Style::default().fg(Color::Gray)),
+        ]),
+        Line::from(vec![
+            Span::styled("    exit/quit    ", Style::default().fg(Color::White)),
+            Span::styled("Exit the application", Style::default().fg(Color::Gray)),
+        ]),
+        Line::from(""),
+        Line::from(Span::styled(
+            "           Press ESC or Enter to close           ",
+            Style::default().fg(Color::DarkGray).add_modifier(Modifier::ITALIC),
+        )),
+    ];
+
+    let help_para = Paragraph::new(help_lines)
+        .block(help_block)
+        .wrap(Wrap { trim: true });
+
+    f.render_widget(help_para, popup_area);
 }
 
 /// Extract JSON from markdown code blocks (```json ... ```)
diff --git a/crates/aofctl/src/main.rs b/crates/aofctl/src/main.rs
index 1459cbb..199bd6c 100644
--- a/crates/aofctl/src/main.rs
+++ b/crates/aofctl/src/main.rs
@@ -5,6 +5,7 @@ mod cli;
 mod commands;
 mod output;
 mod resources;
+pub mod session;
 
 use cli::Cli;
 
diff --git a/crates/aofctl/src/resources.rs b/crates/aofctl/src/resources.rs
index 0d8b636..7e19a41 100644
--- a/crates/aofctl/src/resources.rs
+++ b/crates/aofctl/src/resources.rs
@@ -26,6 +26,9 @@ pub enum ResourceType {
     // Storage resources
     Memory,
     State,
+
+    // Session resources
+    Session,
 }
 
 impl ResourceType {
@@ -46,6 +49,7 @@ impl ResourceType {
             ResourceType::Task,
             ResourceType::Memory,
             ResourceType::State,
+            ResourceType::Session,
         ]
     }
 
@@ -66,6 +70,7 @@ impl ResourceType {
             ResourceType::Task => "task",
             ResourceType::Memory => "memory",
             ResourceType::State => "state",
+            ResourceType::Session => "session",
         }
     }
 
@@ -86,6 +91,7 @@ impl ResourceType {
             ResourceType::Task => "tasks",
             ResourceType::Memory => "memories",
             ResourceType::State => "states",
+            ResourceType::Session => "sessions",
         }
     }
 
@@ -106,6 +112,7 @@ impl ResourceType {
             ResourceType::Task => vec!["tsk"],
             ResourceType::Memory => vec!["mem"],
             ResourceType::State => vec!["st"],
+            ResourceType::Session => vec!["sess"],
         }
     }
 
@@ -118,13 +125,14 @@ impl ResourceType {
             ResourceType::McpServer | ResourceType::McpTool => "mcp/v1",
             ResourceType::Job | ResourceType::Task => "batch/v1",
             ResourceType::Memory | ResourceType::State => "storage/v1",
+            ResourceType::Session => "cli/v1",
         }
     }
 
     /// Check if this resource is namespaced
     pub fn is_namespaced(&self) -> bool {
         match self {
-            ResourceType::Config | ResourceType::McpServer => false,
+            ResourceType::Config | ResourceType::McpServer | ResourceType::Session => false,
             _ => true,
         }
     }
@@ -146,6 +154,7 @@ impl ResourceType {
             ResourceType::Task => "Task",
             ResourceType::Memory => "Memory",
             ResourceType::State => "State",
+            ResourceType::Session => "Session",
         }
     }
 
diff --git a/crates/aofctl/src/session.rs b/crates/aofctl/src/session.rs
new file mode 100644
index 0000000..a8bafdc
--- /dev/null
+++ b/crates/aofctl/src/session.rs
@@ -0,0 +1,485 @@
+//! Session Management for Agent Conversations
+//!
+//! This module provides session persistence, allowing conversations to be saved
+//! and resumed across multiple invocations.
+
+use anyhow::{anyhow, Result};
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use std::fs;
+use std::path::PathBuf;
+use uuid::Uuid;
+
+/// A saved agent conversation session
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Session {
+    /// Unique session identifier
+    pub id: String,
+    /// Agent name this session belongs to
+    pub agent_name: String,
+    /// Model used in this session
+    pub model: String,
+    /// When the session was created
+    pub created_at: DateTime<Utc>,
+    /// When the session was last updated
+    pub updated_at: DateTime<Utc>,
+    /// Conversation messages
+    pub messages: Vec<SessionMessage>,
+    /// Token usage statistics
+    pub token_usage: TokenUsage,
+    /// Activity log entries
+    pub activity_log: Vec<ActivityLogEntry>,
+}
+
+/// A message in the session
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SessionMessage {
+    /// Role: user, assistant, error, system
+    pub role: String,
+    /// Message content
+    pub content: String,
+    /// When the message was sent
+    pub timestamp: DateTime<Utc>,
+    /// Token counts for this message (if available)
+    pub tokens: Option<MessageTokens>,
+}
+
+/// Token counts for a single message
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct MessageTokens {
+    pub input: u32,
+    pub output: u32,
+}
+
+/// Cumulative token usage for the session
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct TokenUsage {
+    pub total_input: u32,
+    pub total_output: u32,
+}
+
+/// An activity log entry
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ActivityLogEntry {
+    pub timestamp: DateTime<Utc>,
+    pub activity_type: String,
+    pub message: String,
+}
+
+impl Session {
+    /// Create a new session
+    pub fn new(agent_name: &str, model: &str) -> Self {
+        let now = Utc::now();
+        Self {
+            id: Uuid::new_v4().to_string(),
+            agent_name: agent_name.to_string(),
+            model: model.to_string(),
+            created_at: now,
+            updated_at: now,
+            messages: Vec::new(),
+            token_usage: TokenUsage::default(),
+            activity_log: Vec::new(),
+        }
+    }
+
+    /// Add a message to the session
+    pub fn add_message(&mut self, role: &str, content: &str, tokens: Option<MessageTokens>) {
+        self.messages.push(SessionMessage {
+            role: role.to_string(),
+            content: content.to_string(),
+            timestamp: Utc::now(),
+            tokens: tokens.clone(),
+        });
+        self.updated_at = Utc::now();
+
+        // Update cumulative token usage
+        if let Some(t) = tokens {
+            self.token_usage.total_input += t.input;
+            self.token_usage.total_output += t.output;
+        }
+    }
+
+    /// Add an activity log entry
+    pub fn add_activity(&mut self, activity_type: &str, message: &str) {
+        self.activity_log.push(ActivityLogEntry {
+            timestamp: Utc::now(),
+            activity_type: activity_type.to_string(),
+            message: message.to_string(),
+        });
+        self.updated_at = Utc::now();
+    }
+
+    /// Get message count
+    pub fn message_count(&self) -> usize {
+        self.messages.len()
+    }
+
+    /// Get total tokens used
+    pub fn total_tokens(&self) -> u32 {
+        self.token_usage.total_input + self.token_usage.total_output
+    }
+
+    /// Convert messages to chat history format (role, content)
+    pub fn to_chat_history(&self) -> Vec<(String, String)> {
+        self.messages
+            .iter()
+            .map(|m| (m.role.clone(), m.content.clone()))
+            .collect()
+    }
+}
+
+/// Session storage manager
+pub struct SessionManager {
+    /// Base directory for session storage
+    base_dir: PathBuf,
+}
+
+impl SessionManager {
+    /// Create a new session manager
+    pub fn new() -> Result<Self> {
+        let base_dir = Self::get_sessions_dir()?;
+        fs::create_dir_all(&base_dir)?;
+        Ok(Self { base_dir })
+    }
+
+    /// Get the sessions directory path
+    fn get_sessions_dir() -> Result<PathBuf> {
+        let home = dirs::home_dir().ok_or_else(|| anyhow!("Could not find home directory"))?;
+        Ok(home.join(".aof").join("sessions"))
+    }
+
+    /// Get the directory for a specific agent's sessions
+    fn get_agent_dir(&self, agent_name: &str) -> PathBuf {
+        self.base_dir.join(sanitize_name(agent_name))
+    }
+
+    /// Save a session to disk
+    pub fn save(&self, session: &Session) -> Result<PathBuf> {
+        let agent_dir = self.get_agent_dir(&session.agent_name);
+        fs::create_dir_all(&agent_dir)?;
+
+        let file_path = agent_dir.join(format!("{}.json", session.id));
+        let json = serde_json::to_string_pretty(session)?;
+        fs::write(&file_path, json)?;
+
+        // Also update the "latest" symlink/file
+        let latest_path = agent_dir.join("latest.json");
+        fs::write(&latest_path, serde_json::to_string_pretty(session)?)?;
+
+        Ok(file_path)
+    }
+
+    /// Load a specific session by ID
+    pub fn load(&self, agent_name: &str, session_id: &str) -> Result<Session> {
+        let agent_dir = self.get_agent_dir(agent_name);
+        let file_path = agent_dir.join(format!("{}.json", session_id));
+
+        if !file_path.exists() {
+            return Err(anyhow!("Session not found: {}", session_id));
+        }
+
+        let json = fs::read_to_string(&file_path)?;
+        let session: Session = serde_json::from_str(&json)?;
+        Ok(session)
+    }
+
+    /// Load the latest session for an agent
+    pub fn load_latest(&self, agent_name: &str) -> Result<Session> {
+        let agent_dir = self.get_agent_dir(agent_name);
+        let latest_path = agent_dir.join("latest.json");
+
+        if !latest_path.exists() {
+            return Err(anyhow!("No sessions found for agent: {}", agent_name));
+        }
+
+        let json = fs::read_to_string(&latest_path)?;
+        let session: Session = serde_json::from_str(&json)?;
+        Ok(session)
+    }
+
+    /// List all sessions for an agent
+    pub fn list(&self, agent_name: &str) -> Result<Vec<SessionSummary>> {
+        let agent_dir = self.get_agent_dir(agent_name);
+
+        if !agent_dir.exists() {
+            return Ok(Vec::new());
+        }
+
+        let mut sessions = Vec::new();
+
+        for entry in fs::read_dir(&agent_dir)? {
+            let entry = entry?;
+            let path = entry.path();
+
+            // Skip the latest.json file
+            if path.file_name().map(|n| n == "latest.json").unwrap_or(false) {
+                continue;
+            }
+
+            if path.extension().map(|e| e == "json").unwrap_or(false) {
+                if let Ok(json) = fs::read_to_string(&path) {
+                    if let Ok(session) = serde_json::from_str::<Session>(&json) {
+                        // Compute values before moving fields
+                        let message_count = session.messages.len();
+                        let total_tokens = session.total_tokens();
+                        sessions.push(SessionSummary {
+                            id: session.id,
+                            agent_name: session.agent_name,
+                            model: session.model,
+                            created_at: session.created_at,
+                            updated_at: session.updated_at,
+                            message_count,
+                            total_tokens,
+                        });
+                    }
+                }
+            }
+        }
+
+        // Sort by updated_at descending (most recent first)
+        sessions.sort_by(|a, b| b.updated_at.cmp(&a.updated_at));
+
+        Ok(sessions)
+    }
+
+    /// List all agents with sessions
+    pub fn list_agents(&self) -> Result<Vec<String>> {
+        if !self.base_dir.exists() {
+            return Ok(Vec::new());
+        }
+
+        let mut agents = Vec::new();
+
+        for entry in fs::read_dir(&self.base_dir)? {
+            let entry = entry?;
+            if entry.path().is_dir() {
+                if let Some(name) = entry.file_name().to_str() {
+                    agents.push(name.to_string());
+                }
+            }
+        }
+
+        agents.sort();
+        Ok(agents)
+    }
+
+    /// Delete a specific session
+    pub fn delete(&self, agent_name: &str, session_id: &str) -> Result<()> {
+        let agent_dir = self.get_agent_dir(agent_name);
+        let file_path = agent_dir.join(format!("{}.json", session_id));
+
+        if file_path.exists() {
+            fs::remove_file(&file_path)?;
+        }
+
+        // If this was the latest session, remove latest.json too
+        let latest_path = agent_dir.join("latest.json");
+        if latest_path.exists() {
+            if let Ok(json) = fs::read_to_string(&latest_path) {
+                if let Ok(session) = serde_json::from_str::<Session>(&json) {
+                    if session.id == session_id {
+                        fs::remove_file(&latest_path)?;
+                    }
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Delete all sessions for an agent
+    pub fn delete_agent_sessions(&self, agent_name: &str) -> Result<usize> {
+        let agent_dir = self.get_agent_dir(agent_name);
+
+        if !agent_dir.exists() {
+            return Ok(0);
+        }
+
+        let mut count = 0;
+        for entry in fs::read_dir(&agent_dir)? {
+            let entry = entry?;
+            if entry.path().extension().map(|e| e == "json").unwrap_or(false) {
+                fs::remove_file(entry.path())?;
+                count += 1;
+            }
+        }
+
+        // Try to remove the directory if empty
+        let _ = fs::remove_dir(&agent_dir);
+
+        Ok(count)
+    }
+
+    /// Check if a session exists
+    pub fn exists(&self, agent_name: &str, session_id: &str) -> bool {
+        let agent_dir = self.get_agent_dir(agent_name);
+        let file_path = agent_dir.join(format!("{}.json", session_id));
+        file_path.exists()
+    }
+
+    /// Check if latest session exists
+    pub fn has_latest(&self, agent_name: &str) -> bool {
+        let agent_dir = self.get_agent_dir(agent_name);
+        let latest_path = agent_dir.join("latest.json");
+        latest_path.exists()
+    }
+}
+
+impl Default for SessionManager {
+    fn default() -> Self {
+        Self::new().expect("Failed to create session manager")
+    }
+}
+
+/// Summary information about a session (for listing)
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SessionSummary {
+    pub id: String,
+    pub agent_name: String,
+    pub model: String,
+    pub created_at: DateTime<Utc>,
+    pub updated_at: DateTime<Utc>,
+    pub message_count: usize,
+    pub total_tokens: u32,
+}
+
+impl SessionSummary {
+    /// Format for display
+    pub fn format_display(&self) -> String {
+        let age = Utc::now().signed_duration_since(self.updated_at);
+        let age_str = if age.num_days() > 0 {
+            format!("{}d ago", age.num_days())
+        } else if age.num_hours() > 0 {
+            format!("{}h ago", age.num_hours())
+        } else if age.num_minutes() > 0 {
+            format!("{}m ago", age.num_minutes())
+        } else {
+            "just now".to_string()
+        };
+
+        format!(
+            "{} | {} msgs | {} tokens | {}",
+            &self.id[..8],
+            self.message_count,
+            self.total_tokens,
+            age_str
+        )
+    }
+}
+
+/// Sanitize agent name for use as a directory name
+fn sanitize_name(name: &str) -> String {
+    name.chars()
+        .map(|c| {
+            if c.is_alphanumeric() || c == '-' || c == '_' {
+                c
+            } else {
+                '_'
+            }
+        })
+        .collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::TempDir;
+
+    fn test_manager() -> (SessionManager, TempDir) {
+        let temp_dir = TempDir::new().unwrap();
+        let manager = SessionManager {
+            base_dir: temp_dir.path().to_path_buf(),
+        };
+        (manager, temp_dir)
+    }
+
+    #[test]
+    fn test_session_creation() {
+        let session = Session::new("test-agent", "google:gemini-2.5-flash");
+        assert_eq!(session.agent_name, "test-agent");
+        assert_eq!(session.model, "google:gemini-2.5-flash");
+        assert!(session.messages.is_empty());
+    }
+
+    #[test]
+    fn test_add_message() {
+        let mut session = Session::new("test-agent", "google:gemini-2.5-flash");
+        session.add_message("user", "Hello", Some(MessageTokens { input: 10, output: 0 }));
+        session.add_message(
+            "assistant",
+            "Hi there!",
+            Some(MessageTokens { input: 0, output: 20 }),
+        );
+
+        assert_eq!(session.messages.len(), 2);
+        assert_eq!(session.token_usage.total_input, 10);
+        assert_eq!(session.token_usage.total_output, 20);
+    }
+
+    #[test]
+    fn test_save_and_load() {
+        let (manager, _temp) = test_manager();
+
+        let mut session = Session::new("test-agent", "google:gemini-2.5-flash");
+        session.add_message("user", "Hello", None);
+        session.add_message("assistant", "Hi!", None);
+
+        let session_id = session.id.clone();
+
+        // Save
+        manager.save(&session).unwrap();
+
+        // Load
+        let loaded = manager.load("test-agent", &session_id).unwrap();
+        assert_eq!(loaded.id, session_id);
+        assert_eq!(loaded.messages.len(), 2);
+    }
+
+    #[test]
+    fn test_load_latest() {
+        let (manager, _temp) = test_manager();
+
+        let mut session = Session::new("test-agent", "google:gemini-2.5-flash");
+        session.add_message("user", "Test", None);
+        manager.save(&session).unwrap();
+
+        let latest = manager.load_latest("test-agent").unwrap();
+        assert_eq!(latest.id, session.id);
+    }
+
+    #[test]
+    fn test_list_sessions() {
+        let (manager, _temp) = test_manager();
+
+        // Create multiple sessions
+        for i in 0..3 {
+            let mut session = Session::new("test-agent", "google:gemini-2.5-flash");
+            session.add_message("user", &format!("Message {}", i), None);
+            manager.save(&session).unwrap();
+        }
+
+        let sessions = manager.list("test-agent").unwrap();
+        assert_eq!(sessions.len(), 3);
+    }
+
+    #[test]
+    fn test_delete_session() {
+        let (manager, _temp) = test_manager();
+
+        let mut session = Session::new("test-agent", "google:gemini-2.5-flash");
+        session.add_message("user", "Test", None);
+        let session_id = session.id.clone();
+        manager.save(&session).unwrap();
+
+        assert!(manager.exists("test-agent", &session_id));
+        manager.delete("test-agent", &session_id).unwrap();
+        assert!(!manager.exists("test-agent", &session_id));
+    }
+
+    #[test]
+    fn test_sanitize_name() {
+        assert_eq!(sanitize_name("my-agent"), "my-agent");
+        assert_eq!(sanitize_name("my agent"), "my_agent");
+        assert_eq!(sanitize_name("agent@v1"), "agent_v1");
+    }
+}
diff --git a/docs/getting-started.md b/docs/getting-started.md
index c663ecd..0b51015 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -74,8 +74,33 @@ spec:
 
 ### Run It
 
+**Interactive Mode (TUI)** - Just run the agent without `--input`:
+
+```bash
+# Launch interactive chat TUI
+aofctl run agent docker-health.yaml
+```
+
+This opens a full-featured terminal UI with:
+- **Chat Panel** - Conversation history with syntax highlighting
+- **Activity Log** - Real-time agent activity (thinking, tool calls, LLM calls)
+- **Context Gauge** - Token usage and execution time
+- **Keyboard Shortcuts** - Press `?` for help
+
+**Keyboard Shortcuts:**
+| Key | Action |
+|-----|--------|
+| `Enter` | Send message |
+| `ESC` | Cancel running agent |
+| `?` | Toggle help panel |
+| `Ctrl+S` | Save session |
+| `Ctrl+L` | Clear / New session |
+| `Ctrl+C` | Quit |
+
+**Non-Interactive Mode** - For scripts and automation:
+
 ```bash
-# Check container status
+# Single query with --input flag
 aofctl run agent docker-health.yaml --input "what containers are running?"
 
 # Get more details
@@ -85,6 +110,19 @@ aofctl run agent docker-health.yaml --input "show me stats for all containers"
 aofctl run agent docker-health.yaml --input "check logs for any unhealthy containers"
 ```
 
+**Resume Previous Sessions:**
+
+```bash
+# Resume the latest session
+aofctl run agent docker-health.yaml --resume
+
+# List all saved sessions
+aofctl get sessions
+
+# Resume a specific session by ID
+aofctl run agent docker-health.yaml --session abc12345
+```
+
 **That's it!** You have a working AI agent.
 
 ## Your First Fleet (5 minutes)
diff --git a/docs/internal/tui-enhancement-plan.md b/docs/internal/tui-enhancement-plan.md
new file mode 100644
index 0000000..4eb2886
--- /dev/null
+++ b/docs/internal/tui-enhancement-plan.md
@@ -0,0 +1,185 @@
+# TUI Enhancement Plan
+
+## Overview
+
+Enhance the AOF agentic console TUI to provide a sophisticated, LazyGit-inspired experience with rich agent activity logging, cancellation support, and conversation persistence.
+
+## Current State
+
+The current TUI (`crates/aofctl/src/commands/run.rs`) provides:
+- Two-column layout (60% chat, 40% system log + context usage)
+- Chat history with user/assistant/error messages
+- Token usage gauge
+- Spinner animation during execution
+- Basic keyboard navigation (scroll, enter, ctrl+c)
+- Tracing log capture (but system log panel is mostly empty)
+
+## Enhancements
+
+### 1. Agent Activity Logging (System Log Panel)
+
+**Goal**: Make the System Log panel show real-time agent activity.
+
+**Activity Types to Log**:
+- 🧠 **Thinking**: "Analyzing user request..."
+- 🔍 **Analyzing**: "Examining context from previous messages..."
+- 🛠️ **Tool Use**: "Executing: kubectl get pods -n default"
+- ⏳ **Waiting**: "Waiting for LLM response..."
+- ✓ **Complete**: "Tool execution completed in 234ms"
+- ⚠️ **Warning**: "Approaching token limit (85%)"
+- 📊 **Metrics**: "Input: 150 tokens, Output: 420 tokens"
+
+**Implementation**:
+- Create `AgentActivityLog` struct with activity types
+- Add activity channel from executor to TUI
+- Emit activities at key execution points:
+  - Before LLM call
+  - During tool discovery
+  - Before/after each tool execution
+  - On response parsing
+  - On error conditions
+
+### 2. Escape Key Cancellation
+
+**Goal**: Allow users to stop a running agent with Escape key.
+
+**Implementation**:
+- Add `CancellationToken` from `tokio_util`
+- Pass token to executor
+- Check for Escape key during execution loop
+- Trigger graceful cancellation
+- Show "Cancelled by user" in chat
+
+**UI Changes**:
+- Show "Press ESC to cancel" in footer during execution
+- Show cancellation status in system log
+
+### 3. Conversation Persistence
+
+**Goal**: Save conversation history for later resumption.
+
+**File Format**: JSON (human-readable, easy to inspect)
+
+**Session File Structure**:
+```json
+{
+  "session_id": "uuid",
+  "agent_name": "k8s-helper",
+  "model": "google:gemini-2.5-flash",
+  "created_at": "2024-01-23T12:00:00Z",
+  "updated_at": "2024-01-23T12:30:00Z",
+  "messages": [
+    {"role": "user", "content": "list pods", "timestamp": "..."},
+    {"role": "assistant", "content": "...", "timestamp": "...", "tokens": {"in": 50, "out": 120}}
+  ],
+  "token_usage": {"input": 500, "output": 1200},
+  "activity_log": [...]
+}
+```
+
+**Storage Location**: `~/.aof/sessions/<agent-name>/<session-id>.json`
+
+**Commands**:
+- `aofctl run agent -f agent.yaml --resume` - Resume latest session
+- `aofctl run agent -f agent.yaml --resume <session-id>` - Resume specific session
+- `aofctl sessions list` - List saved sessions
+- `aofctl sessions delete <session-id>` - Delete session
+
+### 4. LazyGit-Inspired UI Enhancements
+
+**Visual Improvements**:
+- Better border styling (rounded corners option)
+- Color-coded activity types in system log
+- Keyboard shortcuts panel (toggle with `?`)
+- Status indicators with icons
+- Progress bars for long operations
+- Breadcrumb navigation
+
+**New Panels/Features**:
+- Help overlay (press `?`)
+- Activity filter (press `f` to filter log types)
+- Compact mode toggle (press `c`)
+- Session info panel
+
+**Color Scheme** (keeping minimalist but adding semantic colors):
+- White: Primary text
+- Gray: Secondary/dimmed
+- Cyan: Thinking/analyzing activities
+- Yellow: Tool execution
+- Green: Success/complete
+- Red: Errors
+- Magenta: System messages
+
+### 5. Enhanced Keybindings
+
+| Key | Action |
+|-----|--------|
+| `Enter` | Send message |
+| `Esc` | Cancel running agent / Close popup |
+| `Ctrl+C` | Quit application |
+| `?` | Toggle help panel |
+| `f` | Toggle activity filter |
+| `c` | Toggle compact mode |
+| `Shift+↑/↓` | Scroll chat |
+| `PageUp/Down` | Scroll chat (5 lines) |
+| `Tab` | Switch focus between panels |
+| `Ctrl+S` | Save session manually |
+| `Ctrl+L` | Clear chat (new session) |
+| `/` | Search in history |
+
+### 6. Footer Enhancements
+
+**Current Footer**:
+```
+✓ Completed │ 5 messages │ Model: google:gemini-2.5-flash │ Tools: shell, kubectl │ Last: 234ms
+```
+
+**Enhanced Footer** (context-aware):
+```
+[Idle] ✓ 5 msgs │ google:gemini-2.5-flash │ 3 tools │ IN: 500 OUT: 1.2K (1.7K total) │ ?:help ESC:cancel
+```
+
+```
+[Running] ◐ 2.3s │ Executing tool: kubectl │ ESC to cancel
+```
+
+## Implementation Order
+
+1. **Phase 1: Activity Logging** (Priority: High)
+   - Add activity events to executor
+   - Display in system log panel
+   - Color-code by activity type
+
+2. **Phase 2: Cancellation** (Priority: High)
+   - Add CancellationToken support
+   - Handle Escape key
+   - Graceful cleanup
+
+3. **Phase 3: Session Persistence** (Priority: Medium)
+   - Create session file format
+   - Auto-save on exit
+   - Resume from file
+
+4. **Phase 4: UI Polish** (Priority: Medium)
+   - Help overlay
+   - Enhanced keybindings
+   - Better styling
+
+5. **Phase 5: Advanced Features** (Priority: Low)
+   - Search in history
+   - Activity filters
+   - Compact mode
+
+## Files to Modify
+
+- `crates/aofctl/src/commands/run.rs` - Main TUI implementation
+- `crates/aof-runtime/src/executor/mod.rs` - Add activity events
+- `crates/aof-runtime/src/executor/agent_executor.rs` - Emit activities
+- `crates/aofctl/src/cli.rs` - Add --resume flag
+- `crates/aofctl/src/commands/mod.rs` - Add sessions command
+
+## New Files to Create
+
+- `crates/aofctl/src/session.rs` - Session management
+- `crates/aof-core/src/activity.rs` - Activity event types
+- `docs/guides/tui-guide.md` - TUI documentation
diff --git a/docs/reference/agent-spec.md b/docs/reference/agent-spec.md
index 2bedb79..e74c948 100644
--- a/docs/reference/agent-spec.md
+++ b/docs/reference/agent-spec.md
@@ -38,6 +38,10 @@ spec:
       args: []
       env: {}
   memory: string|object     # Optional: "InMemory", "File:./path", or structured config
+  output_schema:            # Optional: JSON Schema for structured responses
+    type: string            # object, array, string, number, boolean
+    properties: {}          # Property definitions (for object type)
+    required: []            # Required fields
 ```
 
 ## Metadata Fields
@@ -481,6 +485,108 @@ spec:
 
 ---
 
+## Output Schema (Structured I/O)
+
+### `spec.output_schema`
+**Type:** `object`
+**Required:** No
+**Description:** JSON Schema definition for structured agent responses. When specified, the agent will return validated JSON instead of free-form text.
+
+Structured I/O enables:
+- Type-safe agent responses
+- Validated, parseable output
+- Better composability in flows
+- Auto-generated documentation
+
+**Basic Example:**
+```yaml
+spec:
+  output_schema:
+    type: object
+    properties:
+      status:
+        type: string
+        enum: [healthy, degraded, critical]
+      message:
+        type: string
+      count:
+        type: integer
+    required: [status, message]
+```
+
+**Schema Fields:**
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `type` | string | Schema type: `object`, `array`, `string`, `number`, `boolean`, `integer` |
+| `properties` | object | Property definitions (for object types) |
+| `required` | array | Required property names |
+| `items` | object | Item schema (for array types) |
+| `enum` | array | Allowed values |
+| `description` | string | Field description |
+| `additionalProperties` | boolean | Allow extra properties (default: false) |
+| `validation_mode` | string | `strict` (default), `lenient`, or `coerce` |
+| `on_validation_error` | string | `fail` (default), `retry`, or `passthrough` |
+| `max_retries` | integer | Retry attempts on validation failure (default: 2) |
+
+**Advanced Example with Validation:**
+```yaml
+spec:
+  model: google:gemini-2.5-flash
+  instructions: |
+    Analyze infrastructure and report findings.
+    Always respond in the JSON format matching the schema.
+
+  output_schema:
+    type: object
+    properties:
+      status:
+        type: string
+        enum: [healthy, degraded, critical, unknown]
+        description: Overall health status
+      issues:
+        type: array
+        items:
+          type: object
+          properties:
+            resource:
+              type: string
+            severity:
+              type: string
+              enum: [low, medium, high, critical]
+            message:
+              type: string
+          required: [resource, severity, message]
+      summary:
+        type: string
+    required: [status, issues, summary]
+    validation_mode: strict
+    on_validation_error: retry
+    max_retries: 2
+```
+
+**Using Output in Flows:**
+```yaml
+# AgentFlow using structured output
+spec:
+  nodes:
+    - id: analyze
+      type: Agent
+      config:
+        agent: my-analyzer
+        prompt: "Check status"
+    - id: route
+      type: Conditional
+      config:
+        conditions:
+          - condition: "{{analyze.output.status}} == 'critical'"
+            target: alert
+```
+
+For comprehensive documentation on Structured I/O including all schema types, use cases, and best practices, see the [Structured I/O Reference](structured-io.md).
+
+---
+
 ## Complete Examples
 
 ### Minimal Agent
@@ -570,6 +676,55 @@ spec:
   max_context_messages: 30  # More context for complex DevOps tasks
 ```
 
+### Agent with Structured Output
+
+```yaml
+apiVersion: aof.dev/v1
+kind: Agent
+metadata:
+  name: incident-classifier
+
+spec:
+  model: google:gemini-2.5-flash
+
+  model_config:
+    temperature: 0.2  # Lower for consistent structured output
+
+  instructions: |
+    You are an incident classification system.
+    Analyze incident descriptions and classify them.
+    Always respond with valid JSON matching the schema.
+
+  output_schema:
+    type: object
+    properties:
+      severity:
+        type: string
+        enum: [P1, P2, P3, P4]
+        description: Priority level (P1=critical, P4=low)
+      category:
+        type: string
+        enum: [infrastructure, application, security, network, database]
+      affected_services:
+        type: array
+        items:
+          type: string
+      estimated_impact:
+        type: string
+      recommended_runbook:
+        type: string
+    required: [severity, category, affected_services]
+    validation_mode: strict
+    on_validation_error: retry
+    max_retries: 2
+
+  tools:
+    - shell
+    - kubectl
+
+  memory: "File:./incident-memory.json"
+```
+
 ---
 
 ## Best Practices
@@ -604,6 +759,15 @@ spec:
 - **Testing**: `"InMemory"` (clean state each run)
 - **Conversation History**: Use `"File:./path.json:N"` to keep last N interactions
 
+### Output Schema (Structured I/O)
+- ✅ Include schema requirements in instructions
+- ✅ Use descriptive field names and descriptions
+- ✅ Start with simple schemas and expand as needed
+- ✅ Use `enum` for fields with fixed values
+- ✅ Set `validation_mode: strict` for critical workflows
+- ❌ Don't create deeply nested schemas (>3 levels)
+- ❌ Don't mix free-form text with structured output
+
 ---
 
 ## Environment Variables
@@ -651,5 +815,6 @@ aofctl agent get my-agent -o yaml
 
 - AgentFleet Spec (coming soon) - Multi-agent teams
 - [AgentFlow Spec](agentflow-spec.md) - Workflow automation
+- [Structured I/O Reference](structured-io.md) - Output schemas and validation
 - [aofctl CLI](aofctl.md) - Command reference
 - [Examples](../examples/) - Copy-paste configurations
diff --git a/docs/reference/aofctl.md b/docs/reference/aofctl.md
index a68c03c..864b38f 100644
--- a/docs/reference/aofctl.md
+++ b/docs/reference/aofctl.md
@@ -87,6 +87,38 @@ aofctl get agents pod-doctor --library
 
 # Get library agents as JSON
 aofctl get agents --library -o json
+
+# List all saved sessions
+aofctl get sessions
+
+# List sessions for a specific agent
+aofctl get sessions my-agent
+```
+
+### Session Management
+
+List and manage saved conversation sessions:
+
+```bash
+# List all sessions across all agents
+aofctl get sessions
+
+# List sessions for a specific agent
+aofctl get sessions k8s-agent
+
+# Output in JSON format
+aofctl get sessions -o json
+```
+
+**Session Output:**
+```
+ID         AGENT              MODEL                    MSGS   TOKENS AGE
+abc12345   k8s-agent          google:gemini-2.5-flash    12     2450 2h
+def67890   researcher-agent   anthropic:claude-3-5-sonnet    8     1830 1d
+
+To resume a session:
+  aofctl run agent <config.yaml> --resume
+  aofctl run agent <config.yaml> --session <session-id>
 ```
 
 **Output (default):**
@@ -187,6 +219,10 @@ aofctl run <resource_type> <name_or_file> [flags]
 **Flags:**
 - `-i, --input string` - Input/query for the agent (alias: `--prompt`)
 - `-o, --output string` - Output format: json|yaml|text (default: text)
+- `--output-schema string` - Output schema for structured responses
+- `--output-schema-file string` - Path to JSON schema file
+- `--resume` - Resume the latest session for this agent (interactive mode only)
+- `--session string` - Resume a specific session by ID (interactive mode only)
 
 **Agent Sources:**
 - **File path**: `aofctl run agent my-agent.yaml`
@@ -194,9 +230,15 @@ aofctl run <resource_type> <name_or_file> [flags]
 
 **Examples:**
 ```bash
-# Interactive mode (opens REPL)
+# Interactive mode (opens TUI)
 aofctl run agent my-agent.yaml
 
+# Resume previous conversation
+aofctl run agent my-agent.yaml --resume
+
+# Resume specific session by ID
+aofctl run agent my-agent.yaml --session abc12345
+
 # With query (non-interactive)
 aofctl run agent my-agent.yaml --input "Show me all pods"
 
@@ -213,6 +255,48 @@ aofctl run agent library://incident/rca-agent --prompt "Analyze high latency" -o
 aofctl run workflow incident-response.yaml
 ```
 
+### Interactive TUI Mode
+
+When running an agent without the `--input` option, aofctl launches an interactive terminal user interface (TUI) with:
+
+- **Chat Panel**: Shows conversation history with syntax highlighting
+- **Activity Log**: Real-time display of agent activities (thinking, tool use, LLM calls)
+- **Context Gauge**: Shows token usage and execution time
+- **Input Bar**: Type messages to send to the agent
+- **Help Panel**: Press `?` to view keyboard shortcuts
+
+**Keyboard Shortcuts:**
+
+| Key | Action |
+|-----|--------|
+| `Enter` | Send message to agent |
+| `ESC` | Cancel running execution / Close help panel |
+| `?` | Toggle help panel |
+| `Ctrl+S` | Save session manually |
+| `Ctrl+L` | Clear chat / Start new session |
+| `Ctrl+C` | Quit application |
+| `Shift+↑/↓` | Scroll chat history |
+| `PageUp/Down` | Scroll 5 lines |
+
+**Activity Log Events:**
+
+The activity log shows real-time agent status:
+- 🤔 **Thinking** - Agent is processing
+- 🔍 **Analyzing** - Agent is analyzing input
+- 📡 **LLM Call** - Calling the language model
+- 🔧 **Tool Use** - Executing a tool
+- ✓ **Tool Complete** - Tool execution finished
+- ⚠ **Warning** - Non-critical issue
+- ✗ **Error** - Execution error
+
+**Session Persistence:**
+
+Sessions are automatically saved to `~/.aof/sessions/<agent-name>/` and include:
+- Complete conversation history
+- Token usage statistics
+- Activity log entries
+- Timestamps for each message
+
 **Library Domains:**
 - `kubernetes` - Pod diagnostics, resource optimization
 - `incident` - RCA, incident command, postmortems
diff --git a/docs/reference/structured-io.md b/docs/reference/structured-io.md
index d241714..9d6632e 100644
--- a/docs/reference/structured-io.md
+++ b/docs/reference/structured-io.md
@@ -451,6 +451,6 @@ output_schema:
 
 ## Related
 
-- [Agent Configuration](/docs/reference/agent-spec)
-- [AgentFlow Variables](/docs/agentflow/variables)
+- [Agent Configuration](agent-spec.md)
+- [AgentFlow Specification](agentflow-spec.md)
 - [JSON Schema Reference](https://json-schema.org/)
diff --git a/docs/user/CLI_REFERENCE.md b/docs/user/CLI_REFERENCE.md
index eb35431..e88d022 100644
--- a/docs/user/CLI_REFERENCE.md
+++ b/docs/user/CLI_REFERENCE.md
@@ -79,6 +79,10 @@ aofctl run agent <config-file> [options]
 **Options:**
 - `-i, --input <input>`: Input/query for the agent
 - `-o, --output <format>`: Output format (json, yaml, text) [default: text]
+- `--output-schema <schema>`: Output schema for structured responses
+- `--output-schema-file <file>`: Path to JSON schema file
+- `--resume`: Resume the latest session for this agent (interactive mode only)
+- `--session <id>`: Resume a specific session by ID (interactive mode only)
 
 **Examples:**
 ```bash
@@ -88,6 +92,15 @@ aofctl run agent k8s-agent.yaml --input "list all pods"
 # Run agent with JSON output
 aofctl run agent agent.yaml -i "summarize logs" -o json
 
+# Run agent in interactive TUI mode (no input provided)
+aofctl run agent k8s-agent.yaml
+
+# Resume previous conversation session
+aofctl run agent k8s-agent.yaml --resume
+
+# Resume a specific session by ID
+aofctl run agent k8s-agent.yaml --session abc12345
+
 # Run workflow with initial state
 aofctl run workflow incident-response.yaml --input '{"severity": "high", "incidentId": "INC-123"}'
 
@@ -101,6 +114,36 @@ aofctl run flow slack-bot-flow.yaml
 aofctl run flow slack-bot-flow.yaml --input '{"event": {"text": "show pods", "user": "U123", "channel": "C456"}}'
 ```
 
+#### Interactive TUI Mode
+
+When running an agent without the `--input` option, aofctl launches an interactive terminal user interface (TUI) with:
+
+- **Chat Panel**: Shows conversation history with syntax-highlighted messages
+- **Activity Log**: Real-time display of agent activities (thinking, tool use, etc.)
+- **Context Gauge**: Shows token usage and execution time
+- **Input Bar**: Type messages to send to the agent
+
+**Keyboard Shortcuts:**
+
+| Key | Action |
+|-----|--------|
+| `Enter` | Send message to agent |
+| `ESC` | Cancel running execution / Close help |
+| `?` | Toggle help panel |
+| `Ctrl+S` | Save session manually |
+| `Ctrl+L` | Clear chat / Start new session |
+| `Ctrl+C` | Quit application |
+| `Shift+↑/↓` | Scroll chat history |
+| `PageUp/Down` | Scroll 5 lines |
+
+**Session Persistence:**
+
+Sessions are automatically saved to `~/.aof/sessions/<agent-name>/` and include:
+- Complete conversation history
+- Token usage statistics
+- Activity log entries
+- Timestamps for each message
+
 ### get
 
 List resources in the system.
@@ -116,10 +159,12 @@ aofctl get <resource-type> [name] [options]
 - `tools` / `tool`: List available tools
 - `mcpservers` / `mcpserver`: List MCP servers
 - `jobs` / `job`: List running jobs
+- `sessions` / `session`: List saved conversation sessions
 
 **Options:**
 - `-o, --output <format>`: Output format (json, yaml, wide, name) [default: wide]
 - `--all-namespaces`: Show resources in all namespaces
+- `--library`: List resources from the built-in library
 
 **Examples:**
 ```bash
@@ -131,6 +176,41 @@ aofctl get agent my-agent -o yaml
 
 # List all MCP tools
 aofctl get tools -o json
+
+# List agents from the built-in library
+aofctl get agents --library
+
+# List all saved sessions
+aofctl get sessions
+
+# List sessions for a specific agent
+aofctl get sessions my-agent
+```
+
+#### Session Management
+
+List and manage saved conversation sessions:
+
+```bash
+# List all sessions across all agents
+aofctl get sessions
+
+# List sessions for a specific agent
+aofctl get sessions k8s-agent
+
+# Output in JSON format
+aofctl get sessions -o json
+```
+
+**Session Output:**
+```
+ID         AGENT              MODEL                    MSGS   TOKENS AGE
+abc12345   k8s-agent          google:gemini-2.5-flash    12     2450 2h
+def67890   researcher-agent   claude-sonnet-4             8     1830 1d
+
+To resume a session:
+  aofctl run agent <config.yaml> --resume
+  aofctl run agent <config.yaml> --session <session-id>
 ```
 
 ### apply
diff --git a/docusaurus-site/docusaurus.config.ts b/docusaurus-site/docusaurus.config.ts
index 3d85634..e4eb86c 100644
--- a/docusaurus-site/docusaurus.config.ts
+++ b/docusaurus-site/docusaurus.config.ts
@@ -171,8 +171,8 @@ const config: Config = {
               to: '/docs/reference/agentflow-spec',
             },
             {
-              label: 'GitHub Integration',
-              to: '/docs/reference/github-integration',
+              label: 'Structured I/O',
+              to: '/docs/reference/structured-io',
             },
             {
               label: 'CLI Reference',
diff --git a/docusaurus-site/sidebars.ts b/docusaurus-site/sidebars.ts
index a297ed5..9acd884 100644
--- a/docusaurus-site/sidebars.ts
+++ b/docusaurus-site/sidebars.ts
@@ -181,6 +181,7 @@ const sidebars: SidebarsConfig = {
         'reference/fleet-spec',
         'reference/trigger-spec',
         'reference/context-spec',
+        'reference/structured-io',
       ],
     },
     {