diff --git a/.claude/skills/instrument-agent b/.claude/skills/instrument-agent new file mode 120000 index 00000000..aebffa7c --- /dev/null +++ b/.claude/skills/instrument-agent @@ -0,0 +1 @@ +../../.agents/skills/instrument-agent \ No newline at end of file diff --git a/.claude/skills/setup-agent-replay b/.claude/skills/setup-agent-replay new file mode 120000 index 00000000..20753820 --- /dev/null +++ b/.claude/skills/setup-agent-replay @@ -0,0 +1 @@ +../../.agents/skills/setup-agent-replay \ No newline at end of file diff --git a/.codex/config.toml b/.codex/config.toml new file mode 100644 index 00000000..3c5b69ce --- /dev/null +++ b/.codex/config.toml @@ -0,0 +1,3 @@ +[mcp_servers.raindrop] +command = "/Users/jwu/.raindrop/bin/raindrop" +args = [ "workshop", "mcp" ] diff --git a/.cursor/mcp.json b/.cursor/mcp.json new file mode 100644 index 00000000..3486727c --- /dev/null +++ b/.cursor/mcp.json @@ -0,0 +1,11 @@ +{ + "mcpServers": { + "raindrop": { + "command": "/Users/jwu/.raindrop/bin/raindrop", + "args": [ + "workshop", + "mcp" + ] + } + } +} diff --git a/.mcp.json b/.mcp.json new file mode 100644 index 00000000..3486727c --- /dev/null +++ b/.mcp.json @@ -0,0 +1,11 @@ +{ + "mcpServers": { + "raindrop": { + "command": "/Users/jwu/.raindrop/bin/raindrop", + "args": [ + "workshop", + "mcp" + ] + } + } +} diff --git a/CLAUDE.md b/CLAUDE.md index 71c4f1ce..b2e8e9a5 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,38 +1,27 @@ -# StackMemory - Project Configuration +You are a senior Node.js/Express engineer working on this codebase. Write working code over explanations. Run commands before asserting state — never assume branch, file, or test status without verification. + +# croissant.ai — Agent Guide + +Tool-agnostic reference for AI coding agents working in this repository. 
+ +## Stack + +Node.js / Express / PostgreSQL / Redis +Railway deployment | Stripe / Salesforce / QuickBooks integrations ## Project Structure ``` src/ - cli/ # CLI commands and entry point - core/ # Core business logic - config/ # Config types and manager - context/ # Frame management, enrichment, rehydration - database/ # SQLite adapter, migrations, query cache - digest/ # Digest generation (hybrid, chronological) - errors/ # Error types and recovery - merge/ # Stack merge and conflict resolution - models/ # Model routing, complexity scoring - monitoring/ # Logging, metrics, session monitor - performance/ # Caching, profiling, benchmarks - query/ # Query parsing and routing - retrieval/ # Context retrieval, LLM provider - session/ # Handoff, session management - skills/ # Skill storage and types - storage/ # Tiered storage, remote sync - trace/ # Debug tracing, trace detection - integrations/ # External integrations - claude-code/ # Agent bridge, post-task hooks - linear/ # Linear sync, webhooks, OAuth - mcp/ # MCP server, 56 tool handlers - ralph/ # Multi-agent swarm orchestration - daemon/ # Unified daemon, session daemon - features/ # Analytics, browser, sweep, TUI - hooks/ # Claude Code hook handlers - skills/ # Built-in skill implementations - utils/ # Shared utilities -scripts/ # Build and utility scripts -docs/ # Documentation + api/ # Route handlers + core/ # monitoring-service, cache-service, queue-service, master-agent, api-validation + features/ # Feature modules + shared/ # Shared utilities + integrations/ # Third-party connectors +docs/ # Documentation +scripts/ # Automation scripts +docker/ # Container configs +prompts/ # Externalized LLM prompt templates ``` ## Key Files @@ -69,81 +58,64 @@ Full documentation (docs/): ## Commands ```bash -npm run build # Compile TypeScript (esbuild) -npm run lint # ESLint check -npm run lint:fix # Auto-fix lint issues -npm run lint:fast # Fast lint via oxlint -npm run typecheck # tsc --noEmit (8GB heap, avoids OOM) 
-npm test # Run Vitest (watch) -npm run test:run # Run tests once -npm run linear:sync # Sync with Linear - -# StackMemory CLI -stackmemory capture # Save session state for handoff -stackmemory restore # Restore from captured state -stackmemory snapshot save # Post-run context snapshot (alias: snap) -stackmemory snapshot list # List recent snapshots -stackmemory preflight # File overlap check for parallel tasks (alias: pf) -stackmemory conductor start # Autonomous Linear→worktree→agent orchestrator -stackmemory conductor learn # Analyze agent outcomes (success rate, failure phases, error patterns) -stackmemory conductor learn --evolve # Auto-mutate prompt template from failure data (GEPA) -stackmemory conductor status # Live agent status dashboard - -# GEPA Optimizer (scripts/gepa/optimize.js) -node scripts/gepa/optimize.js run [gens] [--auto-apply] # Full optimization loop -node scripts/gepa/optimize.js score [--auto-apply] # Score variants, select best -node scripts/gepa/optimize.js run --target skill:start # Optimize specific target -node scripts/gepa/optimize.js mutate --auto-phase # Auto-detect worst phase -# Flags: --auto-apply (deploy winner), --no-cache (fresh eval), --target , --phase -stackmemory conductor monitor # Real-time TUI with phase tracking -stackmemory conductor finalize # Clean up dead/stale agents -stackmemory conductor traces # View conversation traces for an agent run -stackmemory conductor replay # Replay full agent conversation from traces -stackmemory conductor trace-stats # Aggregate trace statistics -stackmemory loop "" --until "" # Poll until condition met (alias: watch) +npm run dev # Start dev server +npm run test # Run test suites (3 parallel Jest workers, maxWorkers=4) +npm run lint # Lint check +npm run migrate # Run DB migrations +docker-compose up -d # Start local DBs ``` -## Working Directory +## Git Conventions + +- Branch prefixes: `feature/`, `fix/`, `chore/` +- Commit format: `type(scope): message` +- Do NOT add 
`Co-Authored-By` lines to commits +- Pre-commit hook runs: `npm run lint` + `npm run test` + E2E browser screenshots + +## Testing Rules -- PRIMARY: /Users/jwu/Dev/stackmemory -- ALLOWED: All subdirectories -- TEMP: /tmp for temporary operations +- **Framework**: Jest + SWC +- **DB mocking**: Use dependency injection (DI), not global mocks +- **Supertest**: Pass `app` (NOT `server`) to supertest +- **Global jest**: src/ tests use global `jest` — do NOT import from `@jest/globals` (causes redeclaration errors) +- **Mock reset**: `jest.clearAllMocks()` resets `mockReturnValue` — always re-set mocks in `beforeEach` +- **Test runner**: `npm test` is long-running; run in a background process or sub-agent, not inline -## Validation +## ESLint Rules -Verify each step after code changes — pre-commit hooks catch 80% of CI failures locally: -1. `npm run lint` - fix any errors AND warnings -2. `npm run test:run` - verify no regressions -3. `npm run build` - ensure compilation -4. Run code to verify it works +- Use `catch {}` not `catch (_err) {}` — underscore prefix not in the allowed pattern +- CJS format for JS files in `src/` -Test coverage: -- New features require tests in `src/**/__tests__/` -- Maintain or improve coverage (no untested code paths) -- Critical paths: context management, handoff, Linear sync +## Key Patterns -Testing rules: -- Run `npm run test:run` via subagent or background task — never inline (blocks context) -- ESLint: use `catch {}` not `catch (_err) {}` (lint rule) -- `vi.clearAllMocks()` resets `mockReturnValue` — re-set mocks in `beforeEach` -- Pre-commit hook runs: lint + parallel vitest + build — fix issues before commit, never skip +- Provenance tracking: every data point includes source, timestamp, lineage +- Multi-tenant container isolation +- DI route factories for testability +- Error handling: return undefined over throwing; log and continue over crashing +- Add `.js` extension to relative ESM imports -## Git Rules +## Task Steering -The 
pre-commit hook enforces lint + test + build. Fix the underlying issue rather than bypassing it. +**`master-tasks.md`** is the single source of truth for what to build. Agents must: -- Do not use `--no-verify` on git push or commit — fix the hook failure instead -- Fix lint/test errors before pushing -- If pre-push hooks fail, fix the underlying issue -- Run `npm run lint && npm run test:run` before pushing -- Commit message format: `type(scope): message` -- Branch naming: `feature/STA-XXX-description` | `fix/STA-XXX-description` | `chore/description` +1. Read `master-tasks.md` before starting work (especially via `/next`) +2. Pick the highest-priority (`P0` > `P1` > `P2`) non-blocked `todo` task +3. Prefer tasks with `owner=@agent` over `owner=@me` (unless user overrides) +4. Update task status to `active` when starting, `done` when complete +5. Add branch/PR info to the table row +6. Never create tasks in Linear or GitHub unless `sync` column says so -## Task Management +## StackMemory Context Rule -- Use TodoWrite for 3+ steps or multiple requests -- Keep one task in_progress at a time -- Update task status immediately on completion +- When an agent fetches conversation context for active work, it must pass the exact current assignment or question as `task_query`. +- Prefer the MCP shape: + - `org_id` + - `conversation_id` + - `worker_mode: true` + - `task_query` + - `recover_on_low_signal: true` +- Do not fetch raw `get_conversation` context for worker execution unless full transcript behavior is explicitly required. +- The current assignment is persisted under `.stackmemory/worker-context/current-assignment.json` so wrappers and hooks can auto-fill or enforce `task_query`. 
## Security diff --git a/docs/plans/webhook-retry-exponential-backoff.md b/docs/plans/webhook-retry-exponential-backoff.md new file mode 100644 index 00000000..01b6e6c4 --- /dev/null +++ b/docs/plans/webhook-retry-exponential-backoff.md @@ -0,0 +1,94 @@ +# Plan: Webhook Retry with Exponential Backoff + +## Summary + +Add persistent retry with exponential backoff to webhook event processing. Replace the in-memory `eventQueue` in `webhook-server.ts` with a SQLite-backed delivery queue that tracks attempts, applies exponential backoff with jitter, and respects circuit breaker state. + +## Existing Infrastructure to Leverage + +- **`src/core/errors/recovery.ts`**: `retry()`, `calculateBackoff()`, `CircuitBreaker` — all production-ready +- **`src/integrations/linear/webhook-server.ts`**: Current in-memory queue (`eventQueue[]`, `processQueue()`) +- **`src/core/database/sqlite-adapter.ts`**: SQLite persistence layer +- **Error codes**: `LINEAR_WEBHOOK_FAILED`, `LINEAR_API_ERROR` already exist + +## Files to Change + +| File | Action | Purpose | +|---|---|---| +| `src/integrations/linear/webhook-retry.ts` | CREATE | Delivery queue + retry worker | +| `src/integrations/linear/webhook-server.ts` | MODIFY | Replace in-memory queue with persistent queue | +| `src/integrations/linear/__tests__/webhook-retry.test.ts` | CREATE | Tests for retry logic | + +## Data Model + +New table: `webhook_deliveries` (added inline in webhook-retry.ts, not in global migrations — this is integration-scoped) + +```sql +CREATE TABLE IF NOT EXISTS webhook_deliveries ( + id TEXT PRIMARY KEY, + event_type TEXT NOT NULL, + payload TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'pending', -- pending | processing | completed | failed | dead + attempts INTEGER NOT NULL DEFAULT 0, + max_attempts INTEGER NOT NULL DEFAULT 5, + next_retry_at INTEGER, -- unix ms + last_error TEXT, + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL +); +CREATE INDEX IF NOT EXISTS idx_webhook_deliveries_status_retry + 
ON webhook_deliveries(status, next_retry_at); +``` + +## Implementation Steps + +### Step 1: Create `webhook-retry.ts` + +- `WebhookDeliveryQueue` class + - `constructor(dbPath: string, options?: RetryConfig)` — opens/creates SQLite DB, ensures table + - `enqueue(eventType: string, payload: object): string` — inserts delivery, returns ID + - `processNext(): Promise` — picks oldest `pending` or retriable `failed` delivery where `next_retry_at <= now`, marks `processing`, calls handler, updates status + - `startWorker(intervalMs?: number): void` — setInterval loop calling `processNext()` + - `stopWorker(): void` — clearInterval + - `getStats(): { pending, processing, completed, failed, dead }` — counts by status +- Uses `calculateBackoff()` from `recovery.ts` for next_retry_at computation +- Marks delivery `dead` after max_attempts exceeded +- Config: `{ maxAttempts: 5, initialDelay: 1000, maxDelay: 300_000, backoffFactor: 2 }` + +### Step 2: Modify `webhook-server.ts` + +- Replace `eventQueue: LinearWebhookPayload[]` with `WebhookDeliveryQueue` instance +- In webhook endpoint handler: call `queue.enqueue()` instead of `eventQueue.push()` +- Start worker in `start()`, stop in `stop()` +- Remove `processQueue()` method and `isProcessing` flag + +### Step 3: Write tests + +- Unit tests for `WebhookDeliveryQueue`: + - enqueue creates a delivery record + - processNext picks the oldest pending delivery + - failed delivery gets exponential backoff schedule + - delivery marked dead after max_attempts + - concurrent processNext doesn't double-process (status = processing guard) + - getStats returns correct counts + +## Acceptance Criteria + +- [x] Failed webhook events are retried up to 5 times with exponential backoff +- [x] Backoff schedule: 1s, 2s, 4s, 8s, 16s (capped at 300s) +- [x] Delivery state persisted in SQLite — survives process restart +- [x] Dead deliveries (exceeded max attempts) are logged but not retried +- [x] Existing webhook signature verification 
unchanged +- [x] Tests pass with 80%+ coverage on new code + +## Risks + +- **LOW**: SQLite write contention if webhook volume is high — mitigated by WAL mode (already used) +- **LOW**: Worker interval drift — acceptable for webhook retry cadence (not real-time) + +## Non-Goals + +- Redis/BullMQ queue (overkill for single-process webhook handler) +- Webhook delivery UI/dashboard +- Dead letter queue notification +- Outbound webhook sending (this is for processing *received* webhooks) diff --git a/docs/research/agent-readable-web-2026.md b/docs/research/agent-readable-web-2026.md new file mode 100644 index 00000000..f079b7de --- /dev/null +++ b/docs/research/agent-readable-web-2026.md @@ -0,0 +1,305 @@ +# The Agent-Readable Web: State of the Art (May 2026) +Research report on standards, conventions, and infrastructure that expose web data in structured formats for AI agents. + +* * * +## 1. llms.txt — The robots.txt for LLMs +**Spec**: Proposed by Jeremy Howard (Answer.AI/FastAI) on September 3, 2024. A Markdown file at `/llms.txt` with a curated list of links to a site's highest-value content plus a one-paragraph brand summary. Companion file `/llms-full.txt` includes full inline content (Vercel's is ~400k words). + +**Adoption**: ~10% of top 300k domains (SE Ranking, Nov 2025). BuiltWith tracked 844k+ implementations by Oct 2025. Notable adopters: Anthropic, Stripe, Cursor, Cloudflare, Vercel, Mintlify, Supabase, ElevenLabs. A directory at llms-text.com lists 784+ live examples. Walmart briefly had one (Nov 2025, removed by Jan 2026). + +**Reality check**: Major LLM crawlers (GPTBot, Google-Extended, ClaudeBot) are **not** fetching it in meaningful volume. An XGBoost model for AI citation prediction _improved_ when the llms.txt variable was removed. 8/9 sites saw no measurable traffic change after implementation. Google's Gary Illyes confirmed Google doesn't support it; John Mueller compared it to the discredited keywords meta tag. 
+ +**Where it works**: IDE agents (Cursor, Windsurf, Claude Code, GitHub Copilot, Cline, Aider) **do** fetch `/llms.txt` routinely. LangChain's `mcpdoc` MCP server exposes llms.txt files to host apps. It's a **developer-experience play**, not an SEO play — the first standardized B2A (Business-to-Agent) surface. + +**Verdict**: Low cost (~half day), no proven SEO benefit, real value for developer tooling. Not dead, but not the standard it's being sold as. Google included it in their A2A protocol docs, signaling experimental interest. + +* * * +## 2. MCP (Model Context Protocol) — The USB-C of AI +**Origin**: Open-sourced by Anthropic November 2024. Standardizes how agents connect to external tools, databases, and APIs. Uses Streamable HTTP transport (formerly SSE). + +**Adoption — beyond Anthropic**: + +| Platform | Status | +| --- | --- | +| OpenAI | ChatGPT + API support (March 2025) | +| Google | Gemini API + Vertex AI Agent Builder (Q1 2026) | +| Microsoft | MCP servers for GitHub, Azure, Teams, M365 | +| IDE ecosystem | VS Code, Cursor, Windsurf, Cline — native | +| Agent frameworks | 92% of new frameworks ship MCP support (LangGraph, CrewAI, AutoGen) | + +**Scale**: 97M+ monthly SDK downloads, 10k+ active servers in production, 9,400+ public servers by April 2026. 78% enterprise team adoption. + +**Governance**: Donated to **Linux Foundation's Agentic AI Foundation (AAIF)** in December 2025, co-founded by Anthropic, Block, OpenAI, with AWS, Google, Microsoft as members. + +**Security**: June 2025 update mandated PKCE (OAuth 2.1), Resource Indicators (RFC 8707), and explicitly prohibited token passthrough. Anonymous Dynamic Client Registration remains a concern for enterprises. + +**2026 Roadmap**: Stateless Streamable HTTP across server instances, enterprise auth with SSO, gateway/proxy patterns, triggers and event-driven updates. + +**Verdict**: MCP crossed from "Anthropic-led" to "industry-default" between July 2025 and February 2026. 
It is the de facto standard for agent-to-tool communication. The protocol closest to "won." + +* * * +## 3. A2A (Agent-to-Agent Protocol) — Google's Answer +**What**: Google-introduced (April 2025) protocol for multi-agent systems. Uses HTTP + SSE + JSON-RPC 2.0. Agents advertise capabilities via **Agent Cards**. + +**Scope**: Where MCP connects agents to tools, A2A connects agents to agents. Complementary, not competing. + +**Adoption**: 150+ organizations by April 2026 (Google, Microsoft, AWS, Salesforce, SAP, ServiceNow, Workday, IBM). Donated to Linux Foundation June 2025. v0.3 added gRPC support and signed security cards. **v1.0 announced at Google Cloud Next 2026**. + +**Architecture**: Agent Cards (capability discovery) + Tasks (work units) + HTTP/SSE/JSON-RPC transport. Azure AI Foundry, Amazon Bedrock AgentCore, and Google Cloud all integrated natively. + +* * * +## 4. Other Emerging Protocols +| Protocol | Origin | Transport | Purpose | +| --- | --- | --- | --- | +| **ACP** (Agent Communication Protocol) | IBM | REST/HTTP | Enterprise MIME-typed multipart messages, RBAC + DID auth | +| **AGP** (Agent Gateway Protocol) | Community | gRPC/HTTP2 + Protobuf | High-throughput messaging between distributed agents | +| **ANP** (Agent Network Protocol) | Community | Decentralized | Open agent marketplaces | +| **WebMCP** | Google I/O 2026 | Web-native | Default contract for agent-facing web products | +| **VOIX** | TU Darmstadt | HTML `<tool>` + `<context>` tags | Declarative agent-web interaction directly in HTML | + +**W3C AI Agent Protocol Community Group** is working toward official web standards for agent communication, with specifications expected 2026-2027. + +**NIST** released a concept paper on "Accelerating the Adoption of Software and AI Agent Identity and Authorization" (public comments closed April 2, 2026) — first federal-level effort on agent identity governance. + +* * * +## 5. 
JSON Feed +**Spec**: JSON-based web syndication format (v1.1), alternative to RSS/Atom. Created 2017 by Manton Reece and Brent Simmons. MIME type: `application/feed+json`. + +**Adoption**: Supported by NetNewsWire, NewsBlur, ReadKit, Reeder, Micro.blog, NPR. Lower adoption than RSS/Atom since CMS platforms have no incentive to switch. + +**AI relevance**: Easier to parse than XML-based feeds. `feed-mcp` is an open-source MCP server that exposes RSS/Atom/JSON feeds to AI agents. Structured content updates improve freshness signals for AI training data. + +**Verdict**: Niche but useful. If you're building new infrastructure, JSON Feed is simpler than RSS. But RSS remains the pragmatic default. + +* * * +## 6. Structured Data: JSON-LD + Schema.org +**Adoption**: ~47.6% of top 10M websites include at least one JSON-LD block (Common Crawl, July 2025). Google recommends JSON-LD as the preferred schema format. + +**AI impact**: Pages with valid structured data are **2.3x more likely** to appear in Google AI Overviews (Semrush 2025). Princeton GEO research found up to **40% higher visibility** in AI-generated responses for content with clear structural signals. + +**Who consumes it**: ChatGPT, Perplexity, Google AI Overviews, and AI agents all parse JSON-LD when browsing pages. It provides high-confidence facts that LLMs cite directly. + +**Caveat**: Some agents strip `