diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
index 4d6d4c0f..f45726ae 100644
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -19,7 +19,7 @@ jobs:
       - uses: actions/setup-python@v6
         with:
           python-version: '3.13'
-      - run: uv sync --extra dev --extra rest --extra binary --extra vectorstores-sqlite-vec --extra openai-embeddings
+      - run: uv sync --extra dev --extra binary --extra vectorstores-sqlite-vec --extra vectorstores-pgvector --extra openai-embeddings
       - run: uv run pytest --cov --cov-report=term-missing --durations=50
 
   report-failure:
diff --git a/.github/workflows/pr-gate.yml b/.github/workflows/pr-gate.yml
index be04bcc4..c0ef76d4 100644
--- a/.github/workflows/pr-gate.yml
+++ b/.github/workflows/pr-gate.yml
@@ -57,7 +57,7 @@ jobs:
       - uses: actions/setup-python@v6
         with:
           python-version: '3.13'
-      - run: uv sync --extra dev --extra rest --extra binary --extra vectorstores-sqlite-vec --extra openai-embeddings
+      - run: uv sync --extra dev --extra binary --extra vectorstores-sqlite-vec --extra openai-embeddings
       - run: uv run pyright
 
   test:
@@ -72,7 +72,7 @@ jobs:
       - uses: actions/setup-python@v6
         with:
           python-version: '3.13'
-      - run: uv sync --extra dev --extra rest --extra binary --extra vectorstores-sqlite-vec --extra openai-embeddings
+      - run: uv sync --extra dev --extra binary --extra vectorstores-sqlite-vec --extra vectorstores-pgvector --extra openai-embeddings
       - run: uv run pytest -m "not nightly" --cov --cov-report=term-missing
 
   build:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b526f139..ac506986 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,33 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 Copyright 2026 Firefly Software Foundation. Licensed under the Apache License 2.0.
 
+## [26.05.33] - 2026-05-31
+
+### Removed
+
+- **BREAKING — REST/queue exposure layer.** Deleted the `fireflyframework_agentic.exposure`
+  package (FastAPI app factory, HTTP/WS controllers, health probes, SSE, CORS/rate-limit/auth
+  middleware, and Kafka/RabbitMQ/Redis consumer/producer hosts), the `rest`/`kafka`/`rabbitmq`/
+  `redis`/`queues` extras, the `ExposureError`/`QueueConnectionError` exceptions, and the
+  REST-serving config fields `auth_api_keys`/`auth_bearer_tokens`/`cors_allowed_origins`.
+  Serving/hosting is now owned by the consuming service. The framework is a pure in-process
+  library: it listens on no port and connects to no message broker.
+- **BREAKING — service/infra observability.** Removed `observability.configure_exporters`
+  (global OTel SDK provider/exporter wiring), the W3C trace-context propagation helpers
+  (`inject_trace_context`/`extract_trace_context`/`get_trace_context`/`set_trace_context`/
+  `trace_context_scope`), the `WebhookSink`, and the `otlp_endpoint` config field. The
+  framework still emits model/agent spans/metrics via the OpenTelemetry API; configuring the
+  SDK/exporters and cross-service trace propagation is now the host's responsibility.
+- **BREAKING — inbound RBAC auth.** Removed `security.RBACManager`/`require_permission`, the
+  `rbac_enabled`/`rbac_jwt_secret`/`rbac_multi_tenant` config fields, and the `pyjwt`
+  dependency from the `security` extra (`cryptography` stays for `EncryptedMemoryStore`).
+  Inbound-request authorization is a hosting concern owned by the service.
+
+### Changed
+
+- **`experiments`/`lab` documented as optional** leaf developer-tooling modules (no code or
+  dependency change; the core already did not import them).
+
 ## [26.05.32] - 2026-05-31
 
 ### Fixed
diff --git a/README.md b/README.md
index 3af6679f..a7b407e6 100644
--- a/README.md
+++ b/README.md
@@ -40,23 +40,22 @@ Copyright 2026 Firefly Software Foundation. Licensed under the Apache License 2.
 model-agnostic agents with structured output. But a production GenAI system demands
 far more than a single agent call. You need to orchestrate multi-step reasoning,
 validate and retry LLM outputs against schemas, manage conversation memory across
-turns, observe every call with traces and metrics, run A/B experiments to compare
-models, and expose the whole thing over REST or message queues — all without coupling
-your domain logic to infrastructure concerns.
+turns, observe every call with traces and metrics, and run A/B experiments to compare
+models — all without coupling your domain logic to infrastructure concerns.
 
 **fireflyframework-agentic is the production framework built on top of Pydantic AI.**
-It extends the engine with six composable layers — from core configuration through
-agent management, intelligent reasoning, experimentation, pipeline orchestration,
-and service exposure — so that every concern has a dedicated, protocol-driven module.
+It extends the engine with composable layers — from core configuration through
+agent management, intelligent reasoning, experimentation, and pipeline orchestration —
+so that every concern has a dedicated, protocol-driven module.
 You write your business logic; the framework provides the architecture.
 
 **What "metaframework" means in practice:**
 
 - You keep Pydantic AI's familiar `Agent`, `Tool`, and `RunContext` APIs unchanged.
 - The framework wraps them with lifecycle hooks, registries, delegation routers,
-  memory managers, reasoning patterns, validation loops, DAG pipelines, and exposure
-  endpoints — all optional, all composable, all swappable through Python protocols.
-- No vendor lock-in: switch models, swap memory backends, or replace the REST layer
+  memory managers, reasoning patterns, validation loops, and DAG pipelines — all
+  optional, all composable, all swappable through Python protocols.
+- No vendor lock-in: switch models, swap memory backends, or replace components
   without touching your agent code.
 
 ---
@@ -64,12 +63,11 @@ You write your business logic; the framework provides the architecture.
 ## Key Principles
 
 1. **Protocol-driven contracts** — Every extension point is defined as a
-   `@runtime_checkable` `Protocol` or abstract base class. The framework ships thirteen
+   `@runtime_checkable` `Protocol` or abstract base class. The framework ships twelve
    protocols (`AgentLike`, `ToolProtocol`, `GuardProtocol`, `ReasoningPattern`,
    `DelegationStrategy`, `StepExecutor`, `CompressionStrategy`, `MemoryStore`,
-   `ValidationRule`, `Chunker`, `EmbeddingProtocol`, `VectorStoreProtocol`,
-   `QueueConsumer` / `QueueProducer`) so you can swap or extend any component
-   without modifying framework internals.
+   `ValidationRule`, `Chunker`, `EmbeddingProtocol`, `VectorStoreProtocol`) so you can
+   swap or extend any component without modifying framework internals.
 
 2. **Convention over configuration** — Sensible defaults everywhere.
    `FireflyAgenticConfig` is a Pydantic Settings singleton that reads from environment
@@ -77,16 +75,15 @@ You write your business logic; the framework provides the architecture.
    governs model defaults, retry counts, token limits, observability endpoints,
    memory backends, and validation thresholds — override only what you need.
 
-3. **Layered composition** — Six layers with strict top-down dependency flow:
-   **Core → Agent → Intelligence → Experimentation → Orchestration → Exposure**.
+3. **Layered composition** — Layers with strict top-down dependency flow:
+   **Core → Agent → Intelligence → Experimentation → Orchestration**.
    Higher layers depend on lower layers but never the reverse, keeping the
    dependency graph acyclic and each module independently testable.
 
-4. **Optional dependencies** — Heavy libraries (`fastapi`, `aiokafka`, `aio-pika`,
-   `redis`, `chromadb`, `pinecone`, `openai`) are declared as pip extras (`[rest]`,
-   `[kafka]`, `[rabbitmq]`, `[redis]`, `[openai-embeddings]`,
-   `[vectorstores-chroma]`, `[all]`). The core framework imports them lazily inside
-   factory functions so that you install only what your deployment requires.
+4. **Optional dependencies** — Heavy libraries (`chromadb`, `pinecone`, `openai`,
+   `asyncpg`) are declared as pip extras (`[openai-embeddings]`, `[vectorstores-chroma]`,
+   `[vectorstores-pinecone]`, `[postgres]`, `[all]`). The core framework imports them
+   lazily inside factory functions so that you install only what your deployment requires.
 
 ---
 
@@ -94,11 +91,6 @@ You write your business logic; the framework provides the architecture.
 
 ```mermaid
 graph TD
-    subgraph Exposure Layer
-        REST["REST API<br/><small>create_agentic_app · SSE streaming<br/>health · middleware · router</small>"]
-        QUEUES["Message Queues<br/><small>Kafka · RabbitMQ · Redis<br/>consumers · producers · QueueRouter</small>"]
-    end
-
     subgraph Orchestration Layer
         PIPE["Pipeline / DAG Engine<br/><small>DAG · DAGNode · DAGEdge<br/>PipelineEngine · PipelineBuilder<br/>AgentStep · ReasoningStep · CallableStep<br/>FanOutStep · FanInStep<br/>EmbeddingStep · RetrievalStep</small>"]
     end
@@ -116,7 +108,7 @@ graph TD
     subgraph Intelligence Layer
         REASON["Reasoning Patterns<br/><small>ReAct · CoT · PlanAndExecute<br/>Reflexion · ToT · GoalDecomposition<br/>ReasoningPipeline</small>"]
         VAL["Validation & QoS<br/><small>OutputReviewer · OutputValidator<br/>ConfidenceScorer · ConsistencyChecker<br/>GroundingChecker · 5 rule types</small>"]
-        OBS["Observability<br/><small>FireflyTracer · FireflyMetrics<br/>FireflyEvents · UsageTracker<br/>CostCalculator · @traced · @metered<br/>configure_exporters</small>"]
+        OBS["Observability<br/><small>FireflyTracer · FireflyMetrics<br/>FireflyEvents · UsageTracker<br/>CostCalculator · @traced · @metered</small>"]
         EXPL["Explainability<br/><small>TraceRecorder · ExplanationGenerator<br/>AuditTrail · ReportBuilder</small>"]
     end
 
@@ -135,8 +127,6 @@ graph TD
         PLUG["Plugin System<br/><small>PluginDiscovery<br/>3 entry-point groups</small>"]
     end
 
-    REST --> PIPE
-    QUEUES --> PIPE
     PIPE --> AGT
     PIPE --> REASON
     PIPE --> VAL
@@ -213,15 +203,6 @@ classDiagram
         +name: str
         +validate(value) ValidationRuleResult
     }
-    class QueueConsumer {
-        <<Protocol>>
-        +start()
-        +stop()
-    }
-    class QueueProducer {
-        <<Protocol>>
-        +publish(message)
-    }
     class EmbeddingProtocol {
         <<Protocol>>
         +embed(texts) EmbeddingResult
@@ -265,12 +246,6 @@ classDiagram
     ValidationRule <|.. RangeRule
     ValidationRule <|.. EnumRule
     ValidationRule <|.. CustomRule
-    QueueConsumer <|.. KafkaAgentConsumer
-    QueueConsumer <|.. RabbitMQAgentConsumer
-    QueueConsumer <|.. RedisAgentConsumer
-    QueueProducer <|.. KafkaAgentProducer
-    QueueProducer <|.. RabbitMQAgentProducer
-    QueueProducer <|.. RedisAgentProducer
     EmbeddingProtocol <|.. BaseEmbedder
     VectorStoreProtocol <|.. BaseVectorStore
 ```
@@ -346,8 +321,9 @@ classDiagram
   tools, and reasoning steps. `FireflyMetrics` records tokens (total, prompt,
   completion), latency, cost, errors, and reasoning depth via the OTel metrics API.
   `FireflyEvents` emits structured log records. `@traced` and `@metered` decorators
-  instrument any function with one line. `configure_exporters` sets up OTLP or
-  console exporters. `UsageTracker` automatically records token usage, cost
+  instrument any function with one line. The framework emits model and agent
+  telemetry purely through the OpenTelemetry API; the host application owns OTel
+  SDK and exporter configuration. `UsageTracker` automatically records token usage, cost
   estimates, and latency for every agent run, reasoning step, and pipeline
   execution. `CostCalculator` supports a built-in static price table and optional
   `genai-prices` integration. Budget enforcement logs warnings when configurable
@@ -370,14 +346,11 @@ classDiagram
   `EvalDataset` loads/saves test cases from JSON. `ModelComparison` runs the
   same prompts across multiple agents for side-by-side analysis.
 
-- **Exposure** — `create_agentic_app()` produces a FastAPI application with
-  auto-generated `POST /agents/{name}/run` endpoints, SSE streaming via
-  `sse_stream`, health/readiness/liveness checks, CORS and request-ID middleware,
-  and multimodal input support. Queue consumers (`KafkaAgentConsumer`,
-  `RabbitMQAgentConsumer`, `RedisAgentConsumer`) route messages to agents.
-  Queue producers (`KafkaAgentProducer`, `RabbitMQAgentProducer`,
-  `RedisAgentProducer`) publish results back. `QueueRouter` provides
-  pattern-based message routing across agents.
+  > **Optional developer tooling.** `fireflyframework_agentic.experiments` (A/B
+  > experiments) and `fireflyframework_agentic.lab` (offline evaluation /
+  > benchmarking) are leaf modules — nothing in the core imports them and they add
+  > no third-party dependencies. Import them only if you run experiments or
+  > evaluations; agent-building consumers can ignore them.
 
 - **Embeddings** — `EmbeddingProtocol` (duck-typed) and `BaseEmbedder`
   (inheritance with auto-batching) provide provider-agnostic text embedding.
@@ -425,17 +398,12 @@ classDiagram
 
 **Optional dependencies** (installed via extras):
 
-- `[rest]` — [FastAPI](https://fastapi.tiangolo.com/) `>=0.115.0`, [Uvicorn](https://www.uvicorn.org/) `>=0.34.0`, [sse-starlette](https://github.com/sysid/sse-starlette) `>=2.0.0`
-- `[kafka]` — [aiokafka](https://aiokafka.readthedocs.io/) `>=0.12.0`
-- `[rabbitmq]` — [aio-pika](https://aio-pika.readthedocs.io/) `>=9.5.0`
-- `[redis]` — [redis-py](https://redis-py.readthedocs.io/) `>=5.2.0`
 - `[costs]` — [genai-prices](https://pypi.org/project/genai-prices/) for up-to-date LLM pricing data
-- `[queues]` — All queue backends (Kafka + RabbitMQ + Redis)
 - `[openai-embeddings]` — [openai](https://github.com/openai/openai-python) `>=1.0.0` for OpenAI/Azure embeddings
 - `[vectorstores-chroma]` — [chromadb](https://www.trychroma.com/) `>=0.5.0`
 - `[vectorstores-pinecone]` — [pinecone](https://www.pinecone.io/) `>=5.0.0`
 - `[vectorstores-qdrant]` — [qdrant-client](https://qdrant.tech/) `>=1.12.0`
-- `[all]` — Everything (REST + queues + embeddings + vector stores + costs + security + HTTP)
+- `[all]` — Everything (embeddings + vector stores + costs + security + HTTP)
 
 **LLM provider keys** (at least one):
 
@@ -490,11 +458,6 @@ uv sync --all-extras # or: pip install -e ".[all]"
 
 | Extra | What it adds | When you need it |
 |---|---|---|
-| `rest` | FastAPI, Uvicorn, SSE | Exposing agents as REST endpoints |
-| `kafka` | aiokafka | Consuming/producing via Apache Kafka |
-| `rabbitmq` | aio-pika | Consuming/producing via RabbitMQ |
-| `redis` | redis-py | Consuming/producing via Redis Pub/Sub |
-| `queues` | All of the above | Any message queue integration |
 | `postgres` | asyncpg, SQLAlchemy | PostgreSQL memory persistence |
 | `mongodb` | motor, pymongo | MongoDB memory persistence |
-| `security` | PyJWT, cryptography | RBAC, encryption, JWT auth |
+| `security` | cryptography | Encryption (`EncryptedMemoryStore`) |
@@ -657,34 +620,6 @@ results = await store.search_text("machine learning languages", top_k=1)
 print(results[0].document.text)  # Python is great for AI
 ```
 
-### 9. Expose via REST
-
-```python
-from fireflyframework_agentic.exposure.rest import create_agentic_app
-
-app = create_agentic_app(title="My GenAI Service")
-# uvicorn myapp:app --reload
-```
-
-### 10. Expose via Queues (Consumer)
-
-```python
-from fireflyframework_agentic.exposure.queues.kafka import KafkaAgentConsumer
-
-consumer = KafkaAgentConsumer("assistant", topic="requests", bootstrap_servers="localhost:9092")
-await consumer.start()
-```
-
-### 11. Publish via Queues (Producer)
-
-```python
-from fireflyframework_agentic.exposure.queues.kafka import KafkaAgentProducer
-
-producer = KafkaAgentProducer(topic="results", bootstrap_servers="localhost:9092")
-await producer.publish({"agent": "assistant", "output": "Done processing."})
-await producer.close()
-```
-
 ## Using in Jupyter Notebooks
 
 firefly-agentic works seamlessly in Jupyter notebooks and JupyterLab.
@@ -779,7 +714,7 @@ pipeline. Start here if you want to learn the framework thoroughly.
 **[docs/use-case-idp.md](docs/use-case-idp.md)** is a focused walkthrough of building a
 7-phase IDP pipeline that ingests, splits, classifies, extracts, validates, assembles,
 and explains data from corporate documents — using agents, reasoning, document splitting,
-content processing, validation, explainability, pipelines, and REST exposure.
+content processing, validation, explainability, and pipelines.
 
 ### Module Reference
 
@@ -801,8 +736,6 @@ Detailed guides for each module:
 - [Explainability](docs/explainability.md) — Decision recording, audit trails, reports
 - [Experiments](docs/experiments.md) — A/B testing, variant comparison
 - [Lab](docs/lab.md) — Benchmarks, datasets, evaluators
-- [Exposure REST](docs/exposure-rest.md) — FastAPI integration, SSE streaming
-- [Exposure Queues](docs/exposure-queues.md) — Kafka, RabbitMQ, Redis integration
 - Studio — moved to [fireflyframework-agentic-studio](https://github.com/fireflyframework/fireflyframework-agentic-studio)
 ---
 
diff --git a/docs/README.md b/docs/README.md
index 1063a382..9473b5f8 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -8,9 +8,9 @@ Copyright 2026 Firefly Software Foundation. Licensed under the Apache License 2.
 ---
 
 **fireflyframework-agentic** is the production-grade GenAI metaframework built on
-[Pydantic AI](https://ai.pydantic.dev/). It extends the engine with six composable
+[Pydantic AI](https://ai.pydantic.dev/). It extends the engine with composable
 layers — from core configuration through agent management, intelligent reasoning,
-experimentation, pipeline orchestration, and service exposure — so that every concern
+experimentation, and pipeline orchestration — so that every concern
 has a dedicated, protocol-driven module.
 
 ---
@@ -28,14 +28,14 @@ has a dedicated, protocol-driven module.
 
 ## Documentation Map
 
-The framework is organised into six layers. Each layer depends only on the layers
+The framework is organised into layers. Each layer depends only on the layers
 below it, keeping the dependency graph acyclic and each module independently testable.
 
 ### Core Layer
 
 | | |
 |---|---|
-| **[Architecture](architecture.md)** | Design principles, six-layer model, protocol hierarchy, dependency flow |
+| **[Architecture](architecture.md)** | Design principles, layered model, protocol hierarchy, dependency flow |
 
 ### Agent Layer
 
@@ -82,19 +82,16 @@ below it, keeping the dependency graph acyclic and each module independently tes
 | **[Experiments](experiments.md)** | `Experiment`, `Variant`, `ExperimentRunner`, `ExperimentTracker`, `VariantComparator` |
 | **[Lab](lab.md)** | `LabSession`, `Benchmark`, `EvalOrchestrator`, `EvalDataset`, `ModelComparison` |
 
+> **Optional developer tooling.** `experiments` and `lab` are leaf modules — nothing
+> in the core imports them and they add no third-party dependencies. Import them only
+> if you run experiments or evaluations; agent-building consumers can ignore them.
+
 ### Orchestration Layer
 
 | | |
 |---|---|
 | **[Pipeline](pipeline.md)** | `DAG`, `PipelineEngine`, `PipelineBuilder`, step types, parallel execution, retries |
 
-### Exposure Layer
-
-| | |
-|---|---|
-| **[REST Exposure](exposure-rest.md)** | `create_agentic_app()`, auto-generated routes, SSE streaming, WebSocket, auth middleware, conversation CRUD, rate limiting, health checks |
-| **[Queue Exposure](exposure-queues.md)** | Kafka, RabbitMQ, Redis consumers/producers, `QueueRouter` |
-
 ### Studio
 
 Studio (visual IDE, project API, scheduling, tunnel exposure, BPM tutorial)
@@ -109,7 +106,7 @@ lives in a separate repository:
 every concept from zero to expert through a real-world **Intelligent Document
 Processing** pipeline. It covers configuration, agents, tools, prompts, reasoning,
 content processing, memory, validation, pipelines, observability, explainability,
-experiments, lab, REST and queue exposure, deployment, and advanced patterns.
+experiments, lab, deployment, and advanced patterns.
 
 ---
 
diff --git a/docs/agents.md b/docs/agents.md
index 351577de..9a605956 100644
--- a/docs/agents.md
+++ b/docs/agents.md
@@ -909,15 +909,6 @@ async with await agent.run_stream("Question", streaming_mode="incremental") as s
         print(token, end="", flush=True)
 ```
 
-### REST API Integration
-
-The framework's REST API exposes both streaming modes:
-
-- **`POST /agents/{name}/stream`** — Buffered streaming (SSE)
-- **`POST /agents/{name}/stream/incremental`** — Incremental streaming (SSE)
-
-See [REST API Guide](exposure-rest.md) for details.
-
 ---
 
 ## Run Timeout
diff --git a/docs/architecture.md b/docs/architecture.md
index 560b6a74..ada45add 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -20,11 +20,11 @@ The framework follows four guiding principles:
    configuration and supports environment-variable overrides.
 
 3. **Layered composition** -- Modules are organised into layers (Core, Agent, Intelligence,
-   Experimentation, Exposure). Higher layers depend on lower layers but never the reverse.
+   Experimentation, Orchestration). Higher layers depend on lower layers but never the reverse.
 
-4. **Optional dependencies** -- Heavy third-party libraries (FastAPI, aiokafka, aio-pika,
-   redis) are declared as extras. The core framework imports them lazily so that users
-   only install what they need.
+4. **Optional dependencies** -- Heavy third-party libraries (embedding providers, vector
+   store clients, storage backends) are declared as extras. The core framework imports them
+   lazily so that users only install what they need.
 
 ---
 
@@ -32,11 +32,6 @@ The framework follows four guiding principles:
 
 ```mermaid
 graph TD
-    subgraph Exposure Layer
-        REST["REST API<br/><small>create_agentic_app · SSE streaming · WebSocket<br/>health · auth middleware · router · conversations<br/>RateLimiter</small>"]
-        QUEUES["Message Queues<br/><small>Kafka · RabbitMQ · Redis<br/>consumers · producers · QueueRouter</small>"]
-    end
-
     subgraph Orchestration Layer
         PIPE["Pipeline / DAG Engine<br/><small>DAG · DAGNode · DAGEdge<br/>PipelineEngine · PipelineBuilder · PipelineEventHandler<br/>AgentStep · ReasoningStep · CallableStep · BranchStep<br/>FanOutStep · FanInStep · exponential backoff + jitter</small>"]
     end
@@ -49,7 +44,7 @@ graph TD
     subgraph Intelligence Layer
         REASON["Reasoning Patterns<br/><small>ReAct · CoT · PlanAndExecute<br/>Reflexion · ToT · GoalDecomposition<br/>ReasoningPipeline</small>"]
         VAL["Validation & QoS<br/><small>OutputReviewer · OutputValidator<br/>ConfidenceScorer · ConsistencyChecker<br/>GroundingChecker · 5 rule types</small>"]
-        OBS["Observability<br/><small>FireflyTracer · FireflyMetrics<br/>FireflyEvents · UsageTracker<br/>CostCalculator · @traced · @metered<br/>configure_exporters</small>"]
+        OBS["Observability<br/><small>FireflyTracer · FireflyMetrics<br/>FireflyEvents · UsageTracker<br/>CostCalculator · @traced · @metered</small>"]
         EXPL["Explainability<br/><small>TraceRecorder · ExplanationGenerator<br/>AuditTrail · ReportBuilder</small>"]
     end
 
@@ -72,8 +67,6 @@ graph TD
         PLUG["Plugin System<br/><small>PluginDiscovery<br/>3 entry-point groups</small>"]
     end
 
-    REST --> PIPE
-    QUEUES --> PIPE
     PIPE --> AGT
     PIPE --> REASON
     PIPE --> VAL
@@ -146,15 +139,6 @@ classDiagram
         +name: str
         +validate(value) ValidationRuleResult
     }
-    class QueueConsumer {
-        <<Protocol>>
-        +start()
-        +stop()
-    }
-    class QueueProducer {
-        <<Protocol>>
-        +publish(message)
-    }
 
     AgentLike <|.. FireflyAgent
     AgentLike <|.. pydantic_ai.Agent
@@ -192,12 +176,6 @@ classDiagram
     ValidationRule <|.. RangeRule
     ValidationRule <|.. EnumRule
     ValidationRule <|.. CustomRule
-    QueueConsumer <|.. KafkaAgentConsumer
-    QueueConsumer <|.. RabbitMQAgentConsumer
-    QueueConsumer <|.. RedisAgentConsumer
-    QueueProducer <|.. KafkaAgentProducer
-    QueueProducer <|.. RabbitMQAgentProducer
-    QueueProducer <|.. RedisAgentProducer
 ```
 
 ---
@@ -314,14 +292,6 @@ a global registry, delegation strategies, and declarative decorators.
 - **pipeline/context.py** -- `PipelineContext` shared data bus.
 - **pipeline/result.py** -- `NodeResult`, `PipelineResult`, `ExecutionTraceEntry`.
 
-### Exposure Layer
-
-- **exposure/rest/** -- FastAPI application factory that auto-generates REST endpoints
-  for every registered agent, with rate limiting, authentication middleware,
-  WebSocket support, and conversation CRUD endpoints.
-- **exposure/queues/** -- Abstract consumer/producer with Kafka, RabbitMQ, and Redis
-  implementations and a pattern-based message router.
-
 ### Studio Layer
 
 - **studio/server.py** -- FastAPI application factory for Firefly Agentic Studio,
@@ -346,15 +316,13 @@ a global registry, delegation strategies, and declarative decorators.
 
 ## Request Flow
 
-The following diagram shows the typical lifecycle of a request entering through the
-REST exposure layer, being processed by an agent with reasoning, and producing
-observability and explainability artefacts.
+The following diagram shows the typical lifecycle of an in-process agent run: a caller
+resolves an agent from the registry and invokes it, the agent reasons with tools, and
+observability and explainability artefacts are produced.
 
 ```mermaid
 sequenceDiagram
-    participant Client
-    participant REST as REST API<br/>(create_agentic_app)
-    participant MW as Middleware<br/>(CORS · RequestID)
+    participant Caller
     participant Reg as AgentRegistry
     participant Agent as FireflyAgent
     participant Mem as MemoryManager
@@ -364,11 +332,9 @@ sequenceDiagram
     participant OBS as FireflyTracer<br/>FireflyMetrics
     participant EXPL as TraceRecorder<br/>AuditTrail
 
-    Client->>REST: POST /agents/{name}/run
-    REST->>MW: apply middleware chain
-    MW->>Reg: agent_registry.get(name)
-    Reg-->>MW: FireflyAgent instance
-    MW->>Agent: agent.run(prompt, conversation_id)
+    Caller->>Reg: agent_registry.get(name)
+    Reg-->>Caller: FireflyAgent instance
+    Caller->>Agent: agent.run(prompt, conversation_id)
     Agent->>OBS: tracer.start_span("agent.run")
     Agent->>Mem: load conversation history
     Mem-->>Agent: message_history
@@ -386,8 +352,7 @@ sequenceDiagram
     Agent->>Mem: save conversation turn
     Agent->>OBS: tracer.end_span() · metrics.record_latency()
     Agent->>EXPL: audit_trail.append()
-    Agent-->>REST: AgentResponse
-    REST-->>Client: JSON response (or SSE stream)
+    Agent-->>Caller: AgentResponse
 ```
 
 ### Pipeline Execution Flow
diff --git a/docs/exposure-queues.md b/docs/exposure-queues.md
deleted file mode 100644
index e03ec8b7..00000000
--- a/docs/exposure-queues.md
+++ /dev/null
@@ -1,171 +0,0 @@
-# Exposure Queues Guide
-
-Copyright 2026 Firefly Software Foundation. Licensed under the Apache License 2.0.
-
-The Exposure Queues module provides an abstract consumer/producer model with concrete
-implementations for Apache Kafka, RabbitMQ, and Redis Pub/Sub, plus a pattern-based
-message router.
-
----
-
-## Architecture
-
-```mermaid
-flowchart TD
-    subgraph Message Brokers
-        KAFKA[Apache Kafka]
-        RABBIT[RabbitMQ]
-        REDIS[Redis Pub/Sub]
-    end
-
-    subgraph Consumers
-        KC[KafkaAgentConsumer]
-        RC[RabbitMQAgentConsumer]
-        RDC[RedisAgentConsumer]
-    end
-
-    KAFKA --> KC
-    RABBIT --> RC
-    REDIS --> RDC
-
-    KC --> ROUTER[QueueRouter]
-    RC --> ROUTER
-    RDC --> ROUTER
-    ROUTER --> REG[Agent Registry]
-    REG --> AGENT[Agent]
-```
-
----
-
-## Quick Start
-
-Install the queue extra for your broker:
-
-```bash
-uv add "fireflyframework-agentic[kafka]" # Kafka
-uv add "fireflyframework-agentic[rabbitmq]" # RabbitMQ
-uv add "fireflyframework-agentic[redis]" # Redis
-uv add "fireflyframework-agentic[queues]" # All brokers
-```
-
-### Kafka
-
-```python
-from fireflyframework_agentic.exposure.queues.kafka import KafkaAgentConsumer
-
-consumer = KafkaAgentConsumer(
-    agent_name="assistant",
-    topic="genai-requests",
-    bootstrap_servers="localhost:9092",
-    group_id="genai-workers",
-)
-await consumer.start()
-```
-
-### RabbitMQ
-
-```python
-from fireflyframework_agentic.exposure.queues.rabbitmq import RabbitMQAgentConsumer
-
-consumer = RabbitMQAgentConsumer(
-    agent_name="assistant",
-    queue_name="genai-requests",
-    url="amqp://guest:guest@localhost/",
-)
-await consumer.start()
-```
-
-### Redis
-
-```python
-from fireflyframework_agentic.exposure.queues.redis import RedisAgentConsumer
-
-consumer = RedisAgentConsumer(
-    agent_name="assistant",
-    channel="genai-requests",
-    url="redis://localhost:6379",
-)
-await consumer.start()
-```
-
----
-
-## QueueMessage
-
-All consumers and producers operate on `QueueMessage` objects:
-
-```python
-from fireflyframework_agentic.exposure.queues import QueueMessage
-
-message = QueueMessage(
-    body="Summarise this document.",
-    headers={"user": "alice"},
-    routing_key="summarisation",
-    reply_to="response-queue",
-)
-```
-
----
-
-## Queue Router
-
-The `QueueRouter` maps incoming messages to agents based on routing-key patterns.
-This is useful when a single consumer receives messages for multiple agents.
-
-```mermaid
-flowchart TD
-    MSG[Incoming Message] --> QR[QueueRouter]
-    QR -->|routing_key ~ 'summary.*'| A1[Summariser Agent]
-    QR -->|routing_key ~ 'translate.*'| A2[Translator Agent]
-    QR -->|no match| A3[Default Agent]
-```
-
-```python
-from fireflyframework_agentic.exposure.queues import QueueRouter, QueueMessage
-
-router = QueueRouter(default_agent="fallback")
-router.add_route(r"summary\..*", "summariser")
-router.add_route(r"translate\..*", "translator")
-
-message = QueueMessage(body="Bonjour", routing_key="translate.fr")
-response = await router.route(message)
-```
-
----
-
-## Creating a Custom Consumer
-
-To integrate with a message broker not supported out of the box, extend
-`BaseQueueConsumer` and implement the `start` and `stop` methods:
-
-```python
-from fireflyframework_agentic.exposure.queues.base import BaseQueueConsumer
-
-class MyBrokerConsumer(BaseQueueConsumer):
-    async def start(self) -> None:
-        # Connect to the broker and begin consuming
-        ...
-
-    async def stop(self) -> None:
-        # Disconnect
-        ...
-```
-
-The base class provides `_process_message(message)` which routes the message to the
-configured agent automatically.
-
----
-
-## Lifecycle
-
-```mermaid
-stateDiagram-v2
-    [*] --> Created
-    Created --> Running : start()
-    Running --> Running : process message
-    Running --> Stopped : stop()
-    Stopped --> [*]
-```
-
-Consumers are designed to be long-running. Call `start()` to connect and begin
-processing, and `stop()` to shut down gracefully.
diff --git a/docs/exposure-rest.md b/docs/exposure-rest.md
deleted file mode 100644
index 1aa00c02..00000000
--- a/docs/exposure-rest.md
+++ /dev/null
@@ -1,294 +0,0 @@
-# Exposure REST Guide
-
-Copyright 2026 Firefly Software Foundation. Licensed under the Apache License 2.0.
-
-The Exposure REST module provides a FastAPI application factory that auto-generates
-REST endpoints for registered agents, with health checks, SSE streaming, middleware,
-and CORS support.
-
----
-
-## Architecture
-
-```mermaid
-flowchart TD
-    CLIENT[HTTP Client] --> MW["Middleware<br/>(Request ID, CORS)"]
-    MW --> ROUTER[Agent Router]
-    MW --> HEALTH[Health Router]
-    ROUTER --> REG[Agent Registry]
-    REG --> AGENT[Agent]
-    AGENT --> STREAM[SSE Streaming]
-    STREAM --> CLIENT
-```
-
----
-
-## Quick Start
-
-Install the REST extra:
-
-```bash
-uv add "fireflyframework-agentic[rest]"
-```
-
-Create a FastAPI application:
-
-```python
-from fireflyframework_agentic.exposure.rest import create_agentic_app
-
-app = create_agentic_app(title="My GenAI Service", version="1.0.0")
-```
-
-Run with Uvicorn:
-
-```bash
-uvicorn myapp:app --reload
-```
-
----
-
-## Application Factory
-
-The `create_agentic_app` function creates a configured FastAPI application with:
-
-- Agent routes auto-generated from the `AgentRegistry`.
-- Health-check endpoints at `/health` and `/health/ready`.
-- Request-ID middleware that injects or propagates `X-Request-ID` headers.
-- CORS middleware with configurable origins.
-
-```python
-app = create_agentic_app(
-    title="Production GenAI API",
-    version="2.0.0",
-    enable_cors=True,
-    cors_origins=["https://myapp.example.com"],
-)
-```
-
----
-
-## Auto-Generated Endpoints
-
-The agent router creates two endpoints for every registered agent:
-
-- **GET /agents/** -- Lists all registered agents with their metadata.
-- **POST /agents/{name}/run** -- Invokes an agent with a prompt and returns the response.
-
-### Request Schema
-
-```json
-{
-    "prompt": "Summarise this document.",
-    "deps": {}
-}
-```
-
-### Response Schema
-
-```json
-{
-    "agent_name": "summariser",
-    "output": "The document discusses...",
-    "success": true,
-    "error": null,
-    "metadata": {}
-}
-```
-
----
-
-## SSE Streaming
-
-For long-running agent invocations, the REST layer supports Server-Sent Events (SSE).
-The `sse_stream` function yields SSE-formatted events as the agent produces output.
-Streaming uses the same request body as the run endpoint.
-
-```mermaid
-sequenceDiagram
-    participant Client
-    participant REST
-    participant Agent
-
-    Client->>REST: POST /agents/writer/stream {"prompt": "..."}
-    REST->>Agent: run_stream(prompt)
-    loop Stream chunks
-        Agent-->>REST: text chunk
-        REST-->>Client: data: {"text": "..."}
-    end
-    REST-->>Client: data: [DONE]
-```
-
----
-
-## Middleware
-
-### Request ID
-
-Every request receives a unique `X-Request-ID` header. If the client sends one, it
-is propagated; otherwise, the middleware generates a UUID.
-
-### CORS
-
-Cross-Origin Resource Sharing is configured via the application factory. By default
-it allows all origins. In production, restrict this to your known domains.
-
----
-
-## Rate Limiting
-
-The REST layer includes a sliding-window rate limiter that can be applied
-as middleware to protect agents from excessive traffic.
-
-```python
-from fireflyframework_agentic.exposure.rest.middleware import add_rate_limit_middleware
-
-add_rate_limit_middleware(
-    app,
-    max_requests=100,
-    window_seconds=60.0,
-)
-```
-
-When a client exceeds the limit, the middleware returns a `429 Too Many Requests`
-response with a JSON body `{"detail": "Rate limit exceeded"}`.
-
-By default, the rate key is the client's IP address. Provide a custom
-`key_func` to rate-limit by API key, user ID, or any other request attribute:
-
-```python
-add_rate_limit_middleware(
-    app,
-    max_requests=20,
-    window_seconds=60.0,
-    key_func=lambda request: request.headers.get("X-API-Key", "anonymous"),
-)
-```
-
-The `RateLimiter` class can also be used standalone outside of middleware:
-
-```python
-from fireflyframework_agentic.exposure.rest.middleware import RateLimiter
-
-limiter = RateLimiter(max_requests=10, window_seconds=30.0)
-if not limiter.is_allowed("client-123"):
-    raise HTTPException(status_code=429)
-```
-
----
-
-## Authentication Middleware
-
-The REST layer includes `add_auth_middleware()` that supports two authentication
-modes:
-
-- **API Key** — checked via the `X-API-Key` header.
-- **Bearer Token** — checked via the `Authorization: Bearer <token>` header.
-
-When both are configured, a request is accepted if **either** method succeeds.
-Unauthenticated requests receive a `401 Unauthorized` response.
-
-```python
-from fireflyframework_agentic.exposure.rest.middleware import add_auth_middleware
-
-add_auth_middleware(
-    app,
-    api_keys=["key-abc-123", "key-def-456"],
-    bearer_tokens=["token-xyz"],
-    exclude_paths=["/health", "/health/ready", "/docs"],
-)
-```
-
-The authentication middleware is **auto-wired** when the config fields
-`auth_api_keys` or `auth_bearer_tokens` are set:
-
-```bash
-export FIREFLY_AGENTIC_AUTH_API_KEYS='["key-abc-123"]'
-export FIREFLY_AGENTIC_AUTH_BEARER_TOKENS='["token-xyz"]'
-```
-
----
-
-## WebSocket Endpoint
-
-The REST layer includes a bidirectional WebSocket endpoint for real-time,
-multi-turn agent conversations at `/ws/agents/{name}`.
-
-```mermaid
-sequenceDiagram
-    participant Client
-    participant WS as WebSocket /ws/agents/{name}
-    participant Agent
-
-    Client->>WS: connect
-    WS-->>Client: accept
-    loop Conversation turns
-        Client->>WS: {"prompt": "Hello!", "conversation_id": "abc"}
-        WS->>Agent: run_stream / run
-        loop Streaming tokens
-            Agent-->>WS: token
-            WS-->>Client: {"type": "token", "data": "partial..."}
-        end
-        WS-->>Client: {"type": "result", "data": "full output", "success": true}
-    end
-    Client->>WS: disconnect
-```
-
-### Message Protocol
-
-**Client → Server** (JSON):
-
-```json
-{
-    "prompt": "Hello, agent!",
-    "conversation_id": "optional-id",
-    "deps": null
-}
-```
-
-**Server → Client** (JSON, one or more):
-
-```json
-{"type": "token", "data": "partial text..."}
-{"type": "result", "data": "full output", "success": true}
-{"type": "error", "data": "error message", "success": false}
-```
-
-If no `conversation_id` is provided, the server generates one and sends it
-back as `{"type": "conversation_id", "data": "generated-id"}`.
-
-Each WebSocket connection gets an isolated memory scope to prevent cross-talk
-between concurrent sessions.
-
----
-
-## Conversation Management Endpoints
-
-The agent router includes CRUD endpoints for managing conversations:
-
-- **POST /agents/conversations** — Create a new conversation. Returns
-  `{"conversation_id": "..."}`.
-- **GET /agents/conversations/{conversation_id}** — Return the message history
-  with `conversation_id`, `message_count`, and serialised `messages`.
-- **DELETE /agents/conversations/{conversation_id}** — Clear a conversation's
-  history.
-
-Pass `conversation_id` in the run or stream request body for multi-turn
-conversations:
-
-```json
-{
-    "prompt": "What did we discuss earlier?",
-    "conversation_id": "abc123"
-}
-```
-
----
-
-## Health Checks
-
-Two health endpoints are provided:
-
-- **GET /health** — Returns `{"status": "healthy"}` if the application is running.
-- **GET /health/ready** — Returns `{"status": "ready"}` when all agents are initialised.
-
-These endpoints are suitable for Kubernetes liveness and readiness probes.
diff --git a/docs/observability.md b/docs/observability.md
index 6e1e1c72..c9d29d8a 100644
--- a/docs/observability.md
+++ b/docs/observability.md
@@ -5,6 +5,14 @@ Copyright 2026 Firefly Software Foundation. Licensed under the Apache License 2.
 The Observability module provides OpenTelemetry-native tracing, custom metrics, and
 event recording for GenAI workloads.
 
+> **Framework emits, host exports.** The framework *emits* model/agent spans,
+> metrics, and events through the OpenTelemetry **API**. It deliberately does
+> **not** configure the OpenTelemetry SDK, install global tracer/meter
+> providers, wire up exporters, or propagate trace context across services —
+> all of that is the **host** application's responsibility. Configure your SDK and
+> exporters once in the host process and the framework's telemetry flows into
+> them automatically.
+
 ---
 
 ## Architecture
@@ -15,10 +23,10 @@ flowchart TD
     DEC --> TRACER[FireflyTracer]
     DEC --> METRICS[FireflyMetrics]
     APP --> EVENTS[FireflyEvents]
-    TRACER --> OTEL[OpenTelemetry SDK]
+    TRACER --> OTEL["OpenTelemetry API"]
     METRICS --> OTEL
     EVENTS --> OTEL
-    OTEL --> EXP["Exporters<br/>(OTLP, Console, Jaeger)"]
+    OTEL --> HOST["Host-configured SDK<br/>(providers + exporters)"]
 ```
 
 ---
@@ -53,54 +61,11 @@ async def process_request(prompt: str) -> str:
 
 ### Distributed Trace Correlation
 
-The framework supports **W3C Trace Context** propagation for correlating traces
-across service boundaries (HTTP, message queues, pipelines).
-
-**Trace Context Functions:**
-
-```python
-from fireflyframework_agentic.observability.tracer import inject_trace_context, extract_trace_context
-
-# Inject trace context into HTTP headers
-headers = {}
-inject_trace_context(headers)
-# headers now contain: traceparent, tracestate
-
-# Send request with trace context
-response = await http_client.post(url, headers=headers)
-
-# On receiving side, extract trace context
-incoming_headers = request.headers
-context = extract_trace_context(incoming_headers)
-# Continue trace with extracted context
-```
-
-**REST API Integration:**
-
-The framework's REST API automatically propagates trace context:
-
-```python
-# Middleware injects trace context into responses
-# and extracts from incoming requests
-from fireflyframework_agentic.exposure.rest.middleware import add_trace_propagation_middleware
-
-add_trace_propagation_middleware(app)
-```
-
-**Queue Integration:**
-
-Message queue consumers/producers automatically propagate trace context:
-
-```python
-# Kafka example - trace context in message headers
-from fireflyframework_agentic.exposure.queues.kafka import KafkaConsumer
-
-consumer = KafkaConsumer(
-    topic="requests",
-    handler=process_message,
-)
-# Trace context automatically extracted from message headers
-```
+Cross-service trace-context propagation (e.g. W3C Trace Context over HTTP or
+message queues) is owned by the **host** application: configure the standard
+OpenTelemetry propagators in your host and the spans the framework emits will
+be parented correctly. The framework itself only emits spans; it does not
+inject or extract `traceparent`/`tracestate` headers.
 
 **Pipeline Context:**
 
@@ -160,24 +125,14 @@ events.emit("agent.started", {"agent": "writer", "model": "gpt-4o"})
 
 ---
 
-## Exporters
-
-The `configure_exporters` function sets up OpenTelemetry exporters based on the
-framework's configuration:
-
-```python
-from fireflyframework_agentic.observability import configure_exporters
-
-configure_exporters(
-    otlp_endpoint="http://localhost:4317",
-    console=True,
-)
-```
-
-Supported exporters:
+## Exporters and SDK Configuration
 
-- **OTLP** -- Sends traces and metrics to any OpenTelemetry-compatible collector.
-- **Console** -- Prints spans and metrics to standard output (useful for development).
+The framework does **not** configure OpenTelemetry exporters or install global
+providers. It emits spans and metrics through the OpenTelemetry API; the **host**
+application owns SDK/exporter setup (OTLP collector, console, Jaeger, Azure
+Monitor, etc.). Configure the SDK once in your host process — for example with
+the standard `opentelemetry-sdk` / `opentelemetry-exporter-otlp` packages — and
+the framework's telemetry flows into it automatically.
 
 ---
 
@@ -268,7 +223,7 @@ For the single-tenant case, the `budget_limit_usd` config field auto-installs a
 
 ## Cost Sinks
 
-`UsageTracker` fans every `UsageRecord` out to one or more `CostSink` instances. Built-ins: `OTelMetricsSink`, `EventBusSink`, `LoggingSink`, `JSONLFileSink`, `WebhookSink`. Custom sinks implement the protocol's `emit(record)` method.
+`UsageTracker` fans every `UsageRecord` out to one or more `CostSink` instances. Built-ins: `OTelMetricsSink`, `EventBusSink`, `LoggingSink`, `JSONLFileSink`. Custom sinks implement the protocol's `emit(record)` method.
 
 ```python
 from fireflyframework_agentic.observability.sinks import (
diff --git a/docs/security.md b/docs/security.md
index 6703e631..0b5334e7 100644
--- a/docs/security.md
+++ b/docs/security.md
@@ -309,76 +309,6 @@ The `scan()` method returns an `OutputGuardResult` dataclass with:
 
 ---
 
-## Role-Based Access Control (RBAC)
-
-The RBAC module provides JWT-based authentication and role/permission management
-for multi-tenant agent deployments.
-
-```python
-from fireflyframework_agentic.security.rbac import RBACManager, require_permission
-
-# Initialize RBAC with JWT secret
-rbac = RBACManager(jwt_secret="your-secret-key-here")
-
-# Create roles and assign permissions
-rbac.create_role("admin", permissions=["agent.create", "agent.delete", "agent.run"])
-rbac.create_role("user", permissions=["agent.run"])
-
-# Assign roles to users
-rbac.assign_role("user@example.com", "user")
-rbac.assign_role("admin@example.com", "admin")
-
-# Generate JWT token
-token = rbac.generate_token("user@example.com")
-
-# Validate token and check permissions
-claims = rbac.validate_token(token)
-if rbac.has_permission(claims["sub"], "agent.run"):
-    # Allow access
-    result = await agent.run(prompt)
-```
-
-### Decorator-Based Protection
-
-Protect agent endpoints with the `@require_permission` decorator:
-
-```python
-from fireflyframework_agentic.security.rbac import require_permission
-
-@require_permission("agent.run")
-async def call_agent(prompt: str, token: str):
-    # Token is validated and permission checked
-    return await agent.run(prompt)
-```
-
-### Multi-Tenant Isolation
-
-RBAC supports tenant-scoped permissions for SaaS applications:
-
-```python
-# Create tenant-specific roles
-rbac.create_role("tenant-1-user", permissions=["agent.run"], tenant="tenant-1")
-rbac.create_role("tenant-2-user", permissions=["agent.run"], tenant="tenant-2")
-
-# Assign users to tenants
-rbac.assign_role("user1@example.com", "tenant-1-user", tenant="tenant-1")
-
-# Check tenant-scoped permission
-if rbac.has_permission("user1@example.com", "agent.run", tenant="tenant-1"):
-    # User can access tenant-1 resources only
-    pass
-```
-
-### Environment Configuration
-
-```bash
-export FIREFLY_AGENTIC_RBAC_ENABLED=true
-export FIREFLY_AGENTIC_RBAC_JWT_SECRET=your-secret-key
-export FIREFLY_AGENTIC_RBAC_TOKEN_EXPIRY_SECONDS=3600
-```
-
----
-
 ## Data Encryption
 
 The encryption module provides AES-256-GCM encryption for sensitive data at rest.
@@ -500,47 +430,6 @@ export FIREFLY_AGENTIC_DATABASE_ALLOW_UNSAFE_QUERIES=true
 
 ---
 
-## CORS Security
-
-The REST API enforces restrictive CORS policies by default.
-
-### Default Policy (Secure)
-
-By default, **no origins** are allowed:
-
-```python
-from fireflyframework_agentic.exposure.rest.middleware import add_cors_middleware
-
-# Default - blocks all cross-origin requests
-add_cors_middleware(app)
-```
-
-### Explicit Allow List
-
-Specify allowed origins for production deployments:
-
-```python
-add_cors_middleware(
-    app,
-    allow_origins=["https://app.example.com", "https://admin.example.com"],
-    allow_credentials=True,
-)
-```
-
-### Environment Configuration
-
-```bash
-export FIREFLY_AGENTIC_CORS_ALLOWED_ORIGINS='["https://app.example.com"]'
-export FIREFLY_AGENTIC_CORS_ALLOW_CREDENTIALS=true
-export FIREFLY_AGENTIC_CORS_ALLOW_METHODS='["GET", "POST"]'
-export FIREFLY_AGENTIC_CORS_MAX_AGE=3600
-```
-
-**Security Note:** Never use `allow_origins=["*"]` in production. Always
-maintain an explicit allow list of trusted domains.
-
----
-
 ## Security Best Practices
 
 ### Defence in Depth
@@ -554,7 +443,6 @@ from fireflyframework_agentic.agents.builtin_middleware import (
     OutputGuardMiddleware,
     CostGuardMiddleware,
 )
-from fireflyframework_agentic.security.rbac import require_permission
 from fireflyframework_agentic.security.encryption import EncryptedMemoryStore
 
 # Encrypted storage
@@ -573,18 +461,14 @@ agent = FireflyAgent(
     ],
 )
 
-# Protected endpoint
-@require_permission("agent.run")
-async def secure_endpoint(prompt: str, token: str):
-    return await agent.run(prompt)
+# Run the agent through the security middleware chain
+result = await agent.run(prompt)
 ```
 
 ### Production Checklist
 
-- [x] Enable RBAC for multi-user access
 - [x] Encrypt sensitive data at rest
 - [x] Use parameterized queries for database access
-- [x] Configure restrictive CORS policies
 - [x] Enable PromptGuard and OutputGuard middleware
 - [x] Set budget limits with CostGuardMiddleware
 - [x] Store secrets in a secure vault (not env vars)
diff --git a/docs/tutorial.md b/docs/tutorial.md
index b0614b78..ba9c9828 100644
--- a/docs/tutorial.md
+++ b/docs/tutorial.md
@@ -9,11 +9,10 @@ Copyright 2026 Firefly Software Foundation. Licensed under the Apache License 2.
 >
 > Each chapter introduces a concept, explains *why* it exists, shows *how* it works
 > with architecture diagrams, and immediately applies it to the IDP pipeline. By
-> Chapter 20 you will have a production-grade GenAI application that uses agents,
+> Chapter 18 you will have a production-grade GenAI application that uses agents,
 > tools, prompts, reasoning patterns, content processing, memory, validation, pipelines,
-> observability, explainability, experiments, a REST API, message-queue consumers **and
-> producers** (Kafka, RabbitMQ, Redis), multi-agent delegation, template agents, and
-> a plugin system — all wired together.
+> observability, explainability, experiments, multi-agent delegation, template agents,
+> and a plugin system — all wired together.
 
 ---
 
@@ -39,19 +38,17 @@ Copyright 2026 Firefly Software Foundation. Licensed under the Apache License 2.
 11. [Observability](#chapter-11-observability) — Tracing, metrics, events, OpenTelemetry exporters
 12. [Explainability](#chapter-12-explainability) — Decisions, explanations, audit trail, reports
 
-**Part IV — Experimentation & Deployment**
+**Part IV — Experimentation**
 
 13. [Experiments](#chapter-13-experiments) — A/B testing, variant comparison, tracking
 14. [Lab](#chapter-14-lab) — Interactive sessions, benchmarks, model comparison, eval datasets
-15. [Exposure: REST API](#chapter-15-exposure-rest-api) — FastAPI factory, SSE streaming, health probes, CORS
-16. [Exposure: Message Queues](#chapter-16-exposure-message-queues) — Consumers + producers for Kafka, RabbitMQ, Redis *(diagram)*
 
 **Part V — Advanced**
 
-17. [Template Agents](#chapter-17-template-agents) — Summariser, classifier, extractor, conversational, router
-18. [Multi-Agent Delegation](#chapter-18-multi-agent-delegation) — Delegation router, strategies, memory forking *(diagram)*
-19. [Plugin System](#chapter-19-plugin-system) — Entry-point discovery, packaging agents/tools/patterns
-20. [Putting It All Together](#chapter-20-putting-it-all-together) — Full IDP implementation, project structure, production checklist *(full system diagram)*
+15. [Template Agents](#chapter-15-template-agents) — Summariser, classifier, extractor, conversational, router
+16. [Multi-Agent Delegation](#chapter-16-multi-agent-delegation) — Delegation router, strategies, memory forking *(diagram)*
+17. [Plugin System](#chapter-17-plugin-system) — Entry-point discovery, packaging agents/tools/patterns
+18. [Putting It All Together](#chapter-18-putting-it-all-together) — Full IDP implementation, project structure, production checklist *(full system diagram)*
 
 ---
 
@@ -76,7 +73,6 @@ to your destination.
 - **Backend engineers** building GenAI features into existing applications.
 - **ML/AI engineers** who want structured reasoning, validation, and observability out
   of the box.
-- **Platform teams** who need a standard way to expose agents via REST APIs and queues.
 
 ### The Four Design Principles
 
@@ -87,12 +83,12 @@ The framework is guided by four principles that show up in every module:
 2. **Convention over configuration** — Sensible defaults everywhere. One `FireflyAgenticConfig`
    object (backed by Pydantic Settings) centralises every knob and reads from environment
    variables automatically.
-3. **Layered composition** — Modules are organised into six layers (Core, Agent,
-   Intelligence, Experimentation, Orchestration, Exposure). Higher layers depend on
+3. **Layered composition** — Modules are organised into layers (Core, Agent,
+   Intelligence, Experimentation, Orchestration). Higher layers depend on
    lower layers, never the reverse.
-4. **Optional dependencies** — Heavy libraries (FastAPI, aiokafka, aio-pika, redis) are
-   declared as extras. The core framework imports them lazily so you only install what
-   you use.
+4. **Optional dependencies** — Heavy libraries (embedding providers, vector store
+   clients, storage backends) are declared as extras. The core framework imports them
+   lazily so you only install what you use.
 
 ### The Running Example: Intelligent Document Processing
 
@@ -103,7 +99,7 @@ Raw Document → Classify → Digitise (OCR) → Extract Fields → Validate →
 ```
 
 Every chapter teaches a framework concept and immediately applies it to a phase of this
-pipeline. By Chapter 20 you will have the complete, production-ready system.
+pipeline. By Chapter 18 you will have the complete, production-ready system.
 
 ---
 
@@ -130,22 +126,20 @@ This installs the core framework with its minimal dependencies: `pydantic-ai`,
 The framework provides optional extras for additional capabilities:
 
 ```bash
-# REST API support (FastAPI + Uvicorn + SSE)
-uv add "fireflyframework-agentic[rest]"
+# Embedding providers (e.g. OpenAI / Azure)
+uv add "fireflyframework-agentic[openai-embeddings]"
 
-# Individual message queue backends
-uv add "fireflyframework-agentic[kafka]"
-uv add "fireflyframework-agentic[rabbitmq]"
-uv add "fireflyframework-agentic[redis]"
+# Vector store backends
+uv add "fireflyframework-agentic[vectorstores-chroma]"
 
-# All queue backends at once
-uv add "fireflyframework-agentic[queues]"
+# Memory persistence backends
+uv add "fireflyframework-agentic[postgres]"
 
-# Everything (REST + all queues)
+# Everything
 uv add "fireflyframework-agentic[all]"
 ```
 
-For our IDP project we will eventually use REST and queues, so install everything:
+For our IDP project we will eventually use several of these, so install everything:
 
 ```bash
 uv add "fireflyframework-agentic[all]"
@@ -191,7 +185,6 @@ Here are the most commonly used configuration fields:
 - `default_temperature` — Sampling temperature (0.0–1.0).
 - `max_retries` — Default retry count for agent runs.
 - `observability_enabled` — Toggle OpenTelemetry instrumentation.
-- `otlp_endpoint` — OTLP exporter endpoint (default: console).
 - `prompt_templates_dir` — Directory for Jinja2 prompt files.
 - `default_chunk_size` / `default_chunk_overlap` — Content chunking defaults.
 - `max_context_tokens` — Maximum context window (default 128,000).
@@ -342,7 +335,7 @@ agent = FireflyAgent(name="local-agent", model=model)
   runtime.
 
 Both approaches work identically with every framework feature — tools, reasoning
-patterns, pipelines, REST exposure, queue consumers, cost tracking, prompt caching,
+patterns, pipelines, cost tracking, prompt caching,
 and all other modules. The framework's `model_utils` module normalizes model
 identity from both strings and `Model` objects, so observability and resilience
 features work uniformly across all providers.
@@ -367,8 +360,8 @@ FIREFLY_AGENTIC_OBSERVABILITY_ENABLED=true
 Every GenAI application starts with a single question: *"How do I talk to the model?"*
 In raw Pydantic AI you create an `Agent`, give it a system prompt, and call `run()`.
 That works great for scripts — but the moment you need to register agents by name,
-share them across REST endpoints and queue consumers, attach lifecycle hooks, or plug
-them into reasoning patterns and pipelines, you need a thin coordination layer on top.
+share them across pipelines, delegation routers, and reasoning patterns, attach lifecycle
+hooks, or plug them into a larger system, you need a thin coordination layer on top.
 
 That is exactly what `FireflyAgent` is. It wraps a Pydantic AI `Agent` and adds three
 things the framework relies on: a **global registry** (so any module can look up an
@@ -398,8 +391,6 @@ graph TB
     end
 
     subgraph Consumers
-        REST["REST / API"]
-        QUEUE["Queue Consumers"]
         PIPE["Pipelines"]
         DELEG["Delegation Router"]
         REASON["Reasoning Patterns"]
@@ -411,8 +402,6 @@ graph TB
     FA -->|registers in| REG
     FA -->|carries| CTX
     FA -->|hooks| LC
-    REG -->|lookup by name| REST
-    REG -->|lookup by name| QUEUE
     REG -->|lookup by name| PIPE
     REG -->|lookup by name| DELEG
     REG -->|lookup by name| REASON
@@ -442,7 +431,7 @@ What happens behind the scenes:
 2. The decorated function becomes the agent's **dynamic instructions provider** — it is
    called at the start of every run and can use the context to customise the system prompt.
 3. The agent is automatically registered in the global `AgentRegistry`, so any module
-   (REST endpoints, pipelines, delegation routers) can look it up by name.
+   (pipelines, delegation routers) can look it up by name.
 
 ### Creating an Agent with the Class
 
@@ -466,7 +455,7 @@ classifier = FireflyAgent(
     output_type=dict,
 )
 
-# Register it so other parts of the framework (REST, queues, pipelines) can find it.
+# Register it so other parts of the framework (pipelines, delegation) can find it.
 agent_registry.register(classifier)
 ```
 
@@ -492,8 +481,8 @@ async with classifier.run_stream("Classify this document: ...") as stream:
 ### The Agent Registry
 
 The `AgentRegistry` is a process-wide singleton that maps agent names to `FireflyAgent`
-instances. This is the glue that lets any module — REST endpoints, queue consumers,
-delegation routers, pipelines, reasoning patterns — discover and invoke agents without
+instances. This is the glue that lets any module — delegation routers, pipelines,
+reasoning patterns — discover and invoke agents without
 importing them directly:
 
 ```python
@@ -976,7 +965,7 @@ agent = FireflyAgent(
 For our IDP pipeline, we need tools the extraction agent can call. We define them
 with `@firefly_tool`, group them into a `ToolKit`, and attach them to the agent
 via `as_pydantic_tools()`. This is the pattern you will see end-to-end in
-Chapter 6 (reasoning patterns) and Chapter 20 (full IDP application).
+Chapter 6 (reasoning patterns) and Chapter 18 (full IDP application).
 
 **Step 1 — Define the tools:**
 
@@ -1023,7 +1012,7 @@ extractor_agent = FireflyAgent(
 > **What happens next:** In Chapter 6 we pass `extractor_agent` (with its tools
 > already attached) to reasoning patterns like Plan-and-Execute and Reflexion.
 > The pattern calls `agent.run()` internally — the tools are available because
-> they were bound here. Chapter 20 shows the complete production module
+> they were bound here. Chapter 18 shows the complete production module
 > (`idp/tools.py`) with retries, guards, and the full ToolKit.
 
 ---
@@ -1762,7 +1751,7 @@ if not validation_passed:
 > **Architecture recap:** Reasoning patterns never see tools directly. They receive
 > an agent (which owns its tools) and call `agent.run()`. This is why tools must be
 > bound to the agent *before* passing it to a pattern — see the "Attaching Tools to
-> Agents" section in Chapter 4 and the full `idp/tools.py` module in Chapter 20.
+> Agents" section in Chapter 4 and the full `idp/tools.py` module in Chapter 18.
 
 ---
 
@@ -2916,23 +2905,17 @@ events.emit("pipeline.step.completed", {"step": "classify", "duration_ms": 250})
 
 ### Exporter Configuration
 
-Configure where traces and metrics go:
-
-```python
-from fireflyframework_agentic.observability import configure_exporters
-
-# Send to an OTLP collector (Jaeger, Grafana Tempo, etc.)
-configure_exporters(otlp_endpoint="http://localhost:4317")
-
-# Or just print to console for development
-configure_exporters(console=True)
-```
+The framework emits spans and metrics purely through the OpenTelemetry API; it
+does not configure the OTel SDK or any exporters itself. The host application
+owns OTel SDK and exporter setup — wire up your `TracerProvider`,
+`MeterProvider`, and the exporters (OTLP collector, console, etc.) however your
+deployment requires, and the framework's telemetry flows through the globally
+configured providers automatically.
 
 Configuration via environment variables:
 
 ```bash
 export FIREFLY_AGENTIC_OBSERVABILITY_ENABLED=true
-export FIREFLY_AGENTIC_OTLP_ENDPOINT=http://localhost:4317
 export FIREFLY_AGENTIC_LOG_LEVEL=DEBUG
 ```
 
@@ -3135,7 +3118,7 @@ print(report.build_markdown())
 
 ---
 
-# Part IV — Experimentation & Deployment
+# Part IV — Experimentation
 
 ---
 
@@ -3341,389 +3324,11 @@ print(f"Extraction accuracy: {report.avg_score:.1%}")
 
 ---
 
-## Chapter 15: Exposure: REST API
-
-Your agents work, your pipeline passes validation, your experiments prove which model is
-best. Now you need to put it all behind an HTTP endpoint so other services (or a UI)
-can call it. The Exposure REST module gives you a one-liner FastAPI application factory
-that auto-generates endpoints for every agent in the `AgentRegistry` — including
-streaming via Server-Sent Events, health probes, CORS, and correlation-ID propagation.
-You can also add custom endpoints for pipelines.
-
-### Quick Start
-
-```bash
-uv add "fireflyframework-agentic[rest]"
-```
-
-```python
-from fireflyframework_agentic.exposure.rest import create_agentic_app
-
-app = create_agentic_app(title="IDP Service", version="1.0.0")
-```
-
-```bash
-uvicorn myapp:app --reload
-```
-
-That's it. The app auto-generates endpoints for every agent in the `AgentRegistry`.
-
-### What You Get Out of the Box
-
-- **GET /agents/** — Lists all registered agents with metadata.
-- **POST /agents/{name}/run** — Invokes an agent with a JSON body.
-- **GET /agents/{name}/stream** — SSE streaming for real-time output.
-- **GET /health** — Liveness probe (`{"status": "healthy"}`).
-- **GET /health/ready** — Readiness probe (`{"status": "ready"}`).
-- **X-Request-ID** middleware — Injects or propagates request correlation IDs.
-- **CORS** middleware — Configurable origins.
-
-### Request and Response
-
-```json
-// POST /agents/extractor/run
-{
-    "prompt": "Extract fields from: Invoice #INV-001, Acme Corp, $500",
-    "deps": {}
-}
-```
-
-```json
-// Response
-{
-    "agent_name": "extractor",
-    "output": {"invoice_number": "INV-001", "vendor_name": "Acme Corp", ...},
-    "success": true,
-    "error": null,
-    "metadata": {}
-}
-```
-
-### SSE Streaming
-
-For long-running agent invocations:
-
-```
-GET /agents/extractor/stream?prompt=Extract+fields+from+...
-
-data: {"text": "Processing..."}
-data: {"text": "Found invoice number..."}
-data: [DONE]
-```
-
-### Configuration
-
-```python
-app = create_agentic_app(
-    title="IDP Service",
-    version="1.0.0",
-    enable_cors=True,
-    cors_origins=["https://myapp.example.com"],
-)
-```
-
-### Multi-Turn Conversations via REST
-
-Pass `conversation_id` in the request body:
-
-```json
-{
-    "prompt": "What did we discuss earlier?",
-    "conversation_id": "abc123"
-}
-```
-
-### IDP Tie-In: Exposing the Pipeline as a REST API
-
-```python
-from fireflyframework_agentic.exposure.rest import create_agentic_app
-from fastapi import UploadFile
-
-app = create_agentic_app(title="IDP Service")
-
-# Custom endpoint for the full IDP pipeline
-@app.post("/idp/process")
-async def process_document(file: UploadFile):
-    content = await file.read()
-    ctx = PipelineContext(
-        inputs=content,
-        metadata={"filename": file.filename},
-    )
-    result = await idp_pipeline.run(context=ctx)
-    return result.model_dump() if hasattr(result, "model_dump") else result
-```
-
----
-
-## Chapter 16: Exposure: Message Queues
-
-REST is great for synchronous request/response, but many production systems are
-**event-driven**: documents arrive on a Kafka topic, processing results go back on
-another topic, and nothing blocks. The Queues module gives you both sides of that
-coin — **consumers** that listen for incoming messages and route them to agents, and
-**producers** that publish agent results back to the broker.
-
-Three brokers are supported out of the box: Apache Kafka, RabbitMQ, and Redis Pub/Sub.
-Each follows the same `QueueConsumer` / `QueueProducer` protocol, so switching
-brokers is a one-line change.
-
-```mermaid
-flowchart LR
-    subgraph Broker
-        REQ["Requests Topic"]
-        RES["Results Topic"]
-    end
-
-    subgraph fireflyframework-agentic
-        CONS["Consumer<br/><small>KafkaAgentConsumer<br/>RabbitMQAgentConsumer<br/>RedisAgentConsumer</small>"]
-        ROUTER["QueueRouter<br/><small>pattern-based routing</small>"]
-        REG["AgentRegistry"]
-        AGT["FireflyAgent"]
-        PROD["Producer<br/><small>KafkaAgentProducer<br/>RabbitMQAgentProducer<br/>RedisAgentProducer</small>"]
-    end
-
-    REQ --> CONS
-    CONS --> ROUTER
-    ROUTER --> REG
-    REG --> AGT
-    AGT --> PROD
-    PROD --> RES
-```
-
-### Quick Start
-
-```bash
-# Install the backend you need
-uv add "fireflyframework-agentic[kafka]"
-uv add "fireflyframework-agentic[rabbitmq]"
-uv add "fireflyframework-agentic[redis]"
-```
-
-### Consumers
-
-Consumers listen on a topic/queue/channel and route each incoming message to a
-registered agent. They run continuously — think of them as your agent's "inbox".
-
-#### Kafka Consumer
-
-```python
-from fireflyframework_agentic.exposure.queues.kafka import KafkaAgentConsumer
-
-# This consumer reads from the "idp-incoming-documents" topic.
-# Every message body is passed to the "document_classifier" agent's run() method.
-consumer = KafkaAgentConsumer(
-    agent_name="document_classifier",
-    topic="idp-incoming-documents",
-    bootstrap_servers="localhost:9092",
-    group_id="idp-workers", # Kafka consumer group for load balancing
-)
-await consumer.start() # Blocks and processes messages until stopped
-```
-
-#### RabbitMQ Consumer
-
-```python
-from fireflyframework_agentic.exposure.queues.rabbitmq import RabbitMQAgentConsumer
-
-consumer = RabbitMQAgentConsumer(
-    agent_name="document_classifier",
-    queue_name="idp-incoming-documents",
-    url="amqp://guest:guest@localhost/",
-)
-await consumer.start()
-```
-
-#### Redis Consumer
-
-```python
-from fireflyframework_agentic.exposure.queues.redis import RedisAgentConsumer
-
-consumer = RedisAgentConsumer(
-    agent_name="document_classifier",
-    channel="idp-incoming-documents",
-    url="redis://localhost:6379",
-)
-await consumer.start()
-```
-
-### Producers
-
-Producers are the other half — they publish messages (typically agent results) back
-to the broker. Each producer satisfies the `QueueProducer` protocol.
-
-#### Kafka Producer
-
-```python
-from fireflyframework_agentic.exposure.queues.kafka import KafkaAgentProducer
-from fireflyframework_agentic.exposure.queues import QueueMessage
-
-# Create a producer that publishes to the "idp-results" topic.
-producer = KafkaAgentProducer(
-    topic="idp-results",
-    bootstrap_servers="localhost:9092",
-)
-
-# Publish a result back to the broker.
-await producer.publish(QueueMessage(
-    body='{"invoice_number": "INV-001", "status": "extracted"}',
-    headers={"agent": "field_extractor", "tenant": "acme-corp"},
-))
-
-# When you're done, clean up.
-await producer.stop()
-```
-
-#### RabbitMQ Producer
-
-```python
-from fireflyframework_agentic.exposure.queues.rabbitmq import RabbitMQAgentProducer
-
-producer = RabbitMQAgentProducer(
-    queue_name="idp-results",
-    url="amqp://guest:guest@localhost/",
-)
-await producer.publish(QueueMessage(body='{"status": "done"}'))
-await producer.stop()
-```
-
-#### Redis Producer
-
-```python
-from fireflyframework_agentic.exposure.queues.redis import RedisAgentProducer
-
-producer = RedisAgentProducer(
-    channel="idp-results",
-    url="redis://localhost:6379",
-)
-await producer.publish(QueueMessage(body='{"status": "done"}'))
-await producer.stop()
-```
-
-#### Consumer + Producer Pattern
-
-The most common pattern is a **consumer that processes messages and publishes results**.
-This turns your agent into a microservice that reads from one topic and writes to another:
-
-```python
-from fireflyframework_agentic.exposure.queues.kafka import KafkaAgentConsumer, KafkaAgentProducer
-from fireflyframework_agentic.exposure.queues import QueueMessage
-from fireflyframework_agentic.agents.registry import agent_registry
-
-# Set up both sides
-consumer = KafkaAgentConsumer(
-    agent_name="field_extractor",
-    topic="idp-extract-requests",
-    bootstrap_servers="kafka:9092",
-)
-producer = KafkaAgentProducer(
-    topic="idp-extract-results",
-    bootstrap_servers="kafka:9092",
-)
-
-# Process: consume → run agent → publish result
-async def process_and_publish():
-    agent = agent_registry.get("field_extractor")
-    # In practice, you'd integrate this into the consumer's message loop.
-    # Here we show the conceptual flow:
-    result = await agent.run("Extract fields from: Invoice #INV-001, Acme, $500")
-    await producer.publish(QueueMessage(
-        body=str(result.output),
-        headers={"agent": "field_extractor"},
-    ))
-```
-
-### Queue Messages
-
-All consumers and producers work with `QueueMessage`:
-
-```python
-from fireflyframework_agentic.exposure.queues import QueueMessage
-
-message = QueueMessage(
-    body="Process this invoice",
-    headers={"tenant": "acme-corp", "priority": "high"},
-    routing_key="invoice.process",
-    reply_to="idp-responses",
-)
-```
-
-### Queue Router
-
-Route messages to different agents based on routing-key patterns:
-
-```python
-from fireflyframework_agentic.exposure.queues import QueueRouter
-
-router = QueueRouter(default_agent="fallback")
-router.add_route(r"invoice\..*", "invoice_processor")
-router.add_route(r"receipt\..*", "receipt_processor")
-router.add_route(r"contract\..*", "contract_processor")
-
-# Incoming message with routing_key="invoice.classify"
-# → routed to "invoice_processor" agent
-```
-
-### Custom Consumers
-
-For unsupported brokers, extend `BaseQueueConsumer`:
-
-```python
-from fireflyframework_agentic.exposure.queues.base import BaseQueueConsumer
-
-class MyBrokerConsumer(BaseQueueConsumer):
-    async def start(self) -> None:
-        # Connect and begin consuming
-        ...
-
-    async def stop(self) -> None:
-        # Disconnect gracefully
-        ...
-```
-
-The base class provides `_process_message(message)` which routes to the configured
-agent automatically.
-
-### IDP Tie-In: Processing Documents from Kafka
-
-In our IDP system, documents arrive on a Kafka topic. The consumer classifies them
-and routes to specialised extractors. Results go back on a results topic for
-downstream systems to pick up:
-
-```python
-from fireflyframework_agentic.exposure.queues.kafka import KafkaAgentConsumer, KafkaAgentProducer
-from fireflyframework_agentic.exposure.queues import QueueRouter, QueueMessage
-
-# Route different document types to specialised extraction agents.
-# Messages with routing_key "invoice.*" go to the invoice extractor, etc.
-router = QueueRouter(default_agent="document_classifier")
-router.add_route(r"invoice\..*", "invoice_extractor")
-router.add_route(r"receipt\..*", "receipt_extractor")
-
-# Consumer: reads raw documents from Kafka
-consumer = KafkaAgentConsumer(
-    agent_name="document_classifier",
-    topic="idp-documents",
-    bootstrap_servers="kafka:9092",
-    group_id="idp-consumers",
-)
-
-# Producer: publishes extraction results back to Kafka
-producer = KafkaAgentProducer(
-    topic="idp-results",
-    bootstrap_servers="kafka:9092",
-)
-
-# Start both — the consumer runs in a loop, the producer is ready to publish.
-await consumer.start()
-```
-
----
-
 # Part V — Advanced
 
 ---
 
-## Chapter 17: Template Agents
+## Chapter 15: Template Agents
 
 By now you've written several agents from scratch — classifier, extractor, OCR. Each
 time you had to think about the system prompt, output type, and registration. But many
@@ -3878,7 +3483,7 @@ extractor_agent = create_extractor_agent(
 
 ---
 
-## Chapter 18: Multi-Agent Delegation
+## Chapter 16: Multi-Agent Delegation
 
 Not every document is an invoice. Your IDP system might receive receipts, contracts,
 and forms — each requiring a specialised agent with different prompts, tools, and
@@ -3993,7 +3598,7 @@ router = DelegationRouter(
 
 ---
 
-## Chapter 19: Plugin System
+## Chapter 17: Plugin System
 
 As your application grows, you'll want to share agents, tools, and reasoning patterns
 across projects — or let third-party teams contribute their own. The Plugin module
@@ -4058,7 +3663,7 @@ agents automatically.
 
 ---
 
-## Chapter 20: Putting It All Together
+## Chapter 18: Putting It All Together
 
 You've learned every module in fireflyframework-agentic, each in isolation. Now it's time
 to see how they all fit together in a single, production-grade application. The diagram
@@ -4068,11 +3673,8 @@ below shows the full system architecture — every layer, every connection:
 
 ```mermaid
 graph TB
-    subgraph "Exposure Layer"
-        REST["REST API\n(FastAPI + SSE)"]
-        KAFKA["Kafka Consumer"]
-        RABBIT["RabbitMQ Consumer"]
-        REDIS["Redis Consumer"]
+    subgraph "Caller"
+        APP["Host application\n(in-process)"]
     end
 
     subgraph "Orchestration Layer"
@@ -4112,7 +3714,7 @@ graph TB
         PLUG["Plugin System\n(entry-point discovery)"]
     end
 
-    REST & KAFKA & RABBIT & REDIS --> PIPE & DELEG
+    APP --> PIPE & DELEG
     PIPE --> FA
     DELEG --> FA
     FA --> REASON
@@ -4154,8 +3756,7 @@ idp-service/
 │ ├── tools.py # Tool definitions
 │ ├── pipeline.py # Pipeline wiring
 │ ├── validation.py # Validation rules
-│ ├── app.py # REST application
-│ └── consumers.py # Queue consumers
+│ └── main.py # In-process entry point
 └── tests/
     └── test_pipeline.py
 ```
@@ -4167,7 +3768,6 @@ FIREFLY_AGENTIC_DEFAULT_MODEL=openai:gpt-4o
 FIREFLY_AGENTIC_DEFAULT_TEMPERATURE=0.1
 FIREFLY_AGENTIC_MAX_RETRIES=3
 FIREFLY_AGENTIC_OBSERVABILITY_ENABLED=true
-FIREFLY_AGENTIC_OTLP_ENDPOINT=http://localhost:4317
 FIREFLY_AGENTIC_MEMORY_BACKEND=file
 FIREFLY_AGENTIC_MEMORY_FILE_DIR=.firefly_memory
 FIREFLY_AGENTIC_DEFAULT_CHUNK_SIZE=4000
@@ -4372,59 +3972,33 @@ async def process_document(document_bytes: bytes, metadata: dict | None = None)
     return result.final_output if result.success else {"error": result.failed_nodes}
 ```
 
-### REST Application (app.py)
+### Entry Point (main.py)
+
+`fireflyframework-agentic` is a pure in-process library: it listens on no port and consumes
+from no message broker. Your host service owns serving and calls `process_document` directly.
+The host also owns OTel SDK and exporter configuration; the framework emits spans and metrics
+through the OpenTelemetry API, so they flow through whatever providers the host has set up:
 
 ```python
-from fireflyframework_agentic.exposure.rest import create_agentic_app
-from fireflyframework_agentic.observability import configure_exporters
-from fastapi import UploadFile
-from .pipeline import process_document
+import asyncio
 
-# Configure observability
-configure_exporters(otlp_endpoint="http://localhost:4317", console=True)
+from .pipeline import process_document
 
-# Create the app
-app = create_agentic_app(title="IDP Service", version="1.0.0")
 
-@app.post("/idp/process")
-async def handle_document(file: UploadFile):
-    content = await file.read()
-    result = await process_document(
-        content,
-        metadata={"filename": file.filename, "source": "rest-api"},
+async def main(document_bytes: bytes, filename: str) -> dict:
+    return await process_document(
+        document_bytes,
+        metadata={"filename": filename, "source": "host-service"},
     )
-    return result
-```
-
-### Queue Consumers (consumers.py)
-
-```python
-from fireflyframework_agentic.exposure.queues.kafka import KafkaAgentConsumer
-from fireflyframework_agentic.exposure.queues import QueueRouter
 
-# Route different document types to specialised processing
-router = QueueRouter(default_agent="document_classifier")
-router.add_route(r"invoice\..*", "field_extractor")
-router.add_route(r"receipt\..*", "receipt_processor")
 
-# Main Kafka consumer
-consumer = KafkaAgentConsumer(
-    agent_name="document_classifier",
-    topic="idp-documents",
-    bootstrap_servers="kafka:9092",
-    group_id="idp-workers",
-)
+if __name__ == "__main__":
+    with open("invoice.pdf", "rb") as fh:
+        print(asyncio.run(main(fh.read(), "invoice.pdf")))
 ```
 
-### Running the Service
-
-```bash
-# Start the REST API
-uvicorn idp_service.app:app --host 0.0.0.0 --port 8000
-
-# Or start the Kafka consumer
-python -m idp_service.consumers
-```
+To expose this over HTTP or wire it to a message broker, call `process_document` from
+your host service's framework of choice — the agent library itself stays in-process.
 
 ### Production Checklist
 
@@ -4440,8 +4014,6 @@ Before deploying to production, verify:
   for your use case.
 - [ ] **Retry limits** — Pipeline nodes have appropriate `retry_max` and
   `timeout_seconds`.
-- [ ] **CORS** — REST API `cors_origins` is restricted to known domains.
-- [ ] **Health checks** — Kubernetes probes point to `/health` and `/health/ready`.
 - [ ] **Experiments** — You've A/B tested your prompt and model variants.
 - [ ] **Audit trail** — Explainability is enabled for regulated workloads.
 
@@ -4474,6 +4046,4 @@ paths to explore further:
 - [Explainability](explainability.md)
 - [Experiments](experiments.md)
 - [Lab](lab.md)
-- [Exposure REST](exposure-rest.md)
-- [Exposure Queues](exposure-queues.md)
 - [Use Case: IDP](use-case-idp.md)
diff --git a/docs/use-case-idp.md b/docs/use-case-idp.md
index 6c63f9e9..6c20f373 100644
--- a/docs/use-case-idp.md
+++ b/docs/use-case-idp.md
@@ -324,30 +324,6 @@ else:
 
 ---
 
-## Exposing the Pipeline via REST
-
-Register the pipeline as a REST endpoint so it can be called from external systems:
-
-```python
-from fireflyframework_agentic.exposure.rest import create_agentic_app
-
-# The IDP agents are already registered in the AgentRegistry.
-# The REST app auto-generates endpoints for each agent.
-app = create_agentic_app()
-
-# The pipeline itself can be exposed as a custom endpoint:
-from fastapi import UploadFile
-
-@app.post("/idp/process")
-async def process_document(file: UploadFile):
-    content = await file.read()
-    ctx = PipelineContext(inputs=content, metadata={"filename": file.filename})
-    result = await idp_pipeline.run(context=ctx)
-    return result.model_dump()
-```
-
----
-
 ## Key Framework Features Used
 
 This use case exercises the following framework capabilities:
@@ -381,8 +357,5 @@ This use case exercises the following framework capabilities:
   (with `warn_only`, `per_call_limit_usd`), Observability, Explainability, Cache,
   Validation.
 - **Logging** -- `configure_logging` for structured framework-wide logging.
-- **Exposure** -- REST API with authentication middleware (`add_auth_middleware`),
-  WebSocket endpoint (`/ws/agents/{name}`), conversation CRUD endpoints, and
-  SSE streaming.
 - **Observability** -- `PipelineResult.execution_trace` for per-node timing and status;
   bounded `UsageTracker` with `max_records` for production memory management.
diff --git a/examples/cost_tracking.py b/examples/cost_tracking.py
index 3a53718b..6ea773f0 100644
--- a/examples/cost_tracking.py
+++ b/examples/cost_tracking.py
@@ -10,12 +10,10 @@
 
 On top of that it shows:
 
-* Attaching custom sinks (``JSONLFileSink``) to the *existing* tracker so
-  every agent's cost lands on disk for offline inspection.
-* Optional Azure Monitor export — when
-  ``APPLICATIONINSIGHTS_CONNECTION_STRING`` is in the environment, OTel
-  metrics flow to Application Insights; otherwise the demo falls back to
-  local sinks only.
+* Attaching custom sinks (``JSONLFileSink``, ``OTelMetricsSink``) to the
+  *existing* tracker so every agent's cost lands on disk for offline
+  inspection and is emitted via the OpenTelemetry API. Configuring the OTel
+  SDK/exporters (where those metrics ultimately land) is the host's job.
 * A :class:`BudgetGate` with HARD/SOFT rules installed on the default
   tracker so it applies to real agent traffic.
 * A model-specific :class:`CostFn` (``fixed_rate_cost``) backed by
@@ -58,7 +56,6 @@
     DEFAULT_RESOLVERS,
     CostContext,
 )
-from fireflyframework_agentic.observability.exporters import configure_exporters
 from fireflyframework_agentic.observability.sinks import (
     JSONLFileSink,
     OTelMetricsSink,
@@ -101,31 +98,11 @@ def fixed_rate_cost(ctx: CostContext) -> float | None:
     return ctx.input_tokens * input_price + ctx.output_tokens * output_price
 
 
-def _try_attach_app_insights() -> bool:
-    """Wire Azure Monitor exporters if a connection string is present."""
-    cs = os.environ.get("APPLICATIONINSIGHTS_CONNECTION_STRING")
-    if not cs:
-        print("APPLICATIONINSIGHTS_CONNECTION_STRING not set; skipping App Insights.")
-        return False
-    try:
-        configure_exporters(
-            service_name="firefly-cost-demo",
-            azure_monitor_connection_string=cs,
-            metric_export_interval_ms=5_000,
-        )
-    except Exception as exc:  # noqa: BLE001
-        print(f"App Insights export not enabled ({type(exc).__name__}: {exc}); falling back to local sinks.")
-        return False
-    print("App Insights exporters attached.")
-    return True
-
-
-def configure_default_tracker(*, with_otel: bool, inflated_prices: bool) -> None:
+def configure_default_tracker(*, inflated_prices: bool) -> None:
     """Install sinks, optional fixed-rate resolver, and budget gate on the singleton."""
     JSONL_PATH.unlink(missing_ok=True)
     default_usage_tracker.add_sink(JSONLFileSink(JSONL_PATH))
-    if with_otel:
-        default_usage_tracker.add_sink(OTelMetricsSink())
+    default_usage_tracker.add_sink(OTelMetricsSink())
 
     resolvers = list(DEFAULT_RESOLVERS)
     if inflated_prices:
@@ -210,8 +187,7 @@ def _print_breakdown(title: str, group: dict, *, width: int) -> None:
 
 async def main() -> None:
     args = parse_args()
-    app_insights_ready = _try_attach_app_insights()
-    configure_default_tracker(with_otel=app_insights_ready, inflated_prices=args.inflated_prices)
+    configure_default_tracker(inflated_prices=args.inflated_prices)
     try:
         await run_agents()
     except BudgetExceededError as exc:
diff --git a/examples/distributed_tracing.py b/examples/distributed_tracing.py
deleted file mode 100755
index 8fbcd46c..00000000
--- a/examples/distributed_tracing.py
+++ /dev/null
@@ -1,261 +0,0 @@
-#!/usr/bin/env python3
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Distributed tracing with W3C Trace Context propagation.
-
-This example demonstrates how Firefly Agentic automatically propagates trace
-context across service boundaries using the W3C Trace Context standard.
-
-Features demonstrated:
-- Automatic trace context injection into HTTP requests
-- Trace context extraction from incoming requests
-- Multi-agent distributed tracing
-- Queue-based trace propagation (Kafka, RabbitMQ, Redis)
-- Jaeger UI visualization
-
-Prerequisites:
-    1. Start Jaeger for trace visualization:
-       docker run -d --name jaeger \\
-         -p 16686:16686 \\
-         -p 4317:4317 \\
-         jaegertracing/all-in-one:latest
-
-    2. Set environment variables:
-       export FIREFLY_AGENTIC_OTLP_ENDPOINT=http://localhost:4317
-       export OPENAI_API_KEY=sk-...
-
-    3. View traces at: http://localhost:16686
-
-Usage:
-    python examples/distributed_tracing.py
-"""
-
-import asyncio
-
-from opentelemetry import trace
-
-from fireflyframework_agentic.agents.base import FireflyAgent
-from fireflyframework_agentic.config import get_config
-from fireflyframework_agentic.observability.tracer import (
-    default_tracer,
-    extract_trace_context,
-    inject_trace_context,
-    trace_context_scope,
-)
-
-
-async def simulate_http_request(url: str, payload: str) -> str:
-    """Simulate an HTTP request with trace propagation.
-
-    In a real application, this would be an actual HTTP client call.
-    """
-    # Inject trace context into outgoing request headers
-    headers = {"Content-Type": "application/json"}
-    inject_trace_context(headers)
-
-    print(f"→ HTTP POST {url}")
-    print(f"  Headers: {headers}")
-    print(f"  traceparent: {headers.get('traceparent', 'None')}")
-
-    # Simulate receiving response headers
-    response_headers = {}
-    inject_trace_context(response_headers)
-
-    return "Response from service"
-
-
-async def agent_service_a() -> str:
-    """Service A: Initial request handler."""
-    print("\n" + "=" * 70)
-    print("SERVICE A: Processing initial request")
-    print("=" * 70)
-
-    agent = FireflyAgent(
-        name="service_a_agent",
-        model="openai:gpt-4o-mini",
-        description="First agent in the distributed trace",
-    )
-
-    # Create a span for Service A
-    with default_tracer.agent_span("service_a", model="gpt-4o-mini"):
-        result = await agent.run("Generate a short creative story opening (max 2 sentences).")
-
-        # Service A calls Service B via HTTP
-        print("\n→ Service A calling Service B via HTTP...")
-        await simulate_http_request("http://service-b/process", result.output)
-
-        return result.output
-
-
-async def agent_service_b(incoming_headers: dict[str, str], prompt: str) -> str:
-    """Service B: Receives request from Service A with trace context."""
-    print("\n" + "=" * 70)
-    print("SERVICE B: Processing request from Service A")
-    print("=" * 70)
-    print(f"  Received traceparent: {incoming_headers.get('traceparent', 'None')}")
-
-    # Extract trace context from incoming request
-    span_context = extract_trace_context(incoming_headers)
-
-    agent = FireflyAgent(
-        name="service_b_agent",
-        model="openai:gpt-4o-mini",
-        description="Second agent in the distributed trace",
-    )
-
-    # Continue the trace from Service A
-    with trace_context_scope(span_context), default_tracer.agent_span("service_b", model="gpt-4o-mini"):
-        result = await agent.run(f"Continue this story with one more sentence: {prompt}")
-
-        # Service B calls Service C
-        print("\n→ Service B calling Service C via HTTP...")
-        await simulate_http_request("http://service-c/finalize", result.output)
-
-        return result.output
-
-
-async def agent_service_c(incoming_headers: dict[str, str], prompt: str) -> str:
-    """Service C: Final service in the chain."""
-    print("\n" + "=" * 70)
-    print("SERVICE C: Processing request from Service B")
-    print("=" * 70)
-    print(f"  Received traceparent: {incoming_headers.get('traceparent', 'None')}")
-
-    # Extract trace context from incoming request
-    span_context = extract_trace_context(incoming_headers)
-
-    agent = FireflyAgent(
-        name="service_c_agent",
-        model="openai:gpt-4o-mini",
-        description="Final agent in the distributed trace",
-    )
-
-    # Continue the trace from Service B
-    with trace_context_scope(span_context), default_tracer.agent_span("service_c", model="gpt-4o-mini"):
-        result = await agent.run(f"Add a surprising plot twist to this story: {prompt}")
-
-        return result.output
-
-
-async def main() -> None:
-    """Demonstrate distributed tracing across multiple services."""
-
-    print("=" * 70)
-    print("Distributed Tracing Example")
-    print("=" * 70)
-
-    cfg = get_config()
-    if cfg.otlp_endpoint:
-        print(f"\n✓ OTLP endpoint: {cfg.otlp_endpoint}")
-        print("✓ Traces will be exported to Jaeger")
-        print("✓ View at: http://localhost:16686")
-    else:
-        print("\n⚠ No OTLP endpoint configured - traces will be console-only")
-        print("Set FIREFLY_AGENTIC_OTLP_ENDPOINT=http://localhost:4317 to enable Jaeger")
-
-    # Start distributed trace
-    tracer = trace.get_tracer(__name__)
-
-    with tracer.start_as_current_span("distributed_trace_example") as root_span:
-        print("\n✓ Started root trace")
-        root_context = root_span.get_span_context()
-        print(f"  Trace ID: {root_context.trace_id:032x}")
-        print(f"  Root Span ID: {root_context.span_id:016x}")
-
-        # Service A processes initial request
-        story_opening = await agent_service_a()
-
-        # Simulate Service A calling Service B with trace propagation
-        headers_to_b = {}
-        inject_trace_context(headers_to_b)
-        story_continuation = await agent_service_b(headers_to_b, story_opening)
-
-        # Simulate Service B calling Service C with trace propagation
-        headers_to_c = {}
-        inject_trace_context(headers_to_c)
-        story_final = await agent_service_c(headers_to_c, story_continuation)
-
-        print("\n" + "=" * 70)
-        print("Final Story Result")
-        print("=" * 70)
-        print(story_final)
-
-    print("\n" + "=" * 70)
-    print("Trace Propagation Summary")
-    print("=" * 70)
-    print("✓ Service A → Service B → Service C")
-    print("✓ All services share the same trace ID")
-    print("✓ Each service has its own span ID")
-    print("✓ Parent-child relationships are preserved")
-    print("=" * 70)
-
-    print("\n" + "=" * 70)
-    print("View Trace in Jaeger")
-    print("=" * 70)
-    print("1. Open http://localhost:16686")
-    print("2. Select 'fireflyframework_agentic' service")
-    print("3. Click 'Find Traces'")
-    print(f"4. Look for trace ID: {root_context.trace_id:032x}")
-    print("5. Click the trace to see the full span hierarchy:")
-    print("   - distributed_trace_example (root)")
-    print("     - agent.service_a")
-    print("       - agent.service_b")
-    print("         - agent.service_c")
-    print("=" * 70)
-
-    # Allow time for trace export
-    await asyncio.sleep(1)
-
-
-async def demonstrate_queue_propagation():
-    """Demonstrate trace propagation through message queues.
-
-    This shows how trace context is automatically propagated through
-    Kafka, RabbitMQ, and Redis Pub/Sub.
-    """
-    print("\n" + "=" * 70)
-    print("Queue-Based Trace Propagation")
-    print("=" * 70)
-
-    # Example of injecting trace context into Kafka message
-    print("\n1. Kafka Message:")
-    headers = {}
-    inject_trace_context(headers)
-    kafka_headers = [(k, v.encode()) for k, v in headers.items()]
-    print(f"   Headers: {kafka_headers}")
-
-    # Example of injecting trace context into RabbitMQ message
-    print("\n2. RabbitMQ Message:")
-    headers = {}
-    inject_trace_context(headers)
-    print(f"   Headers: {headers}")
-
-    # Example of injecting trace context into Redis message
-    print("\n3. Redis Pub/Sub Message (JSON-wrapped):")
-    import json
-
-    headers = {}
-    inject_trace_context(headers)
-    redis_message = json.dumps({"headers": headers, "body": "message content"})
-    print(f"   Wrapped message: {redis_message}")
-
-    print("\n✓ Queue consumers automatically extract trace context")
-    print("✓ Traces span across async message boundaries")
-    print("=" * 70)
-
-
-if __name__ == "__main__":
-    asyncio.run(main())
-    asyncio.run(demonstrate_queue_propagation())
diff --git a/examples/full_integration.py b/examples/full_integration.py
index 4638dbf6..5f0afa5c 100644
--- a/examples/full_integration.py
+++ b/examples/full_integration.py
@@ -17,9 +17,9 @@
 
 This example shows all production-ready features working together:
 - Database persistence (PostgreSQL/MongoDB)
-- Distributed tracing (W3C Trace Context)
+- Model/agent telemetry (OpenTelemetry spans and metrics)
 - API quota management
-- Security (RBAC, encryption, SQL injection prevention)
+- Security (encryption, SQL injection prevention)
 - HTTP connection pooling
 - Incremental streaming
 - Batch processing
@@ -245,33 +245,20 @@ async def demo_security_features():
     """Demonstrate security features integration."""
     print("\n\n=== Security Features Integration ===\n")
 
-    # RBAC (if enabled)
-    print("1. RBAC (Role-Based Access Control):")
-    print("   Configure with: FIREFLY_AGENTIC_RBAC_ENABLED=true")
-    print("   Set JWT secret: FIREFLY_AGENTIC_RBAC_JWT_SECRET=your-secret")
-    print("   Use @require_permission decorator on agent endpoints")
-    print()
-
     # Encryption (if enabled)
-    print("2. Data Encryption:")
+    print("1. Data Encryption:")
     print("   Configure with: FIREFLY_AGENTIC_ENCRYPTION_ENABLED=true")
     print("   Set encryption key: FIREFLY_AGENTIC_ENCRYPTION_KEY=your-key-32-bytes")
     print("   Use EncryptedMemoryStore wrapper for sensitive data")
     print()
 
     # SQL Injection Prevention
-    print("3. SQL Injection Prevention:")
+    print("2. SQL Injection Prevention:")
     print("   Automatically enabled in DatabaseTool")
     print("   Detects 15+ dangerous SQL patterns")
     print("   Enforces parameterized queries")
     print()
 
-    # CORS Security
-    print("4. CORS Security:")
-    print("   Default: No origins allowed (secure)")
-    print("   Configure: FIREFLY_AGENTIC_CORS_ALLOWED_ORIGINS=['https://app.example.com']")
-    print()
-
 
 async def demo_observability_integration():
     """Demonstrate observability features."""
@@ -279,11 +266,9 @@ async def demo_observability_integration():
 
     config = get_config()
 
-    print("1. Distributed Tracing:")
-    print(f"   Enabled: {config.observability_enabled}")
-    print(f"   OTLP endpoint: {config.otlp_endpoint or 'Not configured'}")
-    print(f"   Service name: {config.service_name}")
-    print("   W3C Trace Context propagation: Enabled")
+    print("1. Telemetry:")
+    print(f"   Model/agent observability enabled: {config.observability_enabled}")
+    print("   Spans/metrics are emitted via the OpenTelemetry API; the host configures exporters.")
     print()
 
     print("2. Usage Tracking:")
@@ -312,10 +297,8 @@ async def demo_configuration_integration():
     print("export FIREFLY_AGENTIC_MEMORY_MONGODB_URL=mongodb://localhost:27017/")
     print()
 
-    print("# Distributed Tracing")
+    print("# Telemetry (the host owns OTel SDK/exporter configuration)")
     print("export FIREFLY_AGENTIC_OBSERVABILITY_ENABLED=true")
-    print("export FIREFLY_AGENTIC_OTLP_ENDPOINT=http://localhost:4317")
-    print("export FIREFLY_AGENTIC_SERVICE_NAME=my-genai-app")
     print()
 
     print("# Quota Management")
@@ -325,11 +308,8 @@ async def demo_configuration_integration():
     print()
 
     print("# Security")
-    print("export FIREFLY_AGENTIC_RBAC_ENABLED=true")
-    print("export FIREFLY_AGENTIC_RBAC_JWT_SECRET=your-secret-key")
     print("export FIREFLY_AGENTIC_ENCRYPTION_ENABLED=true")
     print("export FIREFLY_AGENTIC_ENCRYPTION_KEY=your-32-byte-key")
-    print("export FIREFLY_AGENTIC_CORS_ALLOWED_ORIGINS=['https://app.example.com']")
     print()
 
     print("# HTTP Connection Pooling")
@@ -351,9 +331,9 @@ async def main():
     print()
     print("This example demonstrates all production-ready features working together:")
     print("✓ Database persistence (PostgreSQL/MongoDB)")
-    print("✓ Distributed tracing (W3C Trace Context)")
+    print("✓ Model/agent telemetry (OpenTelemetry spans and metrics)")
     print("✓ API quota management")
-    print("✓ Security (RBAC, encryption, SQL injection prevention)")
+    print("✓ Security (encryption, SQL injection prevention)")
     print("✓ HTTP connection pooling")
     print("✓ Incremental streaming")
     print("✓ Batch processing")
@@ -377,7 +357,6 @@ async def main():
     print("✓ Configuration is unified through environment variables")
     print("✓ Middleware provides composable production features")
     print("✓ Pipelines support all agent capabilities")
-    print("✓ REST API exposes all functionality")
     print()
     print("Quick Start:")
     print("  1. Set environment variables for desired features")
@@ -387,7 +366,7 @@ async def main():
     print("For detailed documentation:")
     print("  - docs/deployment.md - Production deployment guide")
     print("  - docs/observability.md - Tracing and monitoring")
-    print("  - docs/security.md - RBAC and encryption")
+    print("  - docs/security.md - Encryption and SQL injection prevention")
     print("  - docs/memory.md - Database persistence")
     print()
 
diff --git a/examples/incremental_streaming.py b/examples/incremental_streaming.py
index f5b46075..224a7b45 100644
--- a/examples/incremental_streaming.py
+++ b/examples/incremental_streaming.py
@@ -269,10 +269,6 @@ async def main():
     print("\n  # With debouncing to reduce message frequency")
     print("  async for token in stream.stream_tokens(debounce_ms=50.0):")
     print("      ...")
-    print("\nREST API:")
-    print("  POST /agents/{name}/stream/incremental")
-    print("  - Returns SSE events with individual tokens")
-    print("  - Query param: debounce_ms (optional)")
     print()
 
 
diff --git a/fireflyframework_agentic/__init__.py b/fireflyframework_agentic/__init__.py
index 2eadcab8..993b0248 100644
--- a/fireflyframework_agentic/__init__.py
+++ b/fireflyframework_agentic/__init__.py
@@ -16,8 +16,7 @@
 
 This package provides production-grade abstractions for building GenAI
 applications including agents, reasoning patterns, prompt engineering,
-tools, observability, explainability, experimentation, and exposure
-via REST APIs and message queues.
+tools, observability, explainability, and experimentation.
 
 Quick start::
 
@@ -51,7 +50,6 @@
     EmbeddingProviderError,
     ExperimentError,
     ExplainabilityError,
-    ExposureError,
     FireflyAgenticError,
     FireflyMemoryError,
     MemoryError,
@@ -63,7 +61,6 @@
     PromptNotFoundError,
     PromptValidationError,
     QoSError,
-    QueueConnectionError,
     QuotaError,
     RateLimitError,
     ReasoningError,
@@ -122,8 +119,6 @@
     "ExperimentError",
     "ObservabilityError",
     "ExplainabilityError",
-    "ExposureError",
-    "QueueConnectionError",
     "ChunkingError",
     "CompressionError",
     "OutputReviewError",
diff --git a/fireflyframework_agentic/agents/base.py b/fireflyframework_agentic/agents/base.py
index 9e93106c..b2ad6b89 100644
--- a/fireflyframework_agentic/agents/base.py
+++ b/fireflyframework_agentic/agents/base.py
@@ -106,8 +106,8 @@ class FireflyAgent(Generic[AgentDepsT, OutputT]):
         output_type: The Pydantic model (or scalar type) for structured output.
         deps_type: The dependency type expected at run time.
         tools: Sequence of tool functions or :class:`pydantic_ai.Tool` objects.
-        description: Free-form description shown in documentation and the REST
-            exposure layer.
+        description: Free-form description shown in documentation and agent
+            discovery listings.
         version: Semantic version string for this agent definition.
         tags: Iterable of tags used for capability-based discovery.
         metadata: Arbitrary key-value pairs attached to the agent.
diff --git a/fireflyframework_agentic/agents/registry.py b/fireflyframework_agentic/agents/registry.py
index 86c20f20..7c7b6647 100644
--- a/fireflyframework_agentic/agents/registry.py
+++ b/fireflyframework_agentic/agents/registry.py
@@ -44,11 +44,11 @@ class AgentRegistry:
 
     The registry enables:
 
-    * **Discovery** -- the REST exposure layer queries the registry to
-      auto-generate endpoints for every agent.
+    * **Discovery** -- host services query the registry to discover agents
+      by name.
     * **Delegation** -- the :class:`DelegationRouter` selects among registered
       agents based on capability tags.
-    * **Lifecycle** -- the exposure layer can iterate over agents to run
+    * **Lifecycle** -- callers can iterate over registered agents to run
       warmup / shutdown hooks.
     """
 
diff --git a/fireflyframework_agentic/config.py b/fireflyframework_agentic/config.py
index a1f3bc88..d7e84e18 100644
--- a/fireflyframework_agentic/config.py
+++ b/fireflyframework_agentic/config.py
@@ -58,9 +58,6 @@ class FireflyAgenticConfig(BaseSettings):
     observability_enabled: bool = True
     """Whether OpenTelemetry instrumentation is active."""
 
-    otlp_endpoint: str | None = None
-    """OTLP exporter endpoint.  When *None*, traces are exported to the console."""
-
     log_level: str = "INFO"
     """Logging level for the framework's internal logger."""
 
@@ -151,14 +148,6 @@ class FireflyAgenticConfig(BaseSettings):
     memory_mongodb_pool_size: int = 10
     """Maximum connections in MongoDB pool."""
 
-    # -- Authentication -------------------------------------------------------
-    auth_api_keys: list[str] | None = None
-    """List of valid API keys for REST endpoint authentication.  When set,
-    the auth middleware is automatically enabled."""
-
-    auth_bearer_tokens: list[str] | None = None
-    """List of valid bearer tokens for REST endpoint authentication."""
-
     # -- Usage tracker -------------------------------------------------------
     usage_tracker_max_records: int = 10_000
     """Maximum number of usage records retained in memory.  Oldest records
@@ -187,25 +176,13 @@ class FireflyAgenticConfig(BaseSettings):
     rate_limit_max_delay: float = 60.0
     """Maximum delay (seconds) between rate limit retries."""
 
-    # -- Security (RBAC & Encryption) ----------------------------------------
-    rbac_enabled: bool = False
-    """Whether Role-Based Access Control is active."""
-
-    rbac_jwt_secret: str | None = None
-    """JWT secret key for token signing and verification."""
-
-    rbac_multi_tenant: bool = False
-    """Whether to enforce tenant isolation in RBAC."""
-
+    # -- Security (Encryption) -----------------------------------------------
     encryption_enabled: bool = False
     """Whether data encryption at rest is active."""
 
     encryption_key: str | None = None
     """Encryption key for AES-256-GCM (32 bytes, or password for key derivation)."""
 
-    cors_allowed_origins: list[str] = []
-    """List of allowed CORS origins. Empty list = no origins allowed (secure default)."""
-
     # -- HTTP Connection Pooling ---------------------------------------------
     http_pool_enabled: bool = True
     """Whether to use HTTP connection pooling (requires httpx)."""
@@ -240,11 +217,21 @@ class FireflyAgenticConfig(BaseSettings):
     @classmethod
     def _reject_removed_cost_fields(cls, data: Any) -> Any:
         if isinstance(data, dict):
-            removed = {"cost_calculator", "budget_alert_threshold_usd"} & set(data)
+            removed = {
+                "cost_calculator",
+                "budget_alert_threshold_usd",
+                "auth_api_keys",
+                "auth_bearer_tokens",
+                "cors_allowed_origins",
+                "otlp_endpoint",
+                "rbac_enabled",
+                "rbac_jwt_secret",
+                "rbac_multi_tenant",
+            } & set(data)
             if removed:
                 raise ValueError(
-                    f"Removed cost-tracking config fields: {sorted(removed)}. "
-                    "See docs/observability.md for the new BudgetGate / resolver API."
+                    f"Removed config fields: {sorted(removed)}. Cost tracking moved to BudgetGate "
+                    "(docs/observability.md); REST/queue auth, CORS and OTLP export are host-owned; see CHANGELOG."
                 )
         return data
 
diff --git a/fireflyframework_agentic/exceptions.py b/fireflyframework_agentic/exceptions.py
index 8a42c709..095dc96d 100644
--- a/fireflyframework_agentic/exceptions.py
+++ b/fireflyframework_agentic/exceptions.py
@@ -118,17 +118,6 @@ class ExplainabilityError(FireflyAgenticError):
     """Raised for errors in trace recording, explanation generation, or audit."""
 
 
-# -- Exposure ----------------------------------------------------------------
-
-
-class ExposureError(FireflyAgenticError):
-    """Raised for errors in REST API or queue-based agent exposure."""
-
-
-class QueueConnectionError(ExposureError):
-    """Raised when a queue backend connection fails."""
-
-
 # -- Content processing ------------------------------------------------------
 
 
diff --git a/fireflyframework_agentic/exposure/__init__.py b/fireflyframework_agentic/exposure/__init__.py
deleted file mode 100644
index eda2031e..00000000
--- a/fireflyframework_agentic/exposure/__init__.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Exposure package -- REST API and message queue agent exposure."""
-
-from fireflyframework_agentic.exposure.queues import (
-    BaseQueueConsumer,
-    QueueConsumer,
-    QueueMessage,
-    QueueProducer,
-    QueueRouter,
-)
-
-__all__ = [
-    "BaseQueueConsumer",
-    "QueueConsumer",
-    "QueueMessage",
-    "QueueProducer",
-    "QueueRouter",
-]
-
-
-def __getattr__(name: str):
-    """Lazy-load REST symbols so the package works without FastAPI installed."""
-    _rest_names = {"create_agentic_app", "AgentRequest", "AgentResponse", "HealthResponse"}
-    if name in _rest_names:
-        # imports-top: optional dep (fastapi) loaded on demand inside __getattr__
-        from fireflyframework_agentic.exposure.rest import (  # noqa: PLC0415 — optional dep loaded on demand
-            AgentRequest,
-            AgentResponse,
-            HealthResponse,
-            create_agentic_app,
-        )
-
-        _map = {
-            "create_agentic_app": create_agentic_app,
-            "AgentRequest": AgentRequest,
-            "AgentResponse": AgentResponse,
-            "HealthResponse": HealthResponse,
-        }
-        return _map[name]
-    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
diff --git a/fireflyframework_agentic/exposure/queues/__init__.py b/fireflyframework_agentic/exposure/queues/__init__.py
deleted file mode 100644
index 5a3f9a20..00000000
--- a/fireflyframework_agentic/exposure/queues/__init__.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Queues exposure subpackage -- Kafka, RabbitMQ, Redis consumers, producers, and routing."""
-
-from fireflyframework_agentic.exposure.queues.base import (
-    BaseQueueConsumer,
-    QueueConsumer,
-    QueueMessage,
-    QueueProducer,
-)
-from fireflyframework_agentic.exposure.queues.router import QueueRouter
-
-__all__ = [
-    "BaseQueueConsumer",
-    "QueueConsumer",
-    "QueueMessage",
-    "QueueProducer",
-    "QueueRouter",
-]
-
-
-def __getattr__(name: str):
-    """Lazy-load queue backend implementations so the package works without
-    their optional dependencies (aiokafka, aio-pika, redis) installed."""
-    _kafka_names = {"KafkaAgentConsumer", "KafkaAgentProducer"}
-    _rabbitmq_names = {"RabbitMQAgentConsumer", "RabbitMQAgentProducer"}
-    _redis_names = {"RedisAgentConsumer", "RedisAgentProducer"}
-
-    if name in _kafka_names:
-        # imports-top: module-level __getattr__ lazy loader
-        from fireflyframework_agentic.exposure.queues.kafka import (
-            KafkaAgentConsumer,
-            KafkaAgentProducer,
-        )
-
-        return {"KafkaAgentConsumer": KafkaAgentConsumer, "KafkaAgentProducer": KafkaAgentProducer}[name]
-
-    if name in _rabbitmq_names:
-        # imports-top: module-level __getattr__ lazy loader
-        from fireflyframework_agentic.exposure.queues.rabbitmq import (
-            RabbitMQAgentConsumer,
-            RabbitMQAgentProducer,
-        )
-
-        return {"RabbitMQAgentConsumer": RabbitMQAgentConsumer, "RabbitMQAgentProducer": RabbitMQAgentProducer}[name]
-
-    if name in _redis_names:
-        # imports-top: module-level __getattr__ lazy loader
-        from fireflyframework_agentic.exposure.queues.redis import (
-            RedisAgentConsumer,
-            RedisAgentProducer,
-        )
-
-        return {"RedisAgentConsumer": RedisAgentConsumer, "RedisAgentProducer": RedisAgentProducer}[name]
-
-    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
diff --git a/fireflyframework_agentic/exposure/queues/base.py b/fireflyframework_agentic/exposure/queues/base.py
deleted file mode 100644
index ee421897..00000000
--- a/fireflyframework_agentic/exposure/queues/base.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Abstract queue consumer and producer protocols."""
-
-from __future__ import annotations
-
-from abc import ABC, abstractmethod
-from typing import Protocol, runtime_checkable
-
-from pydantic import BaseModel
-
-from fireflyframework_agentic.agents.registry import agent_registry
-
-
-class QueueMessage(BaseModel):
-    """A message consumed from or produced to a queue."""
-
-    body: str
-    headers: dict[str, str] = {}
-    routing_key: str = ""
-    reply_to: str = ""
-
-
-@runtime_checkable
-class QueueConsumer(Protocol):
-    """Protocol for queue consumers."""
-
-    async def start(self) -> None: ...
-    async def stop(self) -> None: ...
-
-
-@runtime_checkable
-class QueueProducer(Protocol):
-    """Protocol for queue producers."""
-
-    async def publish(self, message: QueueMessage) -> None: ...
-
-
-class BaseQueueConsumer(ABC):
-    """Abstract base class for queue consumers that route messages to agents.
-
-    Parameters:
-        agent_name: Name of the agent to route messages to.
-    """
-
-    def __init__(self, agent_name: str) -> None:
-        self._agent_name = agent_name
-        self._running = False
-
-    @property
-    def agent_name(self) -> str:
-        return self._agent_name
-
-    @property
-    def is_running(self) -> bool:
-        return self._running
-
-    @abstractmethod
-    async def start(self) -> None:
-        """Connect and begin consuming messages."""
-        ...
-
-    @abstractmethod
-    async def stop(self) -> None:
-        """Gracefully stop consuming and disconnect."""
-        ...
-
-    async def _process_message(self, message: QueueMessage) -> str:
-        """Route the message to the configured agent and return the response."""
-        agent = agent_registry.get(self._agent_name)
-        result = await agent.run(message.body)
-        return str(result.output if hasattr(result, "output") else result)
diff --git a/fireflyframework_agentic/exposure/queues/kafka.py b/fireflyframework_agentic/exposure/queues/kafka.py
deleted file mode 100644
index 23d9203a..00000000
--- a/fireflyframework_agentic/exposure/queues/kafka.py
+++ /dev/null
@@ -1,158 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Kafka consumer/producer for agent exposure.
-
-Requires the ``aiokafka`` optional dependency (install via
-``pip install fireflyframework-agentic[kafka]``).
-"""
-
-from __future__ import annotations
-
-import logging
-from typing import TYPE_CHECKING, Any
-
-if TYPE_CHECKING:
-    from aiokafka import AIOKafkaConsumer, AIOKafkaProducer  # pyright: ignore[reportMissingImports]
-else:
-    try:
-        from aiokafka import AIOKafkaConsumer, AIOKafkaProducer  # type: ignore[import-not-found]
-    except ImportError:  # pragma: no cover - optional dep
-        AIOKafkaConsumer = None
-        AIOKafkaProducer = None
-
-from fireflyframework_agentic.exposure.queues.base import BaseQueueConsumer, QueueMessage
-from fireflyframework_agentic.observability.tracer import extract_trace_context, trace_context_scope
-
-logger = logging.getLogger(__name__)
-
-
-_AIOKAFKA_IMPORT_ERROR = (
-    "aiokafka is required for Kafka support. Install it with: pip install fireflyframework-agentic[kafka]"
-)
-
-
-class KafkaAgentConsumer(BaseQueueConsumer):
-    """Consume messages from a Kafka topic and route to an agent.
-
-    Parameters:
-        agent_name: Name of the agent to invoke.
-        topic: Kafka topic to consume from.
-        bootstrap_servers: Kafka bootstrap servers.
-        group_id: Consumer group ID.
-    """
-
-    def __init__(
-        self,
-        agent_name: str,
-        *,
-        topic: str,
-        bootstrap_servers: str = "localhost:9092",
-        group_id: str = "firefly-agentic",
-    ) -> None:
-        super().__init__(agent_name)
-        self._topic = topic
-        self._bootstrap_servers = bootstrap_servers
-        self._group_id = group_id
-        self._consumer: Any = None
-
-    async def start(self) -> None:
-        """Connect to Kafka and begin consuming."""
-        if AIOKafkaConsumer is None:
-            raise ImportError(_AIOKAFKA_IMPORT_ERROR)
-
-        self._consumer = AIOKafkaConsumer(
-            self._topic,
-            bootstrap_servers=self._bootstrap_servers,
-            group_id=self._group_id,
-        )
-        await self._consumer.start()
-        self._running = True
-        logger.info("Kafka consumer started on topic '%s'", self._topic)
-
-        try:
-            async for msg in self._consumer:
-                # Extract trace context from message headers for distributed tracing
-                headers = {k: v.decode("utf-8") if isinstance(v, bytes) else v for k, v in (msg.headers or [])}
-                span_context = extract_trace_context(headers)
-
-                message = QueueMessage(body=msg.value.decode("utf-8"))
-
-                # Process message within trace context scope
-                with trace_context_scope(span_context):
-                    try:
-                        await self._process_message(message)
-                    except Exception:
-                        logger.exception("Failed to process Kafka message on topic '%s'", self._topic)
-                        continue
-        finally:
-            await self.stop()
-
-    async def stop(self) -> None:
-        """Stop the Kafka consumer."""
-        if self._consumer:
-            await self._consumer.stop()
-        self._running = False
-        logger.info("Kafka consumer stopped")
-
-
-class KafkaAgentProducer:
-    """Publish messages to a Kafka topic.
-
-    Satisfies the :class:`~fireflyframework_agentic.exposure.queues.base.QueueProducer`
-    protocol.
-
-    Parameters:
-        topic: Kafka topic to publish to.
-        bootstrap_servers: Kafka bootstrap servers.
-    """
-
-    def __init__(
-        self,
-        *,
-        topic: str,
-        bootstrap_servers: str = "localhost:9092",
-    ) -> None:
-        self._topic = topic
-        self._bootstrap_servers = bootstrap_servers
-        self._producer: Any = None
-
-    async def start(self) -> None:
-        """Connect the underlying Kafka producer."""
-        if AIOKafkaProducer is None:
-            raise ImportError(_AIOKAFKA_IMPORT_ERROR)
-
-        self._producer = AIOKafkaProducer(
-            bootstrap_servers=self._bootstrap_servers,
-        )
-        await self._producer.start()
-        logger.info("Kafka producer started for topic '%s'", self._topic)
-
-    async def publish(self, message: QueueMessage) -> None:
-        """Publish *message* to the configured Kafka topic."""
-        if self._producer is None:
-            await self.start()
-        producer: Any = self._producer
-        await producer.send_and_wait(
-            self._topic,
-            value=message.body.encode("utf-8"),
-            headers=[(k, v.encode("utf-8")) for k, v in message.headers.items()] or None,
-        )
-
-    async def stop(self) -> None:
-        """Flush and stop the Kafka producer."""
-        if self._producer:
-            await self._producer.stop()
-            self._producer = None
-        logger.info("Kafka producer stopped")
diff --git a/fireflyframework_agentic/exposure/queues/rabbitmq.py b/fireflyframework_agentic/exposure/queues/rabbitmq.py
deleted file mode 100644
index 786901f0..00000000
--- a/fireflyframework_agentic/exposure/queues/rabbitmq.py
+++ /dev/null
@@ -1,160 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""RabbitMQ consumer for agent exposure.
-
-Requires the ``aio-pika`` optional dependency (install via
-``pip install fireflyframework-agentic[rabbitmq]``).
-"""
-
-from __future__ import annotations
-
-import logging
-from typing import TYPE_CHECKING, Any, cast
-
-if TYPE_CHECKING:
-    import aio_pika  # pyright: ignore[reportMissingImports]
-else:
-    try:
-        import aio_pika  # type: ignore[import-not-found]
-    except ImportError:  # pragma: no cover - optional dep
-        aio_pika = None
-
-from fireflyframework_agentic.exposure.queues.base import BaseQueueConsumer, QueueMessage
-from fireflyframework_agentic.observability.tracer import extract_trace_context, trace_context_scope
-
-logger = logging.getLogger(__name__)
-
-
-_AIO_PIKA_IMPORT_ERROR = (
-    "aio-pika is required for RabbitMQ support. Install it with: pip install fireflyframework-agentic[rabbitmq]"
-)
-
-
-class RabbitMQAgentConsumer(BaseQueueConsumer):
-    """Consume messages from a RabbitMQ queue and route to an agent.
-
-    Parameters:
-        agent_name: Name of the agent to invoke.
-        queue_name: RabbitMQ queue to consume from.
-        url: AMQP connection URL.
-    """
-
-    def __init__(
-        self,
-        agent_name: str,
-        *,
-        queue_name: str,
-        url: str = "amqp://guest:guest@localhost/",
-    ) -> None:
-        super().__init__(agent_name)
-        self._queue_name = queue_name
-        self._url = url
-        self._connection: Any = None
-
-    async def start(self) -> None:
-        """Connect to RabbitMQ and begin consuming."""
-        if aio_pika is None:
-            raise ImportError(_AIO_PIKA_IMPORT_ERROR)
-
-        self._connection = await aio_pika.connect_robust(self._url)
-        channel = await self._connection.channel()
-        queue = await channel.declare_queue(self._queue_name, durable=True)
-        self._running = True
-        logger.info("RabbitMQ consumer started on queue '%s'", self._queue_name)
-
-        async with queue.iterator() as queue_iter:
-            async for amqp_message in queue_iter:
-                async with amqp_message.process():
-                    # Extract trace context from message headers for distributed tracing
-                    headers = {}
-                    if amqp_message.headers:
-                        headers = {k: str(v) for k, v in amqp_message.headers.items()}
-                    span_context = extract_trace_context(headers)
-
-                    message = QueueMessage(body=amqp_message.body.decode("utf-8"))
-
-                    # Process message within trace context scope
-                    with trace_context_scope(span_context):
-                        try:
-                            await self._process_message(message)
-                        except Exception:
-                            logger.exception("Failed to process RabbitMQ message on queue '%s'", self._queue_name)
-                            continue
-
-    async def stop(self) -> None:
-        """Stop the RabbitMQ consumer."""
-        if self._connection:
-            await self._connection.close()
-        self._running = False
-        logger.info("RabbitMQ consumer stopped")
-
-
-class RabbitMQAgentProducer:
-    """Publish messages to a RabbitMQ exchange.
-
-    Satisfies the :class:`~fireflyframework_agentic.exposure.queues.base.QueueProducer`
-    protocol.
-
-    Parameters:
-        exchange_name: RabbitMQ exchange to publish to.  Use ``""`` for the
-            default exchange (messages are routed by *routing_key* directly
-            to a queue).
-        url: AMQP connection URL.
-    """
-
-    def __init__(
-        self,
-        *,
-        exchange_name: str = "",
-        url: str = "amqp://guest:guest@localhost/",
-    ) -> None:
-        self._exchange_name = exchange_name
-        self._url = url
-        self._connection: Any = None
-        self._channel: Any = None
-
-    async def start(self) -> None:
-        """Open a connection and channel."""
-        if aio_pika is None:
-            raise ImportError(_AIO_PIKA_IMPORT_ERROR)
-
-        self._connection = await aio_pika.connect_robust(self._url)
-        self._channel = await self._connection.channel()
-        logger.info("RabbitMQ producer started (exchange='%s')", self._exchange_name)
-
-    async def publish(self, message: QueueMessage) -> None:
-        """Publish *message* to the configured exchange."""
-        if aio_pika is None:
-            raise ImportError(_AIO_PIKA_IMPORT_ERROR)
-
-        if self._channel is None:
-            await self.start()
-
-        channel: Any = self._channel
-        exchange = await channel.get_exchange(self._exchange_name) if self._exchange_name else channel.default_exchange
-        amqp_message = aio_pika.Message(
-            body=message.body.encode("utf-8"),
-            headers=cast("dict[str, Any]", message.headers) or None,
-            reply_to=message.reply_to or None,
-        )
-        await exchange.publish(amqp_message, routing_key=message.routing_key)
-
-    async def stop(self) -> None:
-        """Close the connection."""
-        if self._connection:
-            await self._connection.close()
-            self._connection = None
-            self._channel = None
-        logger.info("RabbitMQ producer stopped")
diff --git a/fireflyframework_agentic/exposure/queues/redis.py b/fireflyframework_agentic/exposure/queues/redis.py
deleted file mode 100644
index e754ba6f..00000000
--- a/fireflyframework_agentic/exposure/queues/redis.py
+++ /dev/null
@@ -1,159 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Redis Pub/Sub consumer for agent exposure.
-
-Requires the ``redis`` optional dependency (install via
-``pip install fireflyframework-agentic[redis]``).
-"""
-
-from __future__ import annotations
-
-import json
-import logging
-from typing import TYPE_CHECKING, Any
-
-if TYPE_CHECKING:
-    import redis.asyncio as aioredis  # pyright: ignore[reportMissingImports]
-else:
-    try:
-        import redis.asyncio as aioredis  # type: ignore[import-not-found]
-    except ImportError:  # pragma: no cover - optional dep
-        aioredis = None
-
-from fireflyframework_agentic.exposure.queues.base import BaseQueueConsumer, QueueMessage
-from fireflyframework_agentic.observability.tracer import extract_trace_context, trace_context_scope
-
-logger = logging.getLogger(__name__)
-
-
-_REDIS_IMPORT_ERROR = (
-    "redis[hiredis] is required for Redis support. Install it with: pip install fireflyframework-agentic[redis]"
-)
-
-
-class RedisAgentConsumer(BaseQueueConsumer):
-    """Subscribe to a Redis Pub/Sub channel and route messages to an agent.
-
-    Parameters:
-        agent_name: Name of the agent to invoke.
-        channel: Redis Pub/Sub channel to subscribe to.
-        url: Redis connection URL.
-    """
-
-    def __init__(
-        self,
-        agent_name: str,
-        *,
-        channel: str,
-        url: str = "redis://localhost:6379",
-    ) -> None:
-        super().__init__(agent_name)
-        self._channel = channel
-        self._url = url
-        self._client: Any = None
-
-    async def start(self) -> None:
-        """Connect to Redis and begin subscribing."""
-        if aioredis is None:
-            raise ImportError(_REDIS_IMPORT_ERROR)
-
-        self._client = aioredis.from_url(self._url)
-        pubsub = self._client.pubsub()
-        await pubsub.subscribe(self._channel)
-        self._running = True
-        logger.info("Redis consumer started on channel '%s'", self._channel)
-
-        try:
-            async for raw_message in pubsub.listen():
-                if raw_message["type"] == "message":
-                    body = raw_message["data"]
-                    if isinstance(body, bytes):
-                        body = body.decode("utf-8")
-
-                    # Try to parse as JSON to extract trace context
-                    # If not JSON, treat as plain text
-                    span_context = None
-                    try:
-                        data = json.loads(body)
-                        if isinstance(data, dict) and "headers" in data and "body" in data:
-                            # Message is wrapped with metadata for trace propagation
-                            span_context = extract_trace_context(data.get("headers", {}))
-                            body = data["body"]
-                    except (json.JSONDecodeError, KeyError):
-                        # Not a wrapped message, use body as-is
-                        pass
-
-                    message = QueueMessage(body=body)
-
-                    # Process message within trace context scope
-                    with trace_context_scope(span_context):
-                        try:
-                            await self._process_message(message)
-                        except Exception:
-                            logger.exception("Failed to process Redis message on channel '%s'", self._channel)
-                            continue
-        finally:
-            await self.stop()
-
-    async def stop(self) -> None:
-        """Stop the Redis consumer."""
-        if self._client:
-            await self._client.close()
-        self._running = False
-        logger.info("Redis consumer stopped")
-
-
-class RedisAgentProducer:
-    """Publish messages to a Redis Pub/Sub channel.
-
-    Satisfies the :class:`~fireflyframework_agentic.exposure.queues.base.QueueProducer`
-    protocol.
-
-    Parameters:
-        channel: Redis Pub/Sub channel to publish to.
-        url: Redis connection URL.
-    """
-
-    def __init__(
-        self,
-        *,
-        channel: str,
-        url: str = "redis://localhost:6379",
-    ) -> None:
-        self._channel = channel
-        self._url = url
-        self._client: Any = None
-
-    async def start(self) -> None:
-        """Open a Redis connection."""
-        if aioredis is None:
-            raise ImportError(_REDIS_IMPORT_ERROR)
-
-        self._client = aioredis.from_url(self._url)
-        logger.info("Redis producer started for channel '%s'", self._channel)
-
-    async def publish(self, message: QueueMessage) -> None:
-        """Publish *message* to the configured Redis channel."""
-        if self._client is None:
-            await self.start()
-        client: Any = self._client
-        await client.publish(self._channel, message.body.encode("utf-8"))
-
-    async def stop(self) -> None:
-        """Close the Redis connection."""
-        if self._client:
-            await self._client.close()
-            self._client = None
-        logger.info("Redis producer stopped")
diff --git a/fireflyframework_agentic/exposure/queues/router.py b/fireflyframework_agentic/exposure/queues/router.py
deleted file mode 100644
index 31f09566..00000000
--- a/fireflyframework_agentic/exposure/queues/router.py
+++ /dev/null
@@ -1,58 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Queue message router: maps topic/queue patterns to specific agents."""
-
-from __future__ import annotations
-
-import re
-
-from fireflyframework_agentic.agents.registry import agent_registry
-from fireflyframework_agentic.exceptions import ExposureError
-from fireflyframework_agentic.exposure.queues.base import QueueMessage
-
-
-class QueueRouter:
-    """Routes queue messages to agents based on pattern matching.
-
-    Rules are added via :meth:`add_route` and evaluated in order.
-
-    Parameters:
-        default_agent: Agent name used when no rule matches.
-    """
-
-    def __init__(self, default_agent: str | None = None) -> None:
-        self._routes: list[tuple[re.Pattern[str], str]] = []
-        self._default_agent = default_agent
-
-    def add_route(self, pattern: str, agent_name: str) -> None:
-        """Add a routing rule: messages whose routing key matches *pattern*
-        are sent to *agent_name*."""
-        self._routes.append((re.compile(pattern), agent_name))
-
-    async def route(self, message: QueueMessage) -> str:
-        """Route *message* to the appropriate agent and return the response."""
-        agent_name = self._resolve(message.routing_key)
-        agent = agent_registry.get(agent_name)
-        result = await agent.run(message.body)
-        return str(result.output if hasattr(result, "output") else result)
-
-    def _resolve(self, routing_key: str) -> str:
-        """Find the first matching agent name for *routing_key*."""
-        for pattern, agent_name in self._routes:
-            if pattern.search(routing_key):
-                return agent_name
-        if self._default_agent:
-            return self._default_agent
-        raise ExposureError(f"No route matched routing key '{routing_key}' and no default agent set")
diff --git a/fireflyframework_agentic/exposure/rest/__init__.py b/fireflyframework_agentic/exposure/rest/__init__.py
deleted file mode 100644
index 8f493baf..00000000
--- a/fireflyframework_agentic/exposure/rest/__init__.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""REST exposure subpackage -- FastAPI app factory, router, middleware, streaming."""
-
-from fireflyframework_agentic.exposure.rest.app import create_agentic_app
-from fireflyframework_agentic.exposure.rest.schemas import AgentRequest, AgentResponse, HealthResponse
-
-__all__ = [
-    "AgentRequest",
-    "AgentResponse",
-    "HealthResponse",
-    "create_agentic_app",
-]
diff --git a/fireflyframework_agentic/exposure/rest/app.py b/fireflyframework_agentic/exposure/rest/app.py
deleted file mode 100644
index 6cddcdb7..00000000
--- a/fireflyframework_agentic/exposure/rest/app.py
+++ /dev/null
@@ -1,132 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""FastAPI application factory for exposing Firefly agents over REST.
-
-Call :func:`create_agentic_app` to get a fully-configured FastAPI instance
-with agent, health, and streaming endpoints.
-"""
-
-from __future__ import annotations
-
-import logging
-from collections.abc import AsyncIterator
-from contextlib import asynccontextmanager
-from typing import Any
-
-try:
-    from fastapi import FastAPI  # type: ignore[import-not-found]
-except ImportError:  # pragma: no cover - optional dep
-    FastAPI = None  # type: ignore[assignment,misc]
-
-from fireflyframework_agentic.agents.lifecycle import agent_lifecycle
-from fireflyframework_agentic.config import get_config
-from fireflyframework_agentic.exposure.rest.health import create_health_router
-from fireflyframework_agentic.exposure.rest.middleware import (
-    add_auth_middleware,
-    add_cors_middleware,
-    add_rate_limit_middleware,
-    add_request_id_middleware,
-)
-from fireflyframework_agentic.exposure.rest.router import create_agent_router
-from fireflyframework_agentic.exposure.rest.websocket import create_websocket_router
-from fireflyframework_agentic.observability.exporters import configure_exporters
-from fireflyframework_agentic.plugin import PluginDiscovery
-from fireflyframework_agentic.reasoning.prompts import register_reasoning_prompts
-
-logger = logging.getLogger(__name__)
-
-
-@asynccontextmanager
-async def _lifespan(app: Any) -> AsyncIterator[None]:
-    """FastAPI lifespan: plugin discovery, warmup, OTel, and shutdown."""
-    cfg = get_config()
-
-    # -- Startup -----------------------------------------------------------
-    if cfg.plugin_auto_discover:
-        result = PluginDiscovery.discover_all()
-        logger.info(
-            "Plugins: %d loaded, %d failed",
-            len(result.successful),
-            len(result.failed),
-        )
-
-    register_reasoning_prompts()
-    await agent_lifecycle.run_warmup()
-
-    if cfg.observability_enabled:
-        configure_exporters(
-            otlp_endpoint=cfg.otlp_endpoint,
-            console=cfg.otlp_endpoint is None,
-        )
-
-    yield
-
-    # -- Shutdown ----------------------------------------------------------
-    await agent_lifecycle.run_shutdown()
-
-
-def create_agentic_app(
-    *,
-    title: str = "Firefly Agentic",
-    version: str = "0.1.0",
-    cors: bool = True,
-    request_id: bool = True,
-    rate_limit: bool | dict[str, Any] = False,
-) -> Any:
-    """Create a FastAPI application with agent exposure endpoints.
-
-    Parameters:
-        title: Application title for OpenAPI docs.
-        version: Application version.
-        cors: Enable CORS middleware.
-        request_id: Enable request-ID injection middleware.
-        rate_limit: Enable rate-limiting middleware.  Pass ``True`` for
-            defaults or a dict with ``max_requests``, ``window_seconds``,
-            and/or ``key_func`` to customise behaviour.
-
-    Returns:
-        A configured :class:`fastapi.FastAPI` instance.
-    """
-    if FastAPI is None:
-        raise ImportError("fastapi is required for create_agentic_app")
-
-    app = FastAPI(title=title, version=version, lifespan=_lifespan)
-
-    # Middleware
-    if cors:
-        add_cors_middleware(app)
-    if request_id:
-        add_request_id_middleware(app)
-    if rate_limit:
-        rl_kwargs = rate_limit if isinstance(rate_limit, dict) else {}
-        add_rate_limit_middleware(app, **rl_kwargs)
-
-    # Auto-wire auth middleware from config
-    cfg = get_config()
-    if cfg.auth_api_keys or cfg.auth_bearer_tokens:
-        add_auth_middleware(
-            app,
-            api_keys=cfg.auth_api_keys,
-            bearer_tokens=cfg.auth_bearer_tokens,
-        )
-
-    # Routers
-    app.include_router(create_health_router())
-    app.include_router(create_agent_router())
-
-    # WebSocket
-    app.include_router(create_websocket_router())
-
-    return app
diff --git a/fireflyframework_agentic/exposure/rest/health.py b/fireflyframework_agentic/exposure/rest/health.py
deleted file mode 100644
index e7a7074e..00000000
--- a/fireflyframework_agentic/exposure/rest/health.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Health check endpoint factory."""
-
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
-
-from fireflyframework_agentic.agents.registry import agent_registry
-from fireflyframework_agentic.exposure.rest.schemas import HealthResponse
-
-if TYPE_CHECKING:
-    from fastapi import APIRouter
-else:
-    try:
-        from fastapi import APIRouter  # type: ignore[import-not-found]
-    except ImportError:  # pragma: no cover - optional dep
-        APIRouter = None
-
-
-def create_health_router() -> APIRouter:
-    """Create a FastAPI router with health check endpoints."""
-    if APIRouter is None:
-        raise ImportError("fastapi is required for REST exposure; install with `pip install fastapi`")
-    router = APIRouter(tags=["health"])
-
-    @router.get("/health", response_model=HealthResponse)
-    async def health() -> HealthResponse:
-        return HealthResponse(
-            status="ok",
-            agents=len(agent_registry),
-        )
-
-    @router.get("/health/ready")
-    async def readiness() -> dict[str, str]:
-        return {"status": "ready"}
-
-    @router.get("/health/live")
-    async def liveness() -> dict[str, str]:
-        return {"status": "alive"}
-
-    return router
diff --git a/fireflyframework_agentic/exposure/rest/middleware.py b/fireflyframework_agentic/exposure/rest/middleware.py
deleted file mode 100644
index 447ec98a..00000000
--- a/fireflyframework_agentic/exposure/rest/middleware.py
+++ /dev/null
@@ -1,301 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Middleware for the REST exposure layer: request ID injection, CORS, rate limiting."""
-
-from __future__ import annotations
-
-import hmac
-import logging
-import time
-import uuid
-from typing import TYPE_CHECKING, Any
-
-if TYPE_CHECKING:
-    from fastapi.middleware.cors import CORSMiddleware
-    from starlette.middleware.base import BaseHTTPMiddleware
-    from starlette.requests import Request
-    from starlette.responses import JSONResponse, Response
-else:
-    try:
-        from starlette.middleware.base import BaseHTTPMiddleware
-        from starlette.requests import Request
-        from starlette.responses import JSONResponse, Response
-    except ImportError:  # pragma: no cover - optional dep
-        BaseHTTPMiddleware = None
-        Request = None
-        Response = None
-        JSONResponse = None
-
-    try:
-        from fastapi.middleware.cors import CORSMiddleware  # type: ignore[import-not-found]
-    except ImportError:  # pragma: no cover - optional dep
-        CORSMiddleware = None
-
-from fireflyframework_agentic.observability.tracer import (
-    extract_trace_context,
-    inject_trace_context,
-    trace_context_scope,
-)
-
-logger = logging.getLogger(__name__)
-
-
-def add_request_id_middleware(app: Any) -> None:
-    """Add middleware that injects a unique ``X-Request-ID`` header."""
-    if BaseHTTPMiddleware is None:
-        raise ImportError("starlette is required for add_request_id_middleware")
-
-    class RequestIDMiddleware(BaseHTTPMiddleware):
-        async def dispatch(self, request: Request, call_next: Any) -> Response:
-            request_id = request.headers.get("X-Request-ID", str(uuid.uuid4()))
-            response: Response = await call_next(request)
-            response.headers["X-Request-ID"] = request_id
-            return response
-
-    app.add_middleware(RequestIDMiddleware)
-
-
-def add_cors_middleware(
-    app: Any,
-    *,
-    allow_origins: list[str] | None = None,
-    allow_methods: list[str] | None = None,
-) -> None:
-    """Add CORS middleware with configurable origins.
-
-    Security Note:
-        By default, this middleware uses a restrictive CORS policy (no origins allowed)
-        for production security. You must explicitly specify allowed origins.
-
-        **INSECURE (Development Only):**
-            add_cors_middleware(app, allow_origins=["*"])
-
-        **SECURE (Production):**
-            add_cors_middleware(app, allow_origins=["https://myapp.com"])
-
-    Args:
-        app: The FastAPI or Starlette application.
-        allow_origins: List of allowed origin URLs. Defaults to [] (no origins allowed).
-        allow_methods: List of allowed HTTP methods. Defaults to standard methods.
-    """
-    if CORSMiddleware is None:
-        raise ImportError("fastapi is required for add_cors_middleware")
-
-    # Secure default: no origins allowed
-    if allow_origins is None:
-        allow_origins = []
-        logger.warning(
-            "CORS: No origins specified, defaulting to secure policy (no origins allowed). "
-            "Set allow_origins=['*'] for development or specify exact origins for production."
-        )
-
-    # Warn about wildcard usage
-    if "*" in allow_origins:
-        logger.warning(
-            "CORS: Wildcard origin ('*') allows requests from ANY domain. "
-            "This is INSECURE for production. Specify exact origins instead."
-        )
-
-    app.add_middleware(
-        CORSMiddleware,
-        allow_origins=allow_origins,
-        allow_credentials="*" not in allow_origins,
-        allow_methods=allow_methods or ["GET", "POST", "PUT", "DELETE", "OPTIONS"],
-        allow_headers=["*"],
-    )
-
-
-class RateLimiter:
-    """Simple in-memory sliding-window rate limiter.
-
-    Parameters:
-        max_requests: Maximum requests per *window_seconds*.
-        window_seconds: Time window for the rate limit.
-    """
-
-    def __init__(self, max_requests: int = 60, window_seconds: float = 60.0) -> None:
-        self._max = max_requests
-        self._window = window_seconds
-        self._timestamps: dict[str, list[float]] = {}
-
-    def is_allowed(self, key: str) -> bool:
-        """Return *True* if the request is within the rate limit."""
-        now = time.monotonic()
-        # Cleanup stale entries to prevent unbounded memory growth
-        if len(self._timestamps) > 10000:
-            stale_keys = [k for k, v in self._timestamps.items() if not v or now - v[-1] > self._window]
-            for k in stale_keys:
-                del self._timestamps[k]
-        ts = self._timestamps.setdefault(key, [])
-        ts[:] = [t for t in ts if now - t < self._window]
-        if len(ts) >= self._max:
-            return False
-        ts.append(now)
-        return True
-
-
-def add_auth_middleware(
-    app: Any,
-    *,
-    api_keys: list[str] | None = None,
-    bearer_tokens: list[str] | None = None,
-    auth_header: str = "Authorization",
-    api_key_header: str = "X-API-Key",
-    exclude_paths: list[str] | None = None,
-) -> None:
-    """Add authentication middleware to a FastAPI/Starlette application.
-
-    Supports two authentication modes:
-
-    * **API Key** -- checked via the ``X-API-Key`` header.
-    * **Bearer Token** -- checked via the ``Authorization: Bearer <token>`` header.
-
-    When both are configured, a request is accepted if *either* method succeeds.
-    Unauthenticated requests receive a ``401 Unauthorized`` response.
-
-    Parameters:
-        app: The FastAPI or Starlette application.
-        api_keys: List of valid API keys.
-        bearer_tokens: List of valid bearer tokens.
-        auth_header: Header name for bearer tokens.
-        api_key_header: Header name for API keys.
-        exclude_paths: URL paths excluded from auth (e.g. ``["/health"]``).
-    """
-    if BaseHTTPMiddleware is None or JSONResponse is None:
-        raise ImportError("starlette is required for add_auth_middleware")
-
-    _JSONResponse = JSONResponse  # noqa: N806 — local rebinding to narrow Optional for nested class
-    _api_keys = set(api_keys or [])
-    _bearer_tokens = set(bearer_tokens or [])
-    _exclude = set(exclude_paths or ["/health", "/health/ready", "/health/live", "/docs", "/openapi.json"])
-
-    class AuthMiddleware(BaseHTTPMiddleware):
-        async def dispatch(self, request: Request, call_next: Any) -> Response:
-            if request.url.path in _exclude:
-                return await call_next(request)
-
-            # Try API key
-            if _api_keys:
-                key = request.headers.get(api_key_header, "")
-                if any(hmac.compare_digest(key, k) for k in _api_keys):
-                    return await call_next(request)
-
-            # Try bearer token
-            if _bearer_tokens:
-                auth_value = request.headers.get(auth_header, "")
-                if auth_value.startswith("Bearer "):
-                    token = auth_value[7:]
-                    if any(hmac.compare_digest(token, t) for t in _bearer_tokens):
-                        return await call_next(request)
-
-            # If no auth methods configured, allow all requests
-            if not _api_keys and not _bearer_tokens:
-                return await call_next(request)
-
-            return _JSONResponse(
-                {"detail": "Unauthorized"},
-                status_code=401,
-            )
-
-    app.add_middleware(AuthMiddleware)
-
-
-def add_rate_limit_middleware(
-    app: Any,
-    *,
-    max_requests: int = 60,
-    window_seconds: float = 60.0,
-    key_func: Any | None = None,
-) -> None:
-    """Add rate-limiting middleware to a FastAPI/Starlette application.
-
-    Parameters:
-        app: The FastAPI or Starlette application.
-        max_requests: Maximum requests per window per client.
-        window_seconds: Rate limit window in seconds.
-        key_func: Optional callable ``(Request) -> str`` for the rate key.
-            Defaults to the client's IP address.
-    """
-    if BaseHTTPMiddleware is None or JSONResponse is None:
-        raise ImportError("starlette is required for add_rate_limit_middleware")
-
-    _JSONResponse = JSONResponse  # noqa: N806 — local rebinding to narrow Optional for nested class
-    limiter = RateLimiter(max_requests=max_requests, window_seconds=window_seconds)
-
-    class RateLimitMiddleware(BaseHTTPMiddleware):
-        async def dispatch(self, request: Request, call_next: Any) -> Response:
-            rk = key_func(request) if key_func is not None else (request.client.host if request.client else "unknown")
-            if not limiter.is_allowed(rk):
-                return _JSONResponse(
-                    {"detail": "Rate limit exceeded"},
-                    status_code=429,
-                )
-            return await call_next(request)
-
-    app.add_middleware(RateLimitMiddleware)
-
-
-def add_trace_propagation_middleware(app: Any) -> None:
-    """Add W3C Trace Context propagation middleware.
-
-    This middleware automatically extracts trace context from incoming HTTP
-    requests (via ``traceparent`` and ``tracestate`` headers) and injects
-    trace context into outgoing HTTP responses.
-
-    This enables distributed tracing across microservices and external systems
-    that support the W3C Trace Context standard.
-
-    Parameters:
-        app: The FastAPI or Starlette application.
-
-    Example::
-
-        from fastapi import FastAPI
-        from fireflyframework_agentic.exposure.rest.middleware import add_trace_propagation_middleware
-
-        app = FastAPI()
-        add_trace_propagation_middleware(app)
-
-        # Now all requests will automatically participate in distributed traces
-
-    See Also:
-        - https://www.w3.org/TR/trace-context/
-        - :func:`~fireflyframework_agentic.observability.tracer.extract_trace_context`
-        - :func:`~fireflyframework_agentic.observability.tracer.inject_trace_context`
-    """
-    if BaseHTTPMiddleware is None:
-        raise ImportError("starlette is required for add_trace_propagation_middleware")
-
-    class TracePropagationMiddleware(BaseHTTPMiddleware):
-        async def dispatch(self, request: Request, call_next: Any) -> Response:
-            # Extract trace context from incoming request
-            headers = dict(request.headers)
-            span_context = extract_trace_context(headers)
-
-            # Process request within trace context scope
-            with trace_context_scope(span_context):
-                response: Response = await call_next(request)
-
-            # Inject trace context into outgoing response
-            response_headers = dict(response.headers)
-            inject_trace_context(response_headers)
-            for key, value in response_headers.items():
-                if key.lower() not in response.headers:
-                    response.headers[key] = value
-
-            return response
-
-    app.add_middleware(TracePropagationMiddleware)
-    logger.info("Trace propagation middleware enabled")
diff --git a/fireflyframework_agentic/exposure/rest/router.py b/fireflyframework_agentic/exposure/rest/router.py
deleted file mode 100644
index 70cd7e49..00000000
--- a/fireflyframework_agentic/exposure/rest/router.py
+++ /dev/null
@@ -1,200 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Auto-generated agent routes.
-
-:func:`create_agent_router` generates REST endpoints for every registered
-agent: ``POST /agents/{name}/run`` and ``GET /agents``.
-"""
-
-from __future__ import annotations
-
-import base64
-import logging
-from typing import TYPE_CHECKING, Any
-
-if TYPE_CHECKING:
-    from fastapi import APIRouter, HTTPException
-    from pydantic_ai.messages import BinaryContent, DocumentUrl, ImageUrl
-    from starlette.responses import StreamingResponse
-else:
-    try:
-        from fastapi import APIRouter, HTTPException  # type: ignore[import-not-found]
-    except ImportError:  # pragma: no cover - optional dep
-        APIRouter = None
-        HTTPException = None
-
-    try:
-        from pydantic_ai.messages import BinaryContent, DocumentUrl, ImageUrl
-    except ImportError:  # pragma: no cover - optional dep
-        BinaryContent = None
-        DocumentUrl = None
-        ImageUrl = None
-
-    try:
-        from starlette.responses import StreamingResponse
-    except ImportError:  # pragma: no cover - optional dep
-        StreamingResponse = None
-
-from fireflyframework_agentic.agents.registry import agent_registry
-from fireflyframework_agentic.exposure.rest.schemas import AgentRequest, AgentResponse
-from fireflyframework_agentic.exposure.rest.streaming import sse_stream, sse_stream_incremental
-from fireflyframework_agentic.memory.manager import MemoryManager
-
-logger = logging.getLogger(__name__)
-
-# Server-side memory manager for REST conversations
-_rest_memory = MemoryManager(working_scope_id="rest")
-
-
-def _resolve_prompt(request: AgentRequest) -> Any:
-    """Convert an AgentRequest prompt into a pydantic-ai compatible format."""
-    if isinstance(request.prompt, str):
-        return request.prompt
-
-    if BinaryContent is None or DocumentUrl is None or ImageUrl is None:
-        raise ImportError(
-            "pydantic-ai is required for multimodal prompts. "
-            "Install it with: pip install fireflyframework-agentic[rest]"
-        )
-
-    parts: list[Any] = []
-    for part in request.prompt:
-        if part.type == "text":
-            parts.append(part.content)
-        elif part.type == "image_url":
-            parts.append(ImageUrl(url=part.content))
-        elif part.type == "document_url":
-            parts.append(DocumentUrl(url=part.content))
-        elif part.type == "binary" and part.media_type:
-            data = base64.b64decode(part.content)
-            parts.append(BinaryContent(data=data, media_type=part.media_type))
-        else:
-            parts.append(part.content)
-    return parts
-
-
-def create_agent_router() -> APIRouter:
-    """Create a FastAPI router with agent invocation endpoints."""
-    if APIRouter is None or HTTPException is None or StreamingResponse is None:
-        raise ImportError(
-            "fastapi is required for the REST router. Install it with: pip install fireflyframework-agentic[rest]"
-        )
-
-    router = APIRouter(prefix="/agents", tags=["agents"])
-    # Local rebindings so type checkers narrow inside nested functions
-    _HTTPException = HTTPException  # noqa: N806 — local alias to narrow Optional
-    _StreamingResponse = StreamingResponse  # noqa: N806 — local alias to narrow Optional
-
-    @router.get("/")
-    async def list_agents() -> list[dict[str, Any]]:
-        return [info.model_dump() for info in agent_registry.list_agents()]
-
-    @router.post("/{name}/run", response_model=AgentResponse)
-    async def run_agent(name: str, request: AgentRequest) -> AgentResponse:
-        if not agent_registry.has(name):
-            raise _HTTPException(status_code=404, detail=f"Agent '{name}' not found")
-        agent = agent_registry.get(name)
-        try:
-            prompt = _resolve_prompt(request)
-            conv_id = request.conversation_id
-            result = await agent.run(prompt, deps=request.deps, conversation_id=conv_id)
-            output = result.output if hasattr(result, "output") else str(result)
-            return AgentResponse(agent_name=name, output=output)
-        except Exception:
-            logger.exception("Agent '%s' run failed", name)
-            return AgentResponse(agent_name=name, output=None, success=False, error="Internal server error")
-
-    @router.post("/{name}/stream")
-    async def stream_agent(name: str, request: AgentRequest) -> Any:
-        """Stream agent responses in buffered mode (chunks/messages).
-
-        This endpoint uses buffered streaming where the model's output is
-        streamed in chunks or complete messages. Good for most use cases.
-        """
-        if not agent_registry.has(name):
-            raise _HTTPException(status_code=404, detail=f"Agent '{name}' not found")
-        agent = agent_registry.get(name)
-        prompt = _resolve_prompt(request)
-        conv_id = request.conversation_id
-        return _StreamingResponse(
-            sse_stream(agent, prompt, deps=request.deps, conversation_id=conv_id),
-            media_type="text/event-stream",
-        )
-
-    @router.post("/{name}/stream/incremental")
-    async def stream_agent_incremental(
-        name: str,
-        request: AgentRequest,
-        debounce_ms: float = 0.0,
-    ) -> Any:
-        """Stream agent responses in incremental mode (token-by-token).
-
-        This endpoint provides true token-by-token streaming with minimal
-        latency. Tokens are sent as soon as they arrive from the model,
-        without buffering. Ideal for interactive applications where users
-        want to see responses immediately.
-
-        Args:
-            debounce_ms: Optional debounce delay in milliseconds to batch
-                rapid tokens. Default 0 = no debouncing.
-        """
-        if not agent_registry.has(name):
-            raise _HTTPException(status_code=404, detail=f"Agent '{name}' not found")
-        agent = agent_registry.get(name)
-        prompt = _resolve_prompt(request)
-        conv_id = request.conversation_id
-        return _StreamingResponse(
-            sse_stream_incremental(
-                agent,
-                prompt,
-                debounce_ms=debounce_ms,
-                deps=request.deps,
-                conversation_id=conv_id,
-            ),
-            media_type="text/event-stream",
-        )
-
-    # -- Conversation management ---------------------------------------------
-
-    @router.post("/conversations", tags=["conversations"])
-    async def create_conversation() -> dict[str, str]:
-        """Create a new conversation and return its ID."""
-        conv_id = _rest_memory.new_conversation()
-        return {"conversation_id": conv_id}
-
-    @router.get("/conversations/{conversation_id}", tags=["conversations"])
-    async def get_conversation(conversation_id: str) -> dict[str, Any]:
-        """Return the message history for a conversation."""
-        messages = _rest_memory.get_message_history(conversation_id)
-        serialized = []
-        for msg in messages:
-            dumper = getattr(msg, "model_dump", None)
-            if dumper is not None:
-                serialized.append(dumper(mode="json"))
-            else:
-                serialized.append({"content": str(msg)})
-        return {
-            "conversation_id": conversation_id,
-            "message_count": len(messages),
-            "messages": serialized,
-        }
-
-    @router.delete("/conversations/{conversation_id}", tags=["conversations"])
-    async def delete_conversation(conversation_id: str) -> dict[str, str]:
-        """Clear a conversation's history."""
-        _rest_memory.clear_conversation(conversation_id)
-        return {"status": "cleared", "conversation_id": conversation_id}
-
-    return router
diff --git a/fireflyframework_agentic/exposure/rest/schemas.py b/fireflyframework_agentic/exposure/rest/schemas.py
deleted file mode 100644
index cf1d9bce..00000000
--- a/fireflyframework_agentic/exposure/rest/schemas.py
+++ /dev/null
@@ -1,69 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Request and response Pydantic models for the REST exposure layer."""
-
-from __future__ import annotations
-
-from typing import Any
-
-from pydantic import BaseModel
-
-
-class MultiModalPart(BaseModel):
-    """A single multimodal content part in a REST request.
-
-    Attributes:
-        type: Content type: ``"text"``, ``"image_url"``, ``"document_url"``,
-            ``"audio_url"``, ``"video_url"``, or ``"binary"``.
-        content: The content value (text string, URL, or base64 data).
-        media_type: MIME type for binary content.
-    """
-
-    type: str = "text"
-    content: str = ""
-    media_type: str | None = None
-
-
-class AgentRequest(BaseModel):
-    """Request body for agent invocation.
-
-    *prompt* can be a plain string or a list of multimodal parts for VLM
-    use cases (images, documents, etc.).
-
-    When *conversation_id* is provided, the server maintains
-    conversation history across requests.
-    """
-
-    prompt: str | list[MultiModalPart] = ""
-    deps: Any = None
-    model_settings: dict[str, Any] | None = None
-    conversation_id: str | None = None
-
-
-class AgentResponse(BaseModel):
-    """Response body from an agent invocation."""
-
-    agent_name: str
-    output: Any
-    success: bool = True
-    error: str | None = None
-
-
-class HealthResponse(BaseModel):
-    """Health check response."""
-
-    status: str = "ok"
-    agents: int = 0
-    details: dict[str, str] = {}
diff --git a/fireflyframework_agentic/exposure/rest/streaming.py b/fireflyframework_agentic/exposure/rest/streaming.py
deleted file mode 100644
index e584d1e3..00000000
--- a/fireflyframework_agentic/exposure/rest/streaming.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Server-Sent Events (SSE) streaming support for agent responses."""
-
-from __future__ import annotations
-
-import json
-from collections.abc import AsyncIterator
-from typing import Any, cast
-
-from fireflyframework_agentic.types import AgentLike
-
-
-async def sse_stream(agent: AgentLike, prompt: Any, **kwargs: Any) -> AsyncIterator[str]:
-    """Yield SSE-formatted events from an agent's streaming response.
-
-    Each yielded string is a complete SSE event (``data: ...\\n\\n``).
-
-    This uses buffered streaming mode (chunks/messages).
-    """
-    async with await cast("Any", agent).run_stream(prompt, **kwargs) as stream:
-        async for chunk in stream.stream_text():
-            yield f"data: {json.dumps({'text': chunk})}\n\n"
-    yield "data: [DONE]\n\n"
-
-
-async def sse_stream_incremental(
-    agent: AgentLike,
-    prompt: Any,
-    debounce_ms: float = 0.0,
-    **kwargs: Any,
-) -> AsyncIterator[str]:
-    """Yield SSE-formatted events with true token-by-token streaming.
-
-    This provides minimal latency streaming by yielding individual tokens
-    as they arrive from the model, without buffering into chunks.
-
-    Args:
-        agent: The agent to run.
-        prompt: The prompt to send.
-        debounce_ms: Optional debounce delay in milliseconds to batch
-            rapid tokens. Default 0 = no debouncing.
-        **kwargs: Additional arguments passed to run_stream().
-
-    Yields:
-        SSE-formatted token events with minimal latency.
-
-    Example SSE event:
-        data: {"token": "Hello"}\\n\\n
-    """
-    async with await cast("Any", agent).run_stream(prompt, streaming_mode="incremental", **kwargs) as stream:
-        async for token in stream.stream_tokens(debounce_ms=debounce_ms):
-            yield f"data: {json.dumps({'token': token})}\n\n"
-    yield "data: [DONE]\n\n"
diff --git a/fireflyframework_agentic/exposure/rest/websocket.py b/fireflyframework_agentic/exposure/rest/websocket.py
deleted file mode 100644
index 141cf474..00000000
--- a/fireflyframework_agentic/exposure/rest/websocket.py
+++ /dev/null
@@ -1,163 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""WebSocket endpoint for bidirectional multi-turn agent conversations.
-
-Clients connect to ``/ws/agents/{name}`` and send JSON messages.
-The server responds with streamed tokens and final results over the
-same connection, enabling real-time conversational UIs.
-
-Message protocol
-----------------
-
-**Client → Server** (JSON)::
-
-    {
-        "prompt": "Hello, agent!",
-        "conversation_id": "optional-id",
-        "deps": null
-    }
-
-**Server → Client** (JSON, one or more)::
-
-    {"type": "token",   "data": "partial text..."}
-    {"type": "result",  "data": "full output", "success": true}
-    {"type": "error",   "data": "error message", "success": false}
-"""
-
-from __future__ import annotations
-
-import json
-import logging
-import uuid
-from typing import TYPE_CHECKING, Any
-
-if TYPE_CHECKING:
-    from fastapi import APIRouter, WebSocket, WebSocketDisconnect
-else:
-    try:
-        from fastapi import APIRouter, WebSocket, WebSocketDisconnect  # type: ignore[import-not-found]
-    except ImportError:  # pragma: no cover - optional dep
-        APIRouter = None
-        WebSocket = None
-        WebSocketDisconnect = None
-
-from fireflyframework_agentic.agents.registry import agent_registry
-from fireflyframework_agentic.memory.manager import MemoryManager
-
-logger = logging.getLogger(__name__)
-
-
-def create_websocket_router() -> APIRouter:
-    """Create a FastAPI router with the agent WebSocket endpoint."""
-    if APIRouter is None or WebSocketDisconnect is None:
-        raise ImportError(
-            "WebSocket support requires 'fastapi'. Install with: pip install fireflyframework-agentic[rest]"
-        )
-
-    _WebSocketDisconnect = WebSocketDisconnect  # noqa: N806 — local alias to narrow Optional for `except` clause
-    router = APIRouter(tags=["websocket"])
-    _ws_memory = MemoryManager(working_scope_id="ws")
-
-    @router.websocket("/ws/agents/{name}")
-    async def agent_ws(websocket: WebSocket, name: str) -> None:
-        """Multi-turn WebSocket conversation with a registered agent."""
-        if not agent_registry.has(name):
-            await websocket.close(code=4004, reason=f"Agent '{name}' not found")
-            return
-
-        await websocket.accept()
-        agent = agent_registry.get(name)
-        conversation_id: str | None = None
-
-        # Use a per-connection memory scope to avoid cross-talk between
-        # concurrent WebSocket sessions sharing the same agent.
-        conn_id = uuid.uuid4().hex[:8]
-        _ws_memory.fork(working_scope_id=f"ws:{conn_id}")
-
-        try:
-            while True:
-                raw = await websocket.receive_text()
-                try:
-                    msg: dict[str, Any] = json.loads(raw)
-                except json.JSONDecodeError:
-                    await _send_error(websocket, "Invalid JSON")
-                    continue
-
-                prompt = msg.get("prompt", "")
-                if not prompt:
-                    await _send_error(websocket, "Missing 'prompt' field")
-                    continue
-
-                # Conversation management
-                conversation_id = msg.get("conversation_id") or conversation_id
-                if conversation_id is None:
-                    conversation_id = uuid.uuid4().hex
-                    await websocket.send_json(
-                        {"type": "conversation_id", "data": conversation_id},
-                    )
-
-                deps = msg.get("deps")
-
-                # Attempt streaming; if it fails, report the error rather than
-                # falling through to run() which would double-process.
-                try:
-                    final: str | None = None
-
-                    if hasattr(agent, "run_stream"):
-                        try:
-                            async with await agent.run_stream(  # type: ignore[attr-defined]
-                                prompt,
-                                deps=deps,
-                                conversation_id=conversation_id,
-                            ) as stream:
-                                full_output: list[str] = []
-                                async for token in stream.stream_text(delta=True):
-                                    full_output.append(token)
-                                    await websocket.send_json(
-                                        {"type": "token", "data": token},
-                                    )
-                                final = "".join(full_output)
-                        except Exception as exc:
-                            # Streaming not supported or failed — fall back
-                            logger.debug("Streaming failed for '%s': %s", name, exc)
-                            final = None
-
-                    if final is None:
-                        result = await agent.run(
-                            prompt,
-                            deps=deps,
-                            conversation_id=conversation_id,
-                        )
-                        final = result.output if hasattr(result, "output") else str(result)
-
-                    await websocket.send_json(
-                        {"type": "result", "data": final, "success": True},
-                    )
-
-                except Exception as exc:
-                    logger.exception("WebSocket agent error for '%s'", name)
-                    await _send_error(websocket, str(exc))
-
-        except _WebSocketDisconnect:
-            logger.debug("WebSocket client disconnected from agent '%s'", name)
-
-    return router
-
-
-async def _send_error(websocket: Any, message: str) -> None:
-    """Send an error frame to the client."""
-    await websocket.send_json(
-        {"type": "error", "data": message, "success": False},
-    )
diff --git a/fireflyframework_agentic/observability/__init__.py b/fireflyframework_agentic/observability/__init__.py
index db42dd92..c956a1e7 100644
--- a/fireflyframework_agentic/observability/__init__.py
+++ b/fireflyframework_agentic/observability/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Observability subpackage -- tracing, metrics, events, and exporters."""
+"""Observability subpackage -- tracing, metrics, events, and cost/usage tracking."""
 
 from fireflyframework_agentic.observability.budget import (
     BudgetGate,
@@ -31,7 +31,6 @@
 )
 from fireflyframework_agentic.observability.decorators import metered, traced
 from fireflyframework_agentic.observability.events import FireflyEvent, FireflyEvents, default_events
-from fireflyframework_agentic.observability.exporters import ProviderBundle, configure_exporters
 from fireflyframework_agentic.observability.metrics import FireflyMetrics, default_metrics
 from fireflyframework_agentic.observability.sinks import (
     CostSink,
@@ -39,14 +38,10 @@
     JSONLFileSink,
     LoggingSink,
     OTelMetricsSink,
-    WebhookSink,
 )
 from fireflyframework_agentic.observability.tracer import (
     FireflyTracer,
     default_tracer,
-    extract_trace_context,
-    inject_trace_context,
-    trace_context_scope,
 )
 from fireflyframework_agentic.observability.usage import (
     UsageRecord,
@@ -71,24 +66,18 @@
     "JSONLFileSink",
     "LoggingSink",
     "OTelMetricsSink",
-    "ProviderBundle",
     "ScopeContext",
     "UnknownModelCostError",
     "UsageRecord",
     "UsageSummary",
     "UsageTracker",
-    "WebhookSink",
-    "configure_exporters",
     "default_events",
     "default_metrics",
     "default_tracer",
     "default_usage_tracker",
-    "extract_trace_context",
     "genai_prices_cost",
-    "inject_trace_context",
     "metered",
     "provider_reported_cost",
     "resolve_cost",
-    "trace_context_scope",
     "traced",
 ]
diff --git a/fireflyframework_agentic/observability/exporters.py b/fireflyframework_agentic/observability/exporters.py
deleted file mode 100644
index db49b675..00000000
--- a/fireflyframework_agentic/observability/exporters.py
+++ /dev/null
@@ -1,234 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""OpenTelemetry exporter configuration helpers.
-
-:func:`configure_exporters` is the single place vendor knowledge lives. It
-sets up traces, metrics, and logs providers and attaches the exporters chosen
-by which kwargs are passed (console, OTLP).
-
-Application code never imports vendor exporters directly --- it uses
-``trace.get_tracer(...)``, ``metrics.get_meter(...)``, and Python ``logging``,
-all of which the providers configured here transparently route to whichever
-backend is active.
-"""
-
-from __future__ import annotations
-
-import logging
-import socket
-import uuid
-from dataclasses import dataclass
-from importlib.metadata import PackageNotFoundError, version
-
-from opentelemetry import _logs as otel_logs
-from opentelemetry import metrics as otel_metrics
-from opentelemetry import trace
-from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
-from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
-from opentelemetry.sdk._logs.export import ConsoleLogExporter as _ConsoleLogExporter
-
-# Newer SDKs renamed ConsoleLogExporter to ConsoleLogRecordExporter; tolerate both.
-try:
-    from opentelemetry.sdk._logs.export import (
-        ConsoleLogRecordExporter as ConsoleLogExporter,  # type: ignore[import-not-found]
-    )
-except ImportError:
-    ConsoleLogExporter = _ConsoleLogExporter  # type: ignore[assignment, misc]
-from opentelemetry.sdk.metrics import MeterProvider
-from opentelemetry.sdk.metrics.export import (
-    ConsoleMetricExporter,
-    PeriodicExportingMetricReader,
-)
-from opentelemetry.sdk.resources import Resource
-from opentelemetry.sdk.trace import TracerProvider
-from opentelemetry.sdk.trace.export import (
-    BatchSpanProcessor,
-    ConsoleSpanExporter,
-    SimpleSpanProcessor,
-)
-
-try:
-    from opentelemetry.exporter.otlp.proto.grpc._log_exporter import (  # type: ignore[import-not-found]
-        OTLPLogExporter,
-    )
-    from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import (  # type: ignore[import-not-found]
-        OTLPMetricExporter,
-    )
-    from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (  # type: ignore[import-not-found]
-        OTLPSpanExporter,
-    )
-except ImportError:  # pragma: no cover - optional dep
-    OTLPLogExporter = None  # type: ignore[assignment,misc]
-    OTLPMetricExporter = None  # type: ignore[assignment,misc]
-    OTLPSpanExporter = None  # type: ignore[assignment,misc]
-
-logger = logging.getLogger(__name__)
-
-_FIREFLY_LOGGER_NAME = "fireflyframework_agentic"
-
-
-def _service_version() -> str:
-    """Return the installed package version for the OTel ``service.version``
-    resource attribute. Falls back to ``"unknown"`` if the package is not
-    installed (e.g. running from a source checkout without ``uv sync``).
-    """
-    try:
-        return version("fireflyframework-agentic")
-    except PackageNotFoundError:
-        return "unknown"
-
-
-@dataclass(frozen=True)
-class ProviderBundle:
-    """The three OTel providers configured by :func:`configure_exporters`.
-
-    Returned so callers can attach extra processors, but the providers are
-    also registered globally so ``trace.get_tracer(...)`` / ``metrics.get_meter(...)``
-    pick them up automatically.
-
-    Use the ``tracer`` attribute to reach the ``TracerProvider`` directly, e.g.
-    ``configure_exporters(...).tracer.add_span_processor(...)``.
-    """
-
-    tracer: TracerProvider
-    meter: MeterProvider
-    log: LoggerProvider
-
-
-class _ConfigState:
-    """Module-level guard so repeat calls don't double-register exporters or
-    stack a second LoggingHandler on the firefly logger.
-
-    Encapsulated as a class rather than two ``module-level`` globals so
-    static analysers (CodeQL, the github-code-quality bot) correctly see
-    the read-then-write pattern as a real read-after-write rather than
-    incorrectly flagging the writes as "unused global variables". Same
-    runtime behaviour as the previous ``_configured_signature`` and
-    ``_logging_handler_installed`` globals.
-    """
-
-    signature: tuple[object, ...] | None = None
-    handler: LoggingHandler | None = None
-
-
-_state = _ConfigState()
-
-
-def configure_exporters(
-    *,
-    service_name: str = _FIREFLY_LOGGER_NAME,
-    otlp_endpoint: str | None = None,
-    console: bool = False,
-    metric_export_interval_ms: int = 60_000,
-) -> ProviderBundle:
-    """Set up trace, metric, and log providers with the requested exporters.
-
-    Parameters:
-        service_name: OTel ``service.name`` resource attribute.
-        otlp_endpoint: When set, attaches gRPC OTLP exporters for traces,
-            metrics, and logs. Vendor-neutral path (Jaeger, Tempo, ADOT, ...).
-        console: When *True*, attaches console exporters for all three signal
-            types. Useful for local development.
-        metric_export_interval_ms: How often the metric reader flushes
-            histograms and counters. Default 60s.
-
-    Returns:
-        A :class:`ProviderBundle` exposing the three providers. The providers
-        are also registered globally; in most cases callers do not need to
-        touch the bundle.
-
-    Notes:
-        - **Idempotent**: repeat calls with identical effective configuration
-          are a no-op. The kwargs become a signature tuple keyed against
-          ``_state.signature``.
-        - A :class:`LoggingHandler` is attached to the
-          ``fireflyframework_agentic`` parent logger so both
-          ``logger.info(...)`` calls and :class:`FireflyEvents` payloads are
-          delivered through OTel logs to whichever exporter is active.
-    """
-    signature = (
-        service_name,
-        otlp_endpoint,
-        console,
-        metric_export_interval_ms,
-    )
-    if _state.signature == signature:
-        # Nothing to do; existing providers already serve get_tracer/get_meter.
-        return ProviderBundle(
-            tracer=trace.get_tracer_provider(),  # type: ignore[return-value]
-            meter=otel_metrics.get_meter_provider(),  # type: ignore[return-value]
-            log=otel_logs.get_logger_provider(),  # type: ignore[return-value]
-        )
-
-    resource = Resource.create(
-        {
-            "service.name": service_name,
-            "service.version": _service_version(),
-            "service.instance.id": f"{socket.gethostname()}-{uuid.uuid4().hex[:8]}",
-        }
-    )
-
-    tracer_provider = TracerProvider(resource=resource)
-    metric_readers: list[PeriodicExportingMetricReader] = []
-    logger_provider = LoggerProvider(resource=resource)
-
-    # ── Console exporters ──────────────────────────────────────────────────
-    if console:
-        tracer_provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
-        metric_readers.append(
-            PeriodicExportingMetricReader(
-                ConsoleMetricExporter(),
-                export_interval_millis=metric_export_interval_ms,
-            )
-        )
-        logger_provider.add_log_record_processor(BatchLogRecordProcessor(ConsoleLogExporter()))
-        logger.info("Console exporters attached")
-
-    # ── OTLP exporters (Jaeger, Tempo, ADOT, generic collectors) ──────────
-    if otlp_endpoint:
-        if OTLPSpanExporter is None or OTLPMetricExporter is None or OTLPLogExporter is None:
-            logger.warning(
-                "opentelemetry-exporter-otlp-proto-grpc is not installed; "
-                "OTLP export disabled. Install the package to enable it."
-            )
-        else:
-            tracer_provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(endpoint=otlp_endpoint)))
-            metric_readers.append(
-                PeriodicExportingMetricReader(
-                    OTLPMetricExporter(endpoint=otlp_endpoint),
-                    export_interval_millis=metric_export_interval_ms,
-                )
-            )
-            logger_provider.add_log_record_processor(BatchLogRecordProcessor(OTLPLogExporter(endpoint=otlp_endpoint)))
-            logger.info("OTLP exporters attached: %s", otlp_endpoint)
-
-    # MeterProvider takes its readers up front, not via add_*().
-    meter_provider = MeterProvider(resource=resource, metric_readers=metric_readers)
-
-    trace.set_tracer_provider(tracer_provider)
-    otel_metrics.set_meter_provider(meter_provider)
-    otel_logs.set_logger_provider(logger_provider)
-
-    # Bridge Python logging -> OTel logs once; replace any prior handler we
-    # installed so repeat calls with new providers point at the new ones.
-    firefly_logger = logging.getLogger(_FIREFLY_LOGGER_NAME)
-    if _state.handler is not None:
-        firefly_logger.removeHandler(_state.handler)
-    handler = LoggingHandler(level=logging.INFO, logger_provider=logger_provider)
-    firefly_logger.addHandler(handler)
-    _state.handler = handler
-
-    _state.signature = signature
-    return ProviderBundle(tracer=tracer_provider, meter=meter_provider, log=logger_provider)
diff --git a/fireflyframework_agentic/observability/sinks.py b/fireflyframework_agentic/observability/sinks.py
index 564e4ed5..202916e1 100644
--- a/fireflyframework_agentic/observability/sinks.py
+++ b/fireflyframework_agentic/observability/sinks.py
@@ -7,14 +7,9 @@
 
 import logging
 import threading
-import time
-from collections.abc import Callable
 from datetime import UTC, datetime
 from pathlib import Path
-from queue import Empty, Queue
-from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
-
-import httpx
+from typing import TYPE_CHECKING, Protocol, runtime_checkable
 
 from fireflyframework_agentic.observability.events import default_events
 from fireflyframework_agentic.observability.metrics import default_metrics
@@ -138,113 +133,3 @@ def _maybe_rotate(self, incoming_bytes: int) -> None:
 
     def flush(self) -> None: ...
     def close(self) -> None: ...
-
-
-def _default_post(url: str, json: list[dict], headers: dict, timeout: float) -> Any:
-    """Default POST function. Replaced in tests via WebhookSink(_post=...)."""
-    return httpx.post(url, json=json, headers=headers, timeout=timeout)
-
-
-class WebhookSink:
-    """Batch records and POST them to an HTTP endpoint.
-
-    Parameters:
-        url: Endpoint URL.
-        batch_size: Records per POST. Drained sooner on ``flush_interval_s``.
-        flush_interval_s: Background flush cadence in seconds.
-        headers: Extra HTTP headers (Authorization, etc.).
-        max_retries: How many times to retry a 5xx response before dropping.
-        timeout_s: Per-request HTTP timeout.
-        _post: Internal hook for tests.
-    """
-
-    def __init__(
-        self,
-        url: str,
-        *,
-        batch_size: int = 50,
-        flush_interval_s: float = 5.0,
-        headers: dict[str, str] | None = None,
-        max_retries: int = 3,
-        timeout_s: float = 10.0,
-        _post: Callable[[str, list[dict], dict, float], Any] | None = None,
-    ) -> None:
-        self._url = url
-        self._batch_size = batch_size
-        self._interval = flush_interval_s
-        self._headers = headers or {}
-        self._max_retries = max_retries
-        self._timeout = timeout_s
-        self._post = _post or _default_post
-        self._queue: Queue[UsageRecord] = Queue()
-        self._stop = threading.Event()
-        self._thread = threading.Thread(target=self._run, name="WebhookSink", daemon=True)
-        self._thread.start()
-
-    def emit(self, record: UsageRecord) -> None:
-        self._queue.put(record)
-
-    def _run(self) -> None:
-        buf: list[UsageRecord] = []
-        last_flush = time.monotonic()
-        while not self._stop.is_set():
-            try:
-                rec = self._queue.get(timeout=0.1)
-                buf.append(rec)
-            except Empty:
-                pass
-            now = time.monotonic()
-            if len(buf) >= self._batch_size or (buf and now - last_flush >= self._interval):
-                self._send(buf)
-                buf = []
-                last_flush = now
-        # Drain remaining on stop.
-        while True:
-            try:
-                buf.append(self._queue.get_nowait())
-            except Empty:
-                break
-        if buf:
-            self._send(buf)
-
-    def _send(self, batch: list[UsageRecord]) -> None:
-        payload = [r.model_dump(mode="json") for r in batch]
-        delay = 0.1
-        for attempt in range(self._max_retries + 1):
-            try:
-                resp = self._post(self._url, payload, self._headers, self._timeout)
-                status = int(getattr(resp, "status_code", 0))
-                if 200 <= status < 300:
-                    return
-                if 500 <= status < 600 and attempt < self._max_retries:
-                    time.sleep(delay)
-                    delay *= 2
-                    continue
-                logger.warning("WebhookSink: dropping batch (status %d)", status)
-                self._record_sink_error()
-                return
-            except Exception:  # noqa: BLE001
-                if attempt < self._max_retries:
-                    time.sleep(delay)
-                    delay *= 2
-                    continue
-                logger.warning("WebhookSink: dropping batch after exhausted retries", exc_info=True)
-                self._record_sink_error()
-                return
-
-    @staticmethod
-    def _record_sink_error() -> None:
-        try:
-            default_metrics.record_error(operation="cost_sink_errors")
-        except Exception:  # noqa: BLE001
-            logger.debug("Failed to emit cost_sink_errors metric", exc_info=True)
-
-    def flush(self) -> None:
-        """Block until the queue is empty (best-effort)."""
-        while not self._queue.empty():
-            time.sleep(0.01)
-
-    def close(self) -> None:
-        """Stop background thread and drain remaining records."""
-        self._stop.set()
-        self._thread.join(timeout=self._interval + 2.0)
diff --git a/fireflyframework_agentic/observability/tracer.py b/fireflyframework_agentic/observability/tracer.py
index 974439b6..a488fd7c 100644
--- a/fireflyframework_agentic/observability/tracer.py
+++ b/fireflyframework_agentic/observability/tracer.py
@@ -16,26 +16,19 @@
 
 :class:`FireflyTracer` wraps the OpenTelemetry tracer with convenience
 methods for creating agent- and tool-scoped spans.
-
-This module also provides W3C Trace Context propagation utilities for
-distributed tracing across HTTP and queue boundaries.
 """
 
 from __future__ import annotations
 
 from collections.abc import Generator
 from contextlib import contextmanager
-from contextvars import ContextVar
 from typing import Any
 
 from opentelemetry import trace
-from opentelemetry.trace import Span, SpanContext, StatusCode, TraceFlags, Tracer, TraceState
+from opentelemetry.trace import Span, StatusCode, Tracer
 
 _TRACER_NAME = "fireflyframework_agentic"
 
-# Context variable for trace propagation across async boundaries
-_trace_context: ContextVar[SpanContext | None] = ContextVar("trace_context", default=None)
-
 
 class FireflyTracer:
     """High-level tracer that creates spans with Firefly-specific attributes.
@@ -118,183 +111,3 @@ def set_error(span: Span, error: Exception) -> None:
 
 # Module-level default tracer
 default_tracer = FireflyTracer()
-
-
-# -- W3C Trace Context Propagation ------------------------------------------
-
-
-def inject_trace_context(headers: dict[str, str]) -> None:
-    """Inject W3C Trace Context headers into an outgoing request/message.
-
-    This function follows the W3C Trace Context specification to propagate
-    trace information across HTTP and message queue boundaries. It adds
-    ``traceparent`` and ``tracestate`` headers to the provided dictionary.
-
-    Parameters:
-        headers: Dictionary of headers to inject trace context into. Modified in-place.
-
-    Example:
-        Inject trace context for HTTP request::
-
-            headers = {}
-            inject_trace_context(headers)
-            response = await http_client.post(url, headers=headers, ...)
-
-        Inject trace context for Kafka message::
-
-            headers = {}
-            inject_trace_context(headers)
-            await producer.send(
-                topic,
-                value=message,
-                headers=[(k, v.encode()) for k, v in headers.items()]
-            )
-
-    See Also:
-        - https://www.w3.org/TR/trace-context/
-        - :func:`extract_trace_context`
-    """
-    span = trace.get_current_span()
-    if span is None:
-        return
-
-    ctx = span.get_span_context()
-    if not ctx.is_valid:
-        return
-
-    # W3C traceparent header format:
-    # version-trace_id-parent_id-trace_flags
-    # Example: 00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01
-    traceparent = f"00-{ctx.trace_id:032x}-{ctx.span_id:016x}-{ctx.trace_flags:02x}"
-    headers["traceparent"] = traceparent
-
-    # Include tracestate if present
-    if ctx.trace_state:
-        headers["tracestate"] = ctx.trace_state.to_header()
-
-
-def extract_trace_context(headers: dict[str, str]) -> SpanContext | None:
-    """Extract W3C Trace Context from incoming request/message headers.
-
-    This function parses ``traceparent`` and ``tracestate`` headers according
-    to the W3C Trace Context specification and returns a SpanContext that can
-    be used to continue a distributed trace.
-
-    Parameters:
-        headers: Dictionary of headers containing trace context. Keys are
-            case-insensitive (``traceparent`` or ``Traceparent`` both work).
-
-    Returns:
-        SpanContext if valid trace context is found, None otherwise.
-
-    Example:
-        Extract trace context from HTTP request::
-
-            from opentelemetry import trace
-
-            span_context = extract_trace_context(request.headers)
-            if span_context:
-                with trace.use_span(
-                    trace.NonRecordingSpan(span_context),
-                    end_on_exit=False
-                ):
-                    # Your code runs within the distributed trace
-                    await agent.run(prompt)
-
-        Extract trace context from Kafka message::
-
-            headers = {k: v.decode() for k, v in message.headers}
-            span_context = extract_trace_context(headers)
-            if span_context:
-                _trace_context.set(span_context)
-                await process_message(message)
-
-    See Also:
-        - https://www.w3.org/TR/trace-context/
-        - :func:`inject_trace_context`
-    """
-    # Case-insensitive header lookup
-    headers_lower = {k.lower(): v for k, v in headers.items()}
-    traceparent = headers_lower.get("traceparent")
-
-    if not traceparent:
-        return None
-
-    try:
-        # Parse W3C traceparent: version-trace_id-parent_id-trace_flags
-        parts = traceparent.split("-")
-        if len(parts) != 4:
-            return None
-
-        version, trace_id_hex, span_id_hex, flags_hex = parts
-
-        # Only support version 00
-        if version != "00":
-            return None
-
-        trace_id = int(trace_id_hex, 16)
-        span_id = int(span_id_hex, 16)
-        trace_flags = TraceFlags(int(flags_hex, 16))
-
-        # Parse tracestate if present
-        tracestate_header = headers_lower.get("tracestate")
-        trace_state = TraceState.from_header([tracestate_header]) if tracestate_header else None
-
-        return SpanContext(
-            trace_id=trace_id,
-            span_id=span_id,
-            is_remote=True,
-            trace_flags=trace_flags,
-            trace_state=trace_state,
-        )
-    except (ValueError, TypeError):
-        # Invalid trace context format
-        return None
-
-
-def get_trace_context() -> SpanContext | None:
-    """Get the current trace context from the context variable.
-
-    Returns:
-        The active SpanContext, or None if no context is set.
-    """
-    return _trace_context.get()
-
-
-def set_trace_context(context: SpanContext | None) -> None:
-    """Set the trace context in the context variable.
-
-    Parameters:
-        context: SpanContext to set, or None to clear.
-    """
-    _trace_context.set(context)
-
-
-@contextmanager
-def trace_context_scope(context: SpanContext | None) -> Generator[None]:
-    """Context manager that sets trace context for the duration of the scope.
-
-    Parameters:
-        context: SpanContext to use within the scope.
-
-    Example::
-
-        span_context = extract_trace_context(headers)
-        with trace_context_scope(span_context):
-            # All spans created here will be children of the extracted context
-            with default_tracer.agent_span("my_agent"):
-                result = await agent.run(prompt)
-    """
-    token = _trace_context.set(context)
-    try:
-        if context:
-            # Make OpenTelemetry use this context as the parent
-            with trace.use_span(
-                trace.NonRecordingSpan(context),
-                end_on_exit=False,
-            ):
-                yield
-        else:
-            yield
-    finally:
-        _trace_context.reset(token)
diff --git a/fireflyframework_agentic/security/__init__.py b/fireflyframework_agentic/security/__init__.py
index 292343e7..d6e7c6ea 100644
--- a/fireflyframework_agentic/security/__init__.py
+++ b/fireflyframework_agentic/security/__init__.py
@@ -15,7 +15,6 @@
 """Security features for production deployments.
 
 This module provides:
-- **RBAC** (Role-Based Access Control) with JWT authentication
 - **Encryption** for sensitive data at rest
 - **SQL injection** prevention for database tools
 """
@@ -23,7 +22,6 @@
 from fireflyframework_agentic.security.encryption import AESEncryptionProvider, EncryptedMemoryStore, EncryptionProvider
 from fireflyframework_agentic.security.output_guard import OutputGuard, default_output_guard
 from fireflyframework_agentic.security.prompt_guard import PromptGuard, default_prompt_guard
-from fireflyframework_agentic.security.rbac import RBACManager, require_permission
 
 __all__ = [
     "AESEncryptionProvider",
@@ -31,8 +29,6 @@
     "EncryptionProvider",
     "OutputGuard",
     "PromptGuard",
-    "RBACManager",
     "default_output_guard",
     "default_prompt_guard",
-    "require_permission",
 ]
diff --git a/fireflyframework_agentic/security/rbac.py b/fireflyframework_agentic/security/rbac.py
deleted file mode 100644
index b587d536..00000000
--- a/fireflyframework_agentic/security/rbac.py
+++ /dev/null
@@ -1,452 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Role-Based Access Control (RBAC) with JWT authentication.
-
-This module provides a production-ready RBAC system with:
-- JWT token validation
-- Role and permission management
-- Multi-tenant support
-- Decorator-based permission checking
-
-Example:
-    Basic RBAC setup::
-
-        from fireflyframework_agentic.security.rbac import RBACManager, require_permission
-
-        rbac = RBACManager(jwt_secret="your-secret-key")
-
-        # Validate token and check permissions
-        @require_permission("agents.execute", rbac=rbac)
-        async def run_agent(token: str, agent_name: str):
-            # This function only runs if the user has the permission
-            return await agent.run(...)
-
-    Multi-tenant RBAC::
-
-        rbac = RBACManager(jwt_secret="secret", multi_tenant=True)
-
-        # Token includes tenant_id
-        token = rbac.create_token(
-            user_id="user123",
-            roles=["agent_runner"],
-            tenant_id="acme_corp"
-        )
-"""
-
-from __future__ import annotations
-
-import functools
-import inspect
-import logging
-from collections.abc import Callable
-from datetime import UTC, datetime, timedelta
-from typing import Any
-
-try:
-    import jwt
-except ImportError:  # pragma: no cover - optional dep
-    jwt = None  # type: ignore[assignment]
-
-from fireflyframework_agentic.config import get_config
-
-logger = logging.getLogger(__name__)
-
-
-class RBACManager:
-    """Role-Based Access Control manager with JWT support.
-
-    Manages user roles, permissions, and JWT token validation for
-    securing agent execution and API access in production deployments.
-
-    Parameters:
-        jwt_secret: Secret key for JWT token signing and verification.
-        jwt_algorithm: JWT algorithm (default: HS256).
-        token_expiry_hours: Hours until tokens expire.
-        multi_tenant: Whether to enforce tenant isolation.
-        roles: Role-to-permissions mapping.
-
-    Example::
-
-        rbac = RBACManager(
-            jwt_secret="my-secret-key",
-            roles={
-                "admin": ["*"],
-                "agent_runner": ["agents.execute", "agents.list"],
-                "viewer": ["agents.list"],
-            }
-        )
-
-        # Create token
-        token = rbac.create_token(user_id="user123", roles=["agent_runner"])
-
-        # Validate and extract claims
-        claims = rbac.validate_token(token)
-
-        # Check permission
-        if rbac.has_permission(claims, "agents.execute"):
-            # Allow execution
-            pass
-    """
-
-    def __init__(
-        self,
-        jwt_secret: str | None = None,
-        *,
-        jwt_algorithm: str = "HS256",
-        token_expiry_hours: int = 24,
-        multi_tenant: bool = False,
-        roles: dict[str, list[str]] | None = None,
-    ) -> None:
-        # ``jwt_secret`` is optional so that subclasses or callers that only need
-        # the permission/role machinery (e.g. with externally-issued tokens from
-        # an external identity provider) can construct an RBACManager without a
-        # symmetric secret.
-        # ``create_token``/``validate_token`` raise if invoked without one.
-        self._jwt_secret = jwt_secret
-        self._jwt_algorithm = jwt_algorithm
-        self._token_expiry_hours = token_expiry_hours
-        self._multi_tenant = multi_tenant
-
-        # Default role-to-permissions mapping
-        self._roles = roles or {
-            "admin": ["*"],  # Wildcard: all permissions
-            "agent_runner": ["agents.execute", "agents.list", "tools.execute"],
-            "agent_viewer": ["agents.list"],
-            "pipeline_runner": ["pipelines.execute", "pipelines.list"],
-        }
-
-    def create_token(
-        self,
-        user_id: str,
-        roles: list[str],
-        *,
-        tenant_id: str | None = None,
-        custom_claims: dict[str, Any] | None = None,
-    ) -> str:
-        """Create a JWT token with user claims.
-
-        Args:
-            user_id: Unique user identifier.
-            roles: List of role names assigned to the user.
-            tenant_id: Optional tenant ID for multi-tenant deployments.
-            custom_claims: Additional custom claims to include in the token.
-
-        Returns:
-            Signed JWT token string.
-
-        Raises:
-            ImportError: If PyJWT is not installed.
-        """
-        if jwt is None:
-            raise ImportError(
-                "JWT support requires 'pyjwt'. Install with: pip install fireflyframework-agentic[security]"
-            )
-
-        if self._jwt_secret is None:
-            raise ValueError("RBACManager has no jwt_secret; cannot create_token")
-
-        now = datetime.now(UTC)
-        expiry = now + timedelta(hours=self._token_expiry_hours)
-
-        payload = {
-            "sub": user_id,
-            "roles": roles,
-            "iat": now,
-            "exp": expiry,
-        }
-
-        if self._multi_tenant and tenant_id:
-            payload["tenant_id"] = tenant_id
-        elif self._multi_tenant and not tenant_id:
-            raise ValueError("tenant_id is required when multi_tenant is enabled")
-
-        if custom_claims:
-            payload.update(custom_claims)
-
-        token = jwt.encode(payload, self._jwt_secret, algorithm=self._jwt_algorithm)
-        return token
-
-    def validate_token(self, token: str) -> dict[str, Any]:
-        """Validate a JWT token and return its claims.
-
-        Args:
-            token: JWT token string.
-
-        Returns:
-            Dictionary of token claims (user_id, roles, etc.).
-
-        Raises:
-            ValueError: If token is invalid or expired.
-            ImportError: If PyJWT is not installed.
-        """
-        if jwt is None:
-            raise ImportError(
-                "JWT support requires 'pyjwt'. Install with: pip install fireflyframework-agentic[security]"
-            )
-
-        if self._jwt_secret is None:
-            raise ValueError("RBACManager has no jwt_secret; cannot validate HS256 tokens")
-
-        try:
-            payload = jwt.decode(
-                token,
-                self._jwt_secret,
-                algorithms=[self._jwt_algorithm],
-            )
-            return payload
-        except jwt.ExpiredSignatureError as exc:
-            raise ValueError("Token has expired") from exc
-        except jwt.InvalidTokenError as exc:
-            raise ValueError(f"Invalid token: {exc}") from exc
-
-    def has_permission(self, claims: dict[str, Any], permission: str) -> bool:
-        """Check if the user has a specific permission.
-
-        Args:
-            claims: Token claims from validate_token().
-            permission: Permission string to check (e.g., "agents.execute").
-
-        Returns:
-            True if user has the permission, False otherwise.
-        """
-        roles = claims.get("roles", [])
-
-        for role in roles:
-            permissions = self._roles.get(role, [])
-
-            # Wildcard permission grants everything
-            if "*" in permissions:
-                return True
-
-            # Exact match
-            if permission in permissions:
-                return True
-
-            # Prefix match (e.g., "agents.*" grants "agents.execute")
-            for perm in permissions:
-                if perm.endswith(".*"):
-                    prefix = perm[:-2]
-                    if permission.startswith(f"{prefix}."):
-                        return True
-
-        return False
-
-    def check_tenant_access(
-        self,
-        claims: dict[str, Any],
-        tenant_id: str,
-    ) -> bool:
-        """Check if the user has access to a specific tenant.
-
-        Args:
-            claims: Token claims from validate_token().
-            tenant_id: Tenant ID to check access for.
-
-        Returns:
-            True if user has access, False otherwise.
-        """
-        if not self._multi_tenant:
-            return True  # No tenant isolation
-
-        token_tenant = claims.get("tenant_id")
-        return token_tenant == tenant_id
-
-    def get_user_id(self, claims: dict[str, Any]) -> str:
-        """Extract user ID from token claims.
-
-        Args:
-            claims: Token claims from validate_token().
-
-        Returns:
-            User ID string.
-        """
-        return claims.get("sub", "")
-
-    def get_roles(self, claims: dict[str, Any]) -> list[str]:
-        """Extract roles from token claims.
-
-        Args:
-            claims: Token claims from validate_token().
-
-        Returns:
-            List of role names.
-        """
-        return claims.get("roles", [])
-
-    def get_permissions(self, claims: dict[str, Any]) -> list[str]:
-        """Get all permissions for the user based on their roles.
-
-        Args:
-            claims: Token claims from validate_token().
-
-        Returns:
-            List of all permissions granted to the user.
-        """
-        roles = self.get_roles(claims)
-        permissions = set()
-
-        for role in roles:
-            role_perms = self._roles.get(role, [])
-            permissions.update(role_perms)
-
-        return list(permissions)
-
-
-def require_permission(
-    permission: str,
-    *,
-    rbac: RBACManager | None = None,
-    token_param: str = "token",
-) -> Callable:
-    """Decorator to require a specific permission for a function.
-
-    Args:
-        permission: Required permission string.
-        rbac: RBACManager instance. If None, uses default from config.
-        token_param: Name of the function parameter containing the JWT token.
-
-    Returns:
-        Decorator function.
-
-    Example::
-
-        @require_permission("agents.execute")
-        async def run_agent(token: str, agent_name: str, prompt: str):
-            # This only runs if token has "agents.execute" permission
-            return await agent.run(prompt)
-
-        # Usage
-        try:
-            result = await run_agent(
-                token="eyJ...",
-                agent_name="my_agent",
-                prompt="Hello"
-            )
-        except ValueError:
-            print("Permission denied or invalid token")
-    """
-
-    def decorator(func: Callable) -> Callable:
-        @functools.wraps(func)
-        async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
-            # Get RBAC manager
-            manager = rbac or _get_default_rbac()
-
-            if manager is None:
-                raise ValueError(
-                    "No RBAC manager configured. Set FIREFLY_AGENTIC_RBAC_ENABLED=true "
-                    "and FIREFLY_AGENTIC_RBAC_JWT_SECRET in environment."
-                )
-
-            # Extract token from args/kwargs using signature binding
-            try:
-                sig = inspect.signature(func)
-                bound = sig.bind(*args, **kwargs)
-                bound.apply_defaults()
-                token = bound.arguments.get(token_param)
-            except TypeError as exc:
-                raise ValueError(f"Missing required parameter: {token_param}") from exc
-            if not token:
-                raise ValueError(f"Missing required parameter: {token_param}")
-
-            # Validate token and check permission
-            try:
-                claims = manager.validate_token(token)
-            except ValueError as exc:
-                logger.warning("Token validation failed: %s", exc)
-                raise
-
-            if not manager.has_permission(claims, permission):
-                user_id = manager.get_user_id(claims)
-                roles = manager.get_roles(claims)
-                logger.warning(
-                    "Permission denied: user=%s, roles=%s, required=%s",
-                    user_id,
-                    roles,
-                    permission,
-                )
-                raise ValueError(f"Permission denied: {permission}")
-
-            # Call the original function
-            return await func(*args, **kwargs)
-
-        @functools.wraps(func)
-        def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
-            # Get RBAC manager
-            manager = rbac or _get_default_rbac()
-
-            if manager is None:
-                raise ValueError(
-                    "No RBAC manager configured. Set FIREFLY_AGENTIC_RBAC_ENABLED=true "
-                    "and FIREFLY_AGENTIC_RBAC_JWT_SECRET in environment."
-                )
-
-            # Extract token from args/kwargs
-            try:
-                sig = inspect.signature(func)
-                bound = sig.bind(*args, **kwargs)
-                bound.apply_defaults()
-                token = bound.arguments.get(token_param)
-            except TypeError as exc:
-                raise ValueError(f"Missing required parameter: {token_param}") from exc
-            if not token:
-                raise ValueError(f"Missing required parameter: {token_param}")
-
-            # Validate token and check permission
-            try:
-                claims = manager.validate_token(token)
-            except ValueError as exc:
-                logger.warning("Token validation failed: %s", exc)
-                raise
-
-            if not manager.has_permission(claims, permission):
-                user_id = manager.get_user_id(claims)
-                roles = manager.get_roles(claims)
-                logger.warning(
-                    "Permission denied: user=%s, roles=%s, required=%s",
-                    user_id,
-                    roles,
-                    permission,
-                )
-                raise ValueError(f"Permission denied: {permission}")
-
-            # Call the original function
-            return func(*args, **kwargs)
-
-        # Return appropriate wrapper based on function type
-        if inspect.iscoroutinefunction(func):
-            return async_wrapper
-        return sync_wrapper
-
-    return decorator
-
-
-def _get_default_rbac() -> RBACManager | None:
-    """Get the default RBAC manager from configuration."""
-    try:
-        cfg = get_config()
-        if not cfg.rbac_enabled or not cfg.rbac_jwt_secret:
-            return None
-
-        return RBACManager(
-            jwt_secret=cfg.rbac_jwt_secret,
-            multi_tenant=cfg.rbac_multi_tenant,
-        )
-    except Exception:  # noqa: BLE001
-        return None
-
-
-# Module-level default instance
-default_rbac: RBACManager | None = _get_default_rbac()
diff --git a/fireflyframework_agentic/vectorstores/pgvector_store.py b/fireflyframework_agentic/vectorstores/pgvector_store.py
index 7f6a5882..76dd51c4 100644
--- a/fireflyframework_agentic/vectorstores/pgvector_store.py
+++ b/fireflyframework_agentic/vectorstores/pgvector_store.py
@@ -174,9 +174,7 @@ async def _upsert(self, documents: list[VectorDocument], namespace: str) -> None
         for doc in documents:
             if doc.embedding is None:
                 raise VectorStoreError(f"VectorDocument {doc.id!r} has no embedding; pgvector requires one.")
-            rows.append(
-                (doc.id, namespace, _vector_literal(doc.embedding), doc.text, json.dumps(doc.metadata))
-            )
+            rows.append((doc.id, namespace, _vector_literal(doc.embedding), doc.text, json.dumps(doc.metadata)))
         if not rows:
             return
         pool = await self._ensure_pool()
diff --git a/fireflyframework_agentic/vectorstores/scoped.py b/fireflyframework_agentic/vectorstores/scoped.py
index 6261a2f0..100ac90a 100644
--- a/fireflyframework_agentic/vectorstores/scoped.py
+++ b/fireflyframework_agentic/vectorstores/scoped.py
@@ -64,9 +64,7 @@ def parse_scope_namespace(namespace: str) -> tuple[str, str]:
     """
     parts = namespace.split("/")
     if len(parts) != 4 or parts[0] != "t" or parts[2] != "w" or not parts[1] or not parts[3]:
-        raise ValueError(
-            f"not a scope namespace: {namespace!r}; expected 't/<tenant_id>/w/<workspace_id>'"
-        )
+        raise ValueError(f"not a scope namespace: {namespace!r}; expected 't/<tenant_id>/w/<workspace_id>'")
     return parts[1], parts[3]
 
 
@@ -80,9 +78,7 @@ class ScopedVectorStore(Protocol):
     can never be lost silently.
     """
 
-    async def upsert(
-        self, documents: list[VectorDocument], *, tenant_id: str, workspace_id: str
-    ) -> None: ...
+    async def upsert(self, documents: list[VectorDocument], *, tenant_id: str, workspace_id: str) -> None: ...
 
     async def search(
         self,
@@ -119,9 +115,7 @@ def __init__(self, inner: VectorStoreProtocol, *, stamp_metadata: bool = True) -
         self._inner = inner
         self._stamp_metadata = stamp_metadata
 
-    async def upsert(
-        self, documents: list[VectorDocument], *, tenant_id: str, workspace_id: str
-    ) -> None:
+    async def upsert(self, documents: list[VectorDocument], *, tenant_id: str, workspace_id: str) -> None:
         namespace = scope_namespace(tenant_id, workspace_id)
         scoped_docs = [self._scope_document(doc, tenant_id, workspace_id, namespace) for doc in documents]
         await self._inner.upsert(scoped_docs, namespace=namespace)
@@ -159,9 +153,7 @@ async def close(self) -> None:
         if close_fn is not None:
             await close_fn()
 
-    def _scope_document(
-        self, doc: VectorDocument, tenant_id: str, workspace_id: str, namespace: str
-    ) -> VectorDocument:
+    def _scope_document(self, doc: VectorDocument, tenant_id: str, workspace_id: str, namespace: str) -> VectorDocument:
         metadata = dict(doc.metadata)
         if self._stamp_metadata:
             metadata["tenant_id"] = tenant_id
diff --git a/install.ps1 b/install.ps1
index 0af9d7b6..a7abb981 100644
--- a/install.ps1
+++ b/install.ps1
@@ -26,7 +26,7 @@
     Run in non-interactive mode with default options.
 
 .PARAMETER Extras
-    Optional extras to install (rest, kafka, rabbitmq, redis, queues, all).
+    Optional extras to install (all).
 
 .EXAMPLE
     .\install.ps1
@@ -37,7 +37,7 @@
 [CmdletBinding()]
 param(
     [switch]$NonInteractive,
-    [ValidateSet("", "rest", "kafka", "rabbitmq", "redis", "queues", "all")]
+    [ValidateSet("", "all")]
     [string]$Extras = ""
 )
 
@@ -286,23 +286,13 @@ function Select-Extras {
 
     $options = @(
         "Core only (no optional dependencies)",
-        "REST API (FastAPI + Uvicorn + SSE)",
-        "Kafka (aiokafka)",
-        "RabbitMQ (aio-pika)",
-        "Redis (redis-py)",
-        "All queues (Kafka + RabbitMQ + Redis)",
-        "Everything (REST + all queues + costs)"
+        "Everything (all optional dependencies)"
     )
 
     $choice = Read-Choice "Choose a configuration:" $options
 
     switch ($choice) {
-        2 { $script:SelectedExtras = "rest" }
-        3 { $script:SelectedExtras = "kafka" }
-        4 { $script:SelectedExtras = "rabbitmq" }
-        5 { $script:SelectedExtras = "redis" }
-        6 { $script:SelectedExtras = "queues" }
-        7 { $script:SelectedExtras = "all" }
+        2 { $script:SelectedExtras = "all" }
         default { $script:SelectedExtras = "" }
     }
 
diff --git a/install.sh b/install.sh
index 717bc5c5..74331e26 100755
--- a/install.sh
+++ b/install.sh
@@ -484,29 +484,19 @@ step_check_tools() {
 step_select_extras() {
     step_header 4 "Extras Selection"
 
-    info "Optional components add REST API and message queue support."
+    info "Optional components add embeddings, vector stores, and storage backends."
     printf "\n"
 
     local options=(
         "Core only      ${DIM}— no optional dependencies${RESET}"
-        "REST API       ${DIM}— FastAPI + Uvicorn + SSE streaming${RESET}"
-        "Kafka          ${DIM}— aiokafka (Apache Kafka)${RESET}"
-        "RabbitMQ       ${DIM}— aio-pika (AMQP)${RESET}"
-        "Redis          ${DIM}— redis-py (Pub/Sub)${RESET}"
-        "All queues     ${DIM}— Kafka + RabbitMQ + Redis${RESET}"
-        "Everything     ${DIM}— REST + all queues + costs${RESET}"
+        "Everything     ${DIM}— all optional dependencies${RESET}"
     )
 
     local choice
     choice="$(prompt_choice "Select a configuration:" "${options[@]}")"
 
     case "$choice" in
-        2) EXTRAS="rest" ;;
-        3) EXTRAS="kafka" ;;
-        4) EXTRAS="rabbitmq" ;;
-        5) EXTRAS="redis" ;;
-        6) EXTRAS="queues" ;;
-        7) EXTRAS="all" ;;
+        2) EXTRAS="all" ;;
         *) EXTRAS="" ;;
     esac
 
diff --git a/pyproject.toml b/pyproject.toml
index d7da4304..e575323e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "fireflyframework-agentic"
-version = "26.05.32"
+version = "26.05.33"
 description = "A GenAI metaframework built on Pydantic AI for building production-grade GenAI applications with agents, reasoning patterns, prompt engineering, observability, and more."
 readme = "README.md"
 license = { text = "Apache-2.0" }
@@ -45,23 +45,6 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-rest = [
-    "fastapi>=0.115.0",
-    "uvicorn[standard]>=0.34.0",
-    "sse-starlette>=2.0.0",
-]
-kafka = [
-    "aiokafka>=0.12.0",
-]
-rabbitmq = [
-    "aio-pika>=9.5.0",
-]
-redis = [
-    "redis>=5.2.0",
-]
-queues = [
-    "fireflyframework-agentic[kafka,rabbitmq,redis]",
-]
 postgres = [
     "asyncpg>=0.30.0",
     "sqlalchemy>=2.0.0",
@@ -71,7 +54,6 @@ mongodb = [
     "pymongo>=4.10.0",
 ]
 security = [
-    "pyjwt>=2.10.0",
     "cryptography>=44.0.0",
 ]
 embeddings = [
@@ -135,7 +117,7 @@ binary = [
     "extract-msg>=0.51",
 ]
 all = [
-    "fireflyframework-agentic[rest,queues,postgres,mongodb,security,embeddings,openai-embeddings,cohere-embeddings,google-embeddings,mistral-embeddings,voyage-embeddings,azure-embeddings,bedrock-embeddings,ollama-embeddings,vectorstores-chroma,vectorstores-pinecone,vectorstores-qdrant,vectorstores-pgvector,vectorstores-sqlite-vec,watch,binary]",
+    "fireflyframework-agentic[postgres,mongodb,security,embeddings,openai-embeddings,cohere-embeddings,google-embeddings,mistral-embeddings,voyage-embeddings,azure-embeddings,bedrock-embeddings,ollama-embeddings,vectorstores-chroma,vectorstores-pinecone,vectorstores-qdrant,vectorstores-pgvector,vectorstores-sqlite-vec,watch,binary]",
 ]
 dev = [
     "pytest>=8.3.0",
@@ -173,7 +155,6 @@ select = ["E", "F", "W", "I", "N", "UP", "B", "SIM", "TC", "PLC0415"]
 ignore = ["E501", "TC001", "TC002", "TC003", "UP040", "UP046", "B008"]
 
 [tool.ruff.lint.per-file-ignores]
-"fireflyframework_agentic/exposure/queues/__init__.py" = ["PLC0415"]
 # content.binary lazy-imports its optional heavy deps (pypdf, Pillow,
 # pillow-heif, cairosvg, py7zr, extract-msg, httpx) inside the handlers so the
 # module imports cleanly without the ``[binary]`` extra; the deferred import
diff --git a/tests/security/test_rbac.py b/tests/security/test_rbac.py
deleted file mode 100644
index 6cf32388..00000000
--- a/tests/security/test_rbac.py
+++ /dev/null
@@ -1,244 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Unit tests for RBAC (Role-Based Access Control)."""
-
-from __future__ import annotations
-
-import pytest
-
-# Check if JWT is available
-pytest.importorskip("jwt", reason="JWT tests require pyjwt")
-
-from fireflyframework_agentic.security.rbac import RBACManager, require_permission
-
-
-class TestRBACManager:
-    """Test suite for RBACManager."""
-
-    def test_create_and_validate_token(self):
-        """Test creating and validating JWT tokens."""
-        rbac = RBACManager(jwt_secret="test-secret")
-
-        token = rbac.create_token(user_id="user123", roles=["agent_runner"])
-        claims = rbac.validate_token(token)
-
-        assert claims["sub"] == "user123"
-        assert claims["roles"] == ["agent_runner"]
-
-    def test_invalid_token(self):
-        """Test that invalid tokens are rejected."""
-        rbac = RBACManager(jwt_secret="test-secret")
-
-        with pytest.raises(ValueError, match="Invalid token"):
-            rbac.validate_token("invalid.token.here")
-
-    def test_wrong_secret(self):
-        """Test that tokens signed with different secret are rejected."""
-        rbac1 = RBACManager(jwt_secret="secret1")
-        rbac2 = RBACManager(jwt_secret="secret2")
-
-        token = rbac1.create_token(user_id="user123", roles=["admin"])
-
-        with pytest.raises(ValueError, match="Invalid token"):
-            rbac2.validate_token(token)
-
-    def test_has_permission_exact_match(self):
-        """Test permission checking with exact match."""
-        rbac = RBACManager(
-            jwt_secret="test-secret",
-            roles={
-                "agent_runner": ["agents.execute", "agents.list"],
-                "viewer": ["agents.list"],
-            },
-        )
-
-        token = rbac.create_token(user_id="user123", roles=["agent_runner"])
-        claims = rbac.validate_token(token)
-
-        assert rbac.has_permission(claims, "agents.execute")
-        assert rbac.has_permission(claims, "agents.list")
-        assert not rbac.has_permission(claims, "pipelines.execute")
-
-    def test_has_permission_wildcard(self):
-        """Test that wildcard permission grants everything."""
-        rbac = RBACManager(jwt_secret="test-secret", roles={"admin": ["*"]})
-
-        token = rbac.create_token(user_id="admin", roles=["admin"])
-        claims = rbac.validate_token(token)
-
-        assert rbac.has_permission(claims, "agents.execute")
-        assert rbac.has_permission(claims, "anything.anything")
-        assert rbac.has_permission(claims, "foo.bar.baz")
-
-    def test_has_permission_prefix_match(self):
-        """Test permission checking with prefix wildcard."""
-        rbac = RBACManager(jwt_secret="test-secret", roles={"agent_admin": ["agents.*"]})
-
-        token = rbac.create_token(user_id="user123", roles=["agent_admin"])
-        claims = rbac.validate_token(token)
-
-        assert rbac.has_permission(claims, "agents.execute")
-        assert rbac.has_permission(claims, "agents.list")
-        assert rbac.has_permission(claims, "agents.delete")
-        assert not rbac.has_permission(claims, "pipelines.execute")
-
-    def test_has_permission_no_role(self):
-        """Test that users without matching roles have no permissions."""
-        rbac = RBACManager(jwt_secret="test-secret", roles={"admin": ["*"]})
-
-        token = rbac.create_token(user_id="user123", roles=["unknown_role"])
-        claims = rbac.validate_token(token)
-
-        assert not rbac.has_permission(claims, "agents.execute")
-
-    def test_multi_tenant_token(self):
-        """Test multi-tenant token creation and validation."""
-        rbac = RBACManager(jwt_secret="test-secret", multi_tenant=True)
-
-        token = rbac.create_token(user_id="user123", roles=["agent_runner"], tenant_id="acme_corp")
-        claims = rbac.validate_token(token)
-
-        assert claims["tenant_id"] == "acme_corp"
-
-    def test_multi_tenant_requires_tenant_id(self):
-        """Test that multi-tenant mode requires tenant_id."""
-        rbac = RBACManager(jwt_secret="test-secret", multi_tenant=True)
-
-        with pytest.raises(ValueError, match="tenant_id is required"):
-            rbac.create_token(user_id="user123", roles=["admin"])
-
-    def test_check_tenant_access(self):
-        """Test tenant access checking."""
-        rbac = RBACManager(jwt_secret="test-secret", multi_tenant=True)
-
-        token = rbac.create_token(user_id="user123", roles=["admin"], tenant_id="tenant_a")
-        claims = rbac.validate_token(token)
-
-        assert rbac.check_tenant_access(claims, "tenant_a")
-        assert not rbac.check_tenant_access(claims, "tenant_b")
-
-    def test_custom_claims(self):
-        """Test adding custom claims to tokens."""
-        rbac = RBACManager(jwt_secret="test-secret")
-
-        token = rbac.create_token(
-            user_id="user123", roles=["admin"], custom_claims={"department": "engineering", "level": 5}
-        )
-        claims = rbac.validate_token(token)
-
-        assert claims["department"] == "engineering"
-        assert claims["level"] == 5
-
-    def test_get_user_id(self):
-        """Test extracting user ID from claims."""
-        rbac = RBACManager(jwt_secret="test-secret")
-
-        token = rbac.create_token(user_id="user123", roles=["admin"])
-        claims = rbac.validate_token(token)
-
-        assert rbac.get_user_id(claims) == "user123"
-
-    def test_get_roles(self):
-        """Test extracting roles from claims."""
-        rbac = RBACManager(jwt_secret="test-secret")
-
-        token = rbac.create_token(user_id="user123", roles=["admin", "agent_runner"])
-        claims = rbac.validate_token(token)
-
-        assert rbac.get_roles(claims) == ["admin", "agent_runner"]
-
-    def test_get_permissions(self):
-        """Test getting all permissions for a user."""
-        rbac = RBACManager(
-            jwt_secret="test-secret",
-            roles={
-                "agent_runner": ["agents.execute", "agents.list"],
-                "pipeline_runner": ["pipelines.execute"],
-            },
-        )
-
-        token = rbac.create_token(user_id="user123", roles=["agent_runner", "pipeline_runner"])
-        claims = rbac.validate_token(token)
-
-        permissions = rbac.get_permissions(claims)
-        assert "agents.execute" in permissions
-        assert "agents.list" in permissions
-        assert "pipelines.execute" in permissions
-
-
-@pytest.mark.asyncio
-class TestRequirePermissionDecorator:
-    """Test suite for @require_permission decorator."""
-
-    async def test_decorator_allows_with_permission(self):
-        """Test that decorator allows function when permission is granted."""
-        rbac = RBACManager(jwt_secret="test-secret", roles={"agent_runner": ["agents.execute"]})
-
-        @require_permission("agents.execute", rbac=rbac)
-        async def protected_function(token: str, data: str) -> str:
-            return f"Success: {data}"
-
-        token = rbac.create_token(user_id="user123", roles=["agent_runner"])
-        result = await protected_function(token=token, data="test")
-
-        assert result == "Success: test"
-
-    async def test_decorator_denies_without_permission(self):
-        """Test that decorator blocks function when permission is missing."""
-        rbac = RBACManager(jwt_secret="test-secret", roles={"viewer": ["agents.list"]})
-
-        @require_permission("agents.execute", rbac=rbac)
-        async def protected_function(token: str, data: str) -> str:
-            return f"Success: {data}"
-
-        token = rbac.create_token(user_id="user123", roles=["viewer"])
-
-        with pytest.raises(ValueError, match="Permission denied"):
-            await protected_function(token=token, data="test")
-
-    async def test_decorator_rejects_invalid_token(self):
-        """Test that decorator rejects invalid tokens."""
-        rbac = RBACManager(jwt_secret="test-secret")
-
-        @require_permission("agents.execute", rbac=rbac)
-        async def protected_function(token: str) -> str:
-            return "Success"
-
-        with pytest.raises(ValueError, match="Invalid token"):
-            await protected_function(token="invalid.token")
-
-    async def test_decorator_requires_token_parameter(self):
-        """Test that decorator requires token parameter."""
-        rbac = RBACManager(jwt_secret="test-secret")
-
-        @require_permission("agents.execute", rbac=rbac)
-        async def protected_function(token: str) -> str:
-            return "Success"
-
-        with pytest.raises(ValueError, match="Missing required parameter: token"):
-            await protected_function()
-
-    def test_decorator_with_sync_function(self):
-        """Test that decorator works with synchronous functions."""
-        rbac = RBACManager(jwt_secret="test-secret", roles={"admin": ["*"]})
-
-        @require_permission("agents.execute", rbac=rbac)
-        def protected_sync_function(token: str, data: str) -> str:
-            return f"Sync success: {data}"
-
-        token = rbac.create_token(user_id="admin", roles=["admin"])
-        result = protected_sync_function(token=token, data="test")
-
-        assert result == "Sync success: test"
diff --git a/tests/unit/core/test_config.py b/tests/unit/core/test_config.py
index a1cc2cc5..f0005c78 100644
--- a/tests/unit/core/test_config.py
+++ b/tests/unit/core/test_config.py
@@ -29,35 +29,35 @@ def test_default_config_is_valid(self) -> None:
         assert cfg.qos_consistency_runs >= 2
 
     def test_removed_cost_calculator_field_raises(self) -> None:
-        with pytest.raises(ValidationError, match="Removed cost-tracking config fields"):
+        with pytest.raises(ValidationError, match="Removed config fields"):
             FireflyAgenticConfig(cost_calculator="auto")
 
     def test_removed_budget_alert_threshold_field_raises(self) -> None:
-        with pytest.raises(ValidationError, match="Removed cost-tracking config fields"):
+        with pytest.raises(ValidationError, match="Removed config fields"):
             FireflyAgenticConfig(budget_alert_threshold_usd=5.0)
 
+    def test_removed_auth_api_keys_field_raises(self) -> None:
+        with pytest.raises(ValidationError, match="Removed config fields"):
+            FireflyAgenticConfig(auth_api_keys=["key1"])
 
-class TestConfigAuthAndUsageFields:
-    def test_auth_api_keys_default(self) -> None:
-        cfg = FireflyAgenticConfig()
-        assert cfg.auth_api_keys is None
+    def test_removed_auth_bearer_tokens_field_raises(self) -> None:
+        with pytest.raises(ValidationError, match="Removed config fields"):
+            FireflyAgenticConfig(auth_bearer_tokens=["tok1"])
+
+    def test_removed_cors_allowed_origins_field_raises(self) -> None:
+        with pytest.raises(ValidationError, match="Removed config fields"):
+            FireflyAgenticConfig(cors_allowed_origins=["https://app.example.com"])
 
-    def test_auth_bearer_tokens_default(self) -> None:
-        cfg = FireflyAgenticConfig()
-        assert cfg.auth_bearer_tokens is None
 
+class TestConfigUsageFields:
     def test_usage_tracker_max_records_default(self) -> None:
         cfg = FireflyAgenticConfig()
         assert cfg.usage_tracker_max_records == 10_000
 
     def test_custom_values(self) -> None:
         cfg = FireflyAgenticConfig(
-            auth_api_keys=["key1"],
-            auth_bearer_tokens=["tok1"],
             usage_tracker_max_records=500,
         )
-        assert cfg.auth_api_keys == ["key1"]
-        assert cfg.auth_bearer_tokens == ["tok1"]
         assert cfg.usage_tracker_max_records == 500
 
 
diff --git a/tests/unit/exposure/__init__.py b/tests/unit/exposure/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/unit/exposure/test_queues.py b/tests/unit/exposure/test_queues.py
deleted file mode 100644
index 2e45d32a..00000000
--- a/tests/unit/exposure/test_queues.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Tests for queue exposure."""
-
-from __future__ import annotations
-
-import pytest
-
-from fireflyframework_agentic.exceptions import ExposureError
-from fireflyframework_agentic.exposure.queues.base import QueueMessage
-from fireflyframework_agentic.exposure.queues.router import QueueRouter
-
-
-class TestQueueMessage:
-    def test_message_creation(self):
-        msg = QueueMessage(body="hello", routing_key="test.key")
-        assert msg.body == "hello"
-        assert msg.routing_key == "test.key"
-
-    def test_message_defaults(self):
-        msg = QueueMessage(body="hi")
-        assert msg.headers == {}
-        assert msg.routing_key == ""
-        assert msg.reply_to == ""
-
-
-class TestQueueRouter:
-    def test_add_route(self):
-        router = QueueRouter()
-        router.add_route(r"test\..*", "test_agent")
-        assert len(router._routes) == 1
-
-    def test_resolve_matches_pattern(self):
-        router = QueueRouter()
-        router.add_route(r"summary\..*", "summariser")
-        assert router._resolve("summary.en") == "summariser"
-
-    def test_resolve_default_agent(self):
-        router = QueueRouter(default_agent="fallback")
-        assert router._resolve("unknown.key") == "fallback"
-
-    def test_resolve_no_match_no_default_raises(self):
-        router = QueueRouter()
-        with pytest.raises(ExposureError):
-            router._resolve("unknown.key")
diff --git a/tests/unit/exposure/test_rate_limit.py b/tests/unit/exposure/test_rate_limit.py
deleted file mode 100644
index a176b6a5..00000000
--- a/tests/unit/exposure/test_rate_limit.py
+++ /dev/null
@@ -1,25 +0,0 @@
-"""Tests for exposure/rest/middleware.py rate limiting."""
-
-from __future__ import annotations
-
-from fireflyframework_agentic.exposure.rest.middleware import RateLimiter
-
-
-class TestRateLimiter:
-    def test_allows_under_limit(self) -> None:
-        limiter = RateLimiter(max_requests=3, window_seconds=10)
-        assert limiter.is_allowed("client1") is True
-        assert limiter.is_allowed("client1") is True
-        assert limiter.is_allowed("client1") is True
-
-    def test_blocks_over_limit(self) -> None:
-        limiter = RateLimiter(max_requests=2, window_seconds=10)
-        limiter.is_allowed("c")
-        limiter.is_allowed("c")
-        assert limiter.is_allowed("c") is False
-
-    def test_separate_keys(self) -> None:
-        limiter = RateLimiter(max_requests=1, window_seconds=10)
-        assert limiter.is_allowed("a") is True
-        assert limiter.is_allowed("b") is True
-        assert limiter.is_allowed("a") is False
diff --git a/tests/unit/exposure/test_rest_app.py b/tests/unit/exposure/test_rest_app.py
deleted file mode 100644
index 6b023972..00000000
--- a/tests/unit/exposure/test_rest_app.py
+++ /dev/null
@@ -1,176 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Integration tests for the REST exposure layer.
-
-Tests the full FastAPI app factory, agent router, health endpoints,
-and middleware wiring using ``httpx.AsyncClient`` with ``ASGITransport``.
-"""
-
-from __future__ import annotations
-
-import pytest
-
-pytest.importorskip("fastapi", reason="fastapi not installed")
-pytest.importorskip("httpx", reason="httpx not installed")
-
-import httpx
-
-from fireflyframework_agentic.exposure.rest.app import create_agentic_app
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-
-def _make_app(**kwargs):
-    """Create a test app with lifespan disabled."""
-    from fastapi import FastAPI
-
-    from fireflyframework_agentic.exposure.rest.health import create_health_router
-    from fireflyframework_agentic.exposure.rest.router import create_agent_router
-
-    app = FastAPI(title="test")
-    app.include_router(create_health_router())
-    app.include_router(create_agent_router())
-    return app
-
-
-@pytest.fixture()
-def app():
-    """Provide a minimal test application."""
-    return _make_app()
-
-
-@pytest.fixture()
-async def client(app):
-    """Provide an async httpx client bound to the test app."""
-    transport = httpx.ASGITransport(app=app)
-    async with httpx.AsyncClient(transport=transport, base_url="http://test") as c:
-        yield c
-
-
-# ---------------------------------------------------------------------------
-# Health endpoints
-# ---------------------------------------------------------------------------
-
-
-class TestHealthEndpoints:
-    async def test_health(self, client: httpx.AsyncClient):
-        resp = await client.get("/health")
-        assert resp.status_code == 200
-        body = resp.json()
-        assert body["status"] == "ok"
-        assert "agents" in body
-
-    async def test_readiness(self, client: httpx.AsyncClient):
-        resp = await client.get("/health/ready")
-        assert resp.status_code == 200
-        assert resp.json()["status"] == "ready"
-
-    async def test_liveness(self, client: httpx.AsyncClient):
-        resp = await client.get("/health/live")
-        assert resp.status_code == 200
-        assert resp.json()["status"] == "alive"
-
-
-# ---------------------------------------------------------------------------
-# Agent router
-# ---------------------------------------------------------------------------
-
-
-class TestAgentRouter:
-    async def test_list_agents(self, client: httpx.AsyncClient):
-        resp = await client.get("/agents/")
-        assert resp.status_code == 200
-        assert isinstance(resp.json(), list)
-
-    async def test_run_missing_agent_returns_404(self, client: httpx.AsyncClient):
-        resp = await client.post(
-            "/agents/nonexistent/run",
-            json={"prompt": "hello"},
-        )
-        assert resp.status_code == 404
-
-    async def test_stream_missing_agent_returns_404(self, client: httpx.AsyncClient):
-        resp = await client.post(
-            "/agents/nonexistent/stream",
-            json={"prompt": "hello"},
-        )
-        assert resp.status_code == 404
-
-
-# ---------------------------------------------------------------------------
-# Conversation endpoints
-# ---------------------------------------------------------------------------
-
-
-class TestConversationEndpoints:
-    async def test_create_conversation(self, client: httpx.AsyncClient):
-        resp = await client.post("/agents/conversations")
-        assert resp.status_code == 200
-        body = resp.json()
-        assert "conversation_id" in body
-
-    async def test_get_conversation(self, client: httpx.AsyncClient):
-        # First create one
-        create_resp = await client.post("/agents/conversations")
-        cid = create_resp.json()["conversation_id"]
-
-        resp = await client.get(f"/agents/conversations/{cid}")
-        assert resp.status_code == 200
-        body = resp.json()
-        assert body["conversation_id"] == cid
-        assert body["message_count"] == 0
-
-    async def test_delete_conversation(self, client: httpx.AsyncClient):
-        create_resp = await client.post("/agents/conversations")
-        cid = create_resp.json()["conversation_id"]
-
-        resp = await client.delete(f"/agents/conversations/{cid}")
-        assert resp.status_code == 200
-        assert resp.json()["status"] == "cleared"
-
-
-# ---------------------------------------------------------------------------
-# App factory
-# ---------------------------------------------------------------------------
-
-
-class TestAppFactory:
-    def test_create_agentic_app_returns_fastapi(self):
-        """Verify the factory produces a FastAPI instance with expected routes."""
-        from fastapi import FastAPI
-
-        app = create_agentic_app(cors=False, request_id=False)
-        assert isinstance(app, FastAPI)
-
-        # Should have health and agent routes
-        paths = {r.path for r in app.routes}
-        assert "/health" in paths
-        assert "/agents/" in paths
-
-    def test_create_agentic_app_rate_limit(self):
-        """Verify rate-limit middleware wiring doesn't crash."""
-        app = create_agentic_app(rate_limit=True, cors=False, request_id=False)
-        assert app is not None
-
-    def test_create_agentic_app_rate_limit_custom(self):
-        """Verify custom rate-limit config dict is accepted."""
-        app = create_agentic_app(
-            rate_limit={"max_requests": 10, "window_seconds": 30},
-            cors=False,
-            request_id=False,
-        )
-        assert app is not None
diff --git a/tests/unit/exposure/test_rest_utils.py b/tests/unit/exposure/test_rest_utils.py
deleted file mode 100644
index 29ddd0d5..00000000
--- a/tests/unit/exposure/test_rest_utils.py
+++ /dev/null
@@ -1,24 +0,0 @@
-"""Tests for REST exposure utilities — auth middleware and WebSocket router."""
-
-from __future__ import annotations
-
-
-class TestAuthMiddleware:
-    def test_add_auth_middleware_callable(self):
-        """Verify add_auth_middleware is a callable that accepts expected args."""
-        import inspect
-
-        from fireflyframework_agentic.exposure.rest.middleware import add_auth_middleware
-
-        sig = inspect.signature(add_auth_middleware)
-        params = list(sig.parameters.keys())
-        assert "app" in params
-        assert "api_keys" in params or "bearer_tokens" in params
-
-
-class TestWebSocketRouter:
-    def test_create_websocket_router_callable(self):
-        """Verify the factory function exists and is callable."""
-        from fireflyframework_agentic.exposure.rest.websocket import create_websocket_router
-
-        assert callable(create_websocket_router)
diff --git a/tests/unit/exposure/test_schemas.py b/tests/unit/exposure/test_schemas.py
deleted file mode 100644
index 6696a6f4..00000000
--- a/tests/unit/exposure/test_schemas.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Tests for REST exposure schemas."""
-
-from __future__ import annotations
-
-from fireflyframework_agentic.exposure.rest.schemas import (
-    AgentRequest,
-    AgentResponse,
-    HealthResponse,
-)
-
-
-class TestSchemas:
-    def test_agent_request(self):
-        req = AgentRequest(prompt="hello")
-        assert req.prompt == "hello"
-
-    def test_agent_response(self):
-        resp = AgentResponse(agent_name="test", output="world")
-        assert resp.agent_name == "test"
-        assert resp.success is True
-
-    def test_agent_response_failure(self):
-        resp = AgentResponse(agent_name="test", output=None, success=False, error="boom")
-        assert not resp.success
-        assert resp.error == "boom"
-
-    def test_health_response(self):
-        h = HealthResponse(status="healthy")
-        assert h.status == "healthy"
diff --git a/tests/unit/observability/test_exporters.py b/tests/unit/observability/test_exporters.py
deleted file mode 100644
index c181095d..00000000
--- a/tests/unit/observability/test_exporters.py
+++ /dev/null
@@ -1,114 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Tests for :func:`fireflyframework_agentic.observability.configure_exporters`."""
-
-from __future__ import annotations
-
-import logging
-
-import pytest
-
-from fireflyframework_agentic.observability import exporters as exporters_mod
-from fireflyframework_agentic.observability.exporters import (
-    ProviderBundle,
-    configure_exporters,
-)
-
-
-@pytest.fixture(autouse=True)
-def _reset_configured_signature():
-    """Each test starts with no prior configuration."""
-    exporters_mod._state.signature = None
-    if exporters_mod._state.handler is not None:
-        logging.getLogger("fireflyframework_agentic").removeHandler(exporters_mod._state.handler)
-    exporters_mod._state.handler = None
-    yield
-
-
-def test_returns_provider_bundle_with_three_providers():
-    bundle = configure_exporters(service_name="test")
-    assert isinstance(bundle, ProviderBundle)
-    assert bundle.tracer is not None
-    assert bundle.meter is not None
-    assert bundle.log is not None
-
-
-def test_console_exporters_attach_without_errors():
-    bundle = configure_exporters(service_name="test", console=True)
-    # The TracerProvider has at least one span processor when console=True.
-    # MeterProvider holds the reader internally; we can't introspect the
-    # list portably, but constructing with console=True must not raise.
-    assert bundle.tracer is not None
-
-
-def test_no_kwargs_still_builds_providers_but_attaches_no_exporters():
-    # Useful for tests / when the caller wants a noop telemetry pipeline.
-    bundle = configure_exporters(service_name="test-empty")
-    assert bundle.tracer is not None
-
-
-def test_logging_handler_attached_to_firefly_logger():
-    configure_exporters(service_name="test", console=True)
-    firefly_logger = logging.getLogger("fireflyframework_agentic")
-    # Our handler from observability.exporters must be present.
-    assert exporters_mod._state.handler is not None
-    assert exporters_mod._state.handler in firefly_logger.handlers
-
-
-def test_idempotent_repeat_call_is_no_op():
-    first = configure_exporters(service_name="test", console=True)
-    prior_tracer = first.tracer
-    prior_meter = first.meter
-    prior_log = first.log
-    second = configure_exporters(service_name="test", console=True)
-    # The module guard should prevent re-registration; we assert idempotency
-    # via the guard variable, not provider identity, because the no-op branch
-    # returns whatever global providers are currently registered.
-    assert exporters_mod._state.signature is not None
-    assert second.tracer is not None  # always returns something usable
-    assert prior_tracer is not None
-    assert prior_meter is not None
-    assert prior_log is not None
-
-
-def test_idempotent_repeat_call_does_not_double_attach_logging_handler():
-    configure_exporters(service_name="test", console=True)
-    firefly_logger = logging.getLogger("fireflyframework_agentic")
-    handlers_after_first = list(firefly_logger.handlers)
-    configure_exporters(service_name="test", console=True)
-    handlers_after_second = list(firefly_logger.handlers)
-    assert len(handlers_after_first) == len(handlers_after_second)
-
-
-def test_changing_signature_replaces_logging_handler():
-    configure_exporters(service_name="svc-a", console=True)
-    firefly_logger = logging.getLogger("fireflyframework_agentic")
-    first_handler = exporters_mod._state.handler
-    configure_exporters(service_name="svc-b", console=True)
-    second_handler = exporters_mod._state.handler
-    assert second_handler is not first_handler
-    assert first_handler not in firefly_logger.handlers
-    assert second_handler in firefly_logger.handlers
-
-
-def test_otlp_missing_dependency_warns_does_not_raise(caplog):
-    # In the test environment opentelemetry-exporter-otlp-proto-grpc may
-    # not be installed; configuring with otlp must degrade gracefully.
-    with caplog.at_level(logging.WARNING):
-        bundle = configure_exporters(
-            service_name="test",
-            otlp_endpoint="http://localhost:4317",
-        )
-    assert bundle is not None  # graceful degradation, not an exception
diff --git a/tests/unit/observability/test_sinks.py b/tests/unit/observability/test_sinks.py
index bcf48656..0a9616a7 100644
--- a/tests/unit/observability/test_sinks.py
+++ b/tests/unit/observability/test_sinks.py
@@ -6,7 +6,7 @@
 import json
 import logging
 from pathlib import Path
-from unittest.mock import MagicMock, patch
+from unittest.mock import patch
 
 import pytest
 
@@ -16,7 +16,6 @@
     JSONLFileSink,
     LoggingSink,
     OTelMetricsSink,
-    WebhookSink,
     _emit_safely,
 )
 from fireflyframework_agentic.observability.usage import UsageRecord
@@ -123,47 +122,3 @@ def test_jsonl_file_sink_rotation(tmp_path: Path) -> None:
     sink.close()
     rotated = list(tmp_path.glob("cost.jsonl*"))
     assert len(rotated) > 1
-
-
-def test_webhook_sink_batches_and_flushes() -> None:
-    posts: list[list[dict]] = []
-
-    def fake_post(url: str, json: list[dict], headers: dict, timeout: float) -> MagicMock:
-        posts.append(json)
-        m = MagicMock()
-        m.status_code = 200
-        return m
-
-    sink = WebhookSink("https://example.test/cost", batch_size=3, flush_interval_s=10.0, _post=fake_post)
-    for i in range(5):
-        sink.emit(UsageRecord(agent=f"a{i}", cost_usd=0.01))
-    sink.close()  # forces drain
-    assert sum(len(b) for b in posts) == 5
-
-
-def test_webhook_sink_retries_5xx_then_succeeds() -> None:
-    attempts = {"n": 0}
-
-    def fake_post(url: str, json: list[dict], headers: dict, timeout: float) -> MagicMock:
-        attempts["n"] += 1
-        m = MagicMock()
-        m.status_code = 500 if attempts["n"] < 2 else 200
-        return m
-
-    sink = WebhookSink("https://example.test/cost", batch_size=1, flush_interval_s=10.0, max_retries=3, _post=fake_post)
-    sink.emit(UsageRecord(agent="a", cost_usd=0.01))
-    sink.close()
-    assert attempts["n"] >= 2
-
-
-def test_webhook_sink_drops_after_max_retries(caplog: pytest.LogCaptureFixture) -> None:
-    def always_fail(url: str, json: list[dict], headers: dict, timeout: float) -> MagicMock:
-        m = MagicMock()
-        m.status_code = 500
-        return m
-
-    sink = WebhookSink("https://x", batch_size=1, flush_interval_s=10.0, max_retries=2, _post=always_fail)
-    with caplog.at_level(logging.WARNING):
-        sink.emit(UsageRecord(agent="a", cost_usd=0.01))
-        sink.close()
-    assert any("drop" in r.message.lower() or "fail" in r.message.lower() for r in caplog.records)
diff --git a/tests/unit/observability/test_trace_propagation.py b/tests/unit/observability/test_trace_propagation.py
deleted file mode 100644
index f40e5f57..00000000
--- a/tests/unit/observability/test_trace_propagation.py
+++ /dev/null
@@ -1,271 +0,0 @@
-# Copyright 2026 Firefly Software Foundation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Unit tests for W3C Trace Context propagation."""
-
-from __future__ import annotations
-
-import pytest
-from opentelemetry import trace
-from opentelemetry.sdk.trace import TracerProvider
-from opentelemetry.trace import SpanContext, TraceFlags
-
-from fireflyframework_agentic.observability.tracer import (
-    extract_trace_context,
-    get_trace_context,
-    inject_trace_context,
-    set_trace_context,
-    trace_context_scope,
-)
-
-
-@pytest.fixture(scope="module", autouse=True)
-def setup_tracing():
-    """Set up OpenTelemetry tracer provider for tests."""
-    provider = TracerProvider()
-    trace.set_tracer_provider(provider)
-    yield
-    # Reset after tests
-    trace._TRACER_PROVIDER = None
-
-
-class TestTraceContextInjection:
-    """Test suite for trace context injection."""
-
-    def test_inject_with_active_span(self):
-        """Test that inject adds traceparent header when span is active."""
-        tracer = trace.get_tracer(__name__)
-
-        headers = {}
-        with tracer.start_as_current_span("test-span"):
-            inject_trace_context(headers)
-
-        assert "traceparent" in headers
-        # Validate format: 00-{trace_id}-{span_id}-{flags}
-        parts = headers["traceparent"].split("-")
-        assert len(parts) == 4
-        assert parts[0] == "00"  # version
-        assert len(parts[1]) == 32  # trace_id (128-bit hex)
-        assert len(parts[2]) == 16  # span_id (64-bit hex)
-        assert len(parts[3]) == 2  # flags (8-bit hex)
-
-    def test_inject_without_active_span(self):
-        """Test that inject does nothing when no span is active."""
-        headers = {}
-        inject_trace_context(headers)
-
-        assert "traceparent" not in headers
-
-    def test_inject_preserves_existing_headers(self):
-        """Test that inject doesn't overwrite other headers."""
-        tracer = trace.get_tracer(__name__)
-
-        headers = {"x-custom-header": "value"}
-        with tracer.start_as_current_span("test-span"):
-            inject_trace_context(headers)
-
-        assert "x-custom-header" in headers
-        assert headers["x-custom-header"] == "value"
-        assert "traceparent" in headers
-
-
-class TestTraceContextExtraction:
-    """Test suite for trace context extraction."""
-
-    def test_extract_valid_traceparent(self):
-        """Test extraction of valid W3C traceparent header."""
-        headers = {"traceparent": "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01"}
-
-        context = extract_trace_context(headers)
-
-        assert context is not None
-        assert context.trace_id == 0x0AF7651916CD43DD8448EB211C80319C
-        assert context.span_id == 0xB7AD6B7169203331
-        assert context.trace_flags == TraceFlags.SAMPLED
-        assert context.is_remote
-
-    def test_extract_case_insensitive(self):
-        """Test that header names are case-insensitive."""
-        headers = {"TraceParent": "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01"}
-
-        context = extract_trace_context(headers)
-
-        assert context is not None
-        assert context.trace_id == 0x0AF7651916CD43DD8448EB211C80319C
-
-    def test_extract_missing_header(self):
-        """Test extraction returns None when traceparent is missing."""
-        headers = {}
-
-        context = extract_trace_context(headers)
-
-        assert context is None
-
-    def test_extract_invalid_format(self):
-        """Test extraction returns None for malformed traceparent."""
-        invalid_headers = [
-            {"traceparent": "invalid"},
-            {"traceparent": "00-abc"},  # Too few parts
-            {"traceparent": "01-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01"},  # Unsupported version
-            {"traceparent": "00-xxx-yyy-01"},  # Invalid hex
-        ]
-
-        for headers in invalid_headers:
-            context = extract_trace_context(headers)
-            assert context is None
-
-    def test_extract_with_tracestate(self):
-        """Test extraction of traceparent with tracestate."""
-        headers = {
-            "traceparent": "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01",
-            "tracestate": "vendor1=value1,vendor2=value2",
-        }
-
-        context = extract_trace_context(headers)
-
-        assert context is not None
-        assert context.trace_state is not None
-        # Note: TraceState parsing is handled by OpenTelemetry
-
-
-class TestTraceContextScope:
-    """Test suite for trace context scope management."""
-
-    def test_context_scope_sets_and_resets(self):
-        """Test that context scope properly sets and resets context."""
-        # Create a mock span context
-        context = SpanContext(
-            trace_id=0x0AF7651916CD43DD8448EB211C80319C,
-            span_id=0xB7AD6B7169203331,
-            is_remote=True,
-            trace_flags=TraceFlags.SAMPLED,
-        )
-
-        # Initially no context
-        assert get_trace_context() is None
-
-        # Inside scope, context is set
-        with trace_context_scope(context):
-            assert get_trace_context() == context
-
-        # After scope, context is reset
-        assert get_trace_context() is None
-
-    def test_nested_context_scopes(self):
-        """Test that nested scopes work correctly."""
-        context1 = SpanContext(
-            trace_id=1,
-            span_id=1,
-            is_remote=True,
-            trace_flags=TraceFlags.DEFAULT,
-        )
-        context2 = SpanContext(
-            trace_id=2,
-            span_id=2,
-            is_remote=True,
-            trace_flags=TraceFlags.DEFAULT,
-        )
-
-        with trace_context_scope(context1):
-            assert get_trace_context() == context1
-
-            with trace_context_scope(context2):
-                assert get_trace_context() == context2
-
-            # Outer context restored
-            assert get_trace_context() == context1
-
-        # All contexts cleared
-        assert get_trace_context() is None
-
-    def test_context_scope_with_none(self):
-        """Test that scope can be used with None context."""
-        with trace_context_scope(None):
-            assert get_trace_context() is None
-
-
-class TestTraceContextAccessors:
-    """Test suite for context variable accessors."""
-
-    def test_get_and_set_context(self):
-        """Test getting and setting trace context."""
-        context = SpanContext(
-            trace_id=0x0AF7651916CD43DD8448EB211C80319C,
-            span_id=0xB7AD6B7169203331,
-            is_remote=True,
-            trace_flags=TraceFlags.SAMPLED,
-        )
-
-        set_trace_context(context)
-        assert get_trace_context() == context
-
-        set_trace_context(None)
-        assert get_trace_context() is None
-
-
-class TestRoundTripPropagation:
-    """Test suite for full inject -> extract round-trip."""
-
-    def test_round_trip_preserves_context(self):
-        """Test that inject followed by extract preserves trace information."""
-        tracer = trace.get_tracer(__name__)
-
-        # Start a span and inject its context
-        with tracer.start_as_current_span("test-span") as span:
-            original_context = span.get_span_context()
-
-            headers = {}
-            inject_trace_context(headers)
-
-            # Extract the context from headers
-            extracted_context = extract_trace_context(headers)
-
-            assert extracted_context is not None
-            assert extracted_context.trace_id == original_context.trace_id
-            assert extracted_context.span_id == original_context.span_id
-            assert extracted_context.trace_flags == original_context.trace_flags
-
-    def test_multiple_services_chain(self):
-        """Test trace context propagation through multiple service hops."""
-        tracer = trace.get_tracer(__name__)
-
-        # Service A starts a trace
-        with tracer.start_as_current_span("service-a") as span_a:
-            context_a = span_a.get_span_context()
-
-            # Service A sends request to Service B
-            headers_to_b = {}
-            inject_trace_context(headers_to_b)
-
-            # Service B receives request
-            context_b = extract_trace_context(headers_to_b)
-            assert context_b is not None
-
-            # Service B continues the trace
-            with trace_context_scope(context_b), tracer.start_as_current_span("service-b") as span_b:
-                # Service B sends request to Service C
-                headers_to_c = {}
-                inject_trace_context(headers_to_c)
-
-                # Service C receives request
-                context_c = extract_trace_context(headers_to_c)
-                assert context_c is not None
-
-                # All services share the same trace ID
-                assert context_a.trace_id == context_b.trace_id == context_c.trace_id
-                # context_b has context_a's span as parent, context_c has context_b's span as parent
-                # (extracted contexts contain the parent span ID)
-                assert context_b.span_id == context_a.span_id  # B's parent is A
-                # The actual span created in B will have a different ID
-                assert span_b.get_span_context().span_id != context_a.span_id
diff --git a/tests/unit/vectorstores/test_scoped.py b/tests/unit/vectorstores/test_scoped.py
index 2bfa94b0..3cea7031 100644
--- a/tests/unit/vectorstores/test_scoped.py
+++ b/tests/unit/vectorstores/test_scoped.py
@@ -123,12 +123,8 @@ async def test_upsert_does_not_mutate_caller_documents(self) -> None:
     async def test_search_is_scope_isolated(self) -> None:
         inner = _FakeStore()
         store = TenantScopedVectorStore(inner)
-        await store.upsert(
-            [VectorDocument(id="1", text="a", embedding=[1.0])], tenant_id="acme", workspace_id="main"
-        )
-        await store.upsert(
-            [VectorDocument(id="2", text="b", embedding=[1.0])], tenant_id="other", workspace_id="main"
-        )
+        await store.upsert([VectorDocument(id="1", text="a", embedding=[1.0])], tenant_id="acme", workspace_id="main")
+        await store.upsert([VectorDocument(id="2", text="b", embedding=[1.0])], tenant_id="other", workspace_id="main")
         mine = await store.search([1.0], tenant_id="acme", workspace_id="main")
         assert [r.document.id for r in mine] == ["1"]
         foreign = await store.search([1.0], tenant_id="nobody", workspace_id="main")
@@ -137,9 +133,7 @@ async def test_search_is_scope_isolated(self) -> None:
     async def test_delete_is_scoped(self) -> None:
         inner = _FakeStore()
         store = TenantScopedVectorStore(inner)
-        await store.upsert(
-            [VectorDocument(id="1", text="a", embedding=[1.0])], tenant_id="acme", workspace_id="main"
-        )
+        await store.upsert([VectorDocument(id="1", text="a", embedding=[1.0])], tenant_id="acme", workspace_id="main")
         await store.delete(["1"], tenant_id="acme", workspace_id="main")
         assert await store.search([1.0], tenant_id="acme", workspace_id="main") == []
 
diff --git a/uv.lock b/uv.lock
index 37fceef5..7e3b501c 100644
--- a/uv.lock
+++ b/uv.lock
@@ -22,19 +22,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/d8/74/913c9b8fc566c6da650aecbddf25a5d8186b54138df265eb9eb546f56141/ag_ui_protocol-0.1.18-py3-none-any.whl", hash = "sha256:d151c0f0a34160647f1571163f7185746f4326b15a56d1560de5082a7a0e7a12", size = 12607, upload-time = "2026-04-21T20:45:00.097Z" },
 ]
 
-[[package]]
-name = "aio-pika"
-version = "9.6.2"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "aiormq" },
-    { name = "yarl" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/96/63/56354526f2e6e915c93bee6e4dedb35888fe82d6bc1a19f35f5a77e795ff/aio_pika-9.6.2.tar.gz", hash = "sha256:c49e9246080dc8ffa1bb0e4aca407bf3d8ad78c3ee3a93df88b68fe65d7a49b9", size = 70851, upload-time = "2026-03-22T19:03:20.878Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/25/05/256fa313f48bed075056d13593b92ce804be05d75f4f312be24edb82860a/aio_pika-9.6.2-py3-none-any.whl", hash = "sha256:2a5478af920d169795071c9c09c7542cd8cdece60438cf7804533dcbcce93b7f", size = 56269, upload-time = "2026-03-22T19:03:19.558Z" },
-]
-
 [[package]]
 name = "aiofile"
 version = "3.11.1"
@@ -124,31 +111,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/62/29/2f8418269e46454a26171bfdd6a055d74febf32234e474930f2f60a17145/aiohttp-3.13.5-cp314-cp314t-win_amd64.whl", hash = "sha256:18a2f6c1182c51baa1d28d68fea51513cb2a76612f038853c0ad3c145423d3d9", size = 505441, upload-time = "2026-03-31T22:00:12.791Z" },
 ]
 
-[[package]]
-name = "aiokafka"
-version = "0.14.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "async-timeout" },
-    { name = "packaging" },
-    { name = "typing-extensions" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/89/5f/dfc1180fd22d1acdc91949ec36e97199c43742dacb057cb8efed3679ed04/aiokafka-0.14.0.tar.gz", hash = "sha256:8ffdc945798ba4d3d132b705d4244d0a1f493925efb57c637a2ca88ee82794e1", size = 601374, upload-time = "2026-04-29T10:43:03.574Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/30/b0/c9384541b2e4cc52a16402fc53fb9d44af0d78d37954cf8c7271c376ad47/aiokafka-0.14.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:db16e43fac4c1c5006131046c1bf370c580d6ac4495a10ac7778245710943179", size = 345859, upload-time = "2026-04-29T10:42:45.449Z" },
-    { url = "https://files.pythonhosted.org/packages/7f/d1/fc266d9f4ffba4f197356c6ffdfbb0fe32e7cb874e240f299935d058ac06/aiokafka-0.14.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:32a8e91d88cf3ccf0778927715610d6579888c5f4748db4c2022cda25d628a48", size = 348284, upload-time = "2026-04-29T10:42:47.104Z" },
-    { url = "https://files.pythonhosted.org/packages/b2/8e/0c4c270786dac79f3fca74c6166c3a25b61b0d26132be0d69f0d7f206f0a/aiokafka-0.14.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aad4a575a506e7784e25e430f27026fe2f4378560b21b7f4e8c9a54f0d06eaee", size = 1117867, upload-time = "2026-04-29T10:42:48.394Z" },
-    { url = "https://files.pythonhosted.org/packages/9d/7f/3b89fbd0a3be9edfd5b51e20bb5cd695c851219b63c501c051cf84367fa9/aiokafka-0.14.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:75e4a003502c9c3b5c705fa7c00d634ba146bf38fa5d525b80bb6ff6e3e779fe", size = 1108860, upload-time = "2026-04-29T10:42:50.249Z" },
-    { url = "https://files.pythonhosted.org/packages/b3/59/849aba75cff93277bf6bf8b630de79e902949ff7ec48e4b12a64e6e32cae/aiokafka-0.14.0-cp313-cp313-win32.whl", hash = "sha256:a128e213cbc2bce0ea3db65a68920e52cebeeb8209bf001ac7aa022a8bd54d7d", size = 310889, upload-time = "2026-04-29T10:42:52.038Z" },
-    { url = "https://files.pythonhosted.org/packages/c4/e5/52eab8f8515d23da7b5d90e2c5ba10eab9494a0314f749e3f73e003f4a50/aiokafka-0.14.0-cp313-cp313-win_amd64.whl", hash = "sha256:d6fa16bef3544be87bd1a7a8317b9d85e3da59f3202326d9ff22735ed052746e", size = 329470, upload-time = "2026-04-29T10:42:53.536Z" },
-    { url = "https://files.pythonhosted.org/packages/50/9d/984803315fe2b883ea6e08b1d9c8a752bd5c16e966d8714bacc67c72c417/aiokafka-0.14.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:5d70615d1530ad19d0c4da8d87abaec0a12b9fdaabffdcd4e400efa0c50ef80c", size = 346672, upload-time = "2026-04-29T10:42:55.267Z" },
-    { url = "https://files.pythonhosted.org/packages/49/df/da314966b7f3c3117bd78b082563cb03dbe3007848cb8f4b0932faf390a0/aiokafka-0.14.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7e2392360c370b1ba6564c57d2889e154ecdb43157a8f7b7d7afe5e3c02fcc1a", size = 349594, upload-time = "2026-04-29T10:42:56.565Z" },
-    { url = "https://files.pythonhosted.org/packages/57/7a/160516944ea0e0f68ea78e38f944c52f5248c7c7df26cba22a40b9f25709/aiokafka-0.14.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:201e38ecc595f9f65a945f1ef9085157ddf28f25cd2e482fd9efa1fcf4638213", size = 1114112, upload-time = "2026-04-29T10:42:57.869Z" },
-    { url = "https://files.pythonhosted.org/packages/68/c4/9841118a2157e913e8ebfbc0a2b58f7b60f1f7202040c3e1df8925ed1184/aiokafka-0.14.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1cd651e1f56571baae306fdd0b5509047ab9625797a24cd75902e139c5a20318", size = 1098571, upload-time = "2026-04-29T10:42:59.356Z" },
-    { url = "https://files.pythonhosted.org/packages/d6/a1/0af8a37849a4108ae227f46c4c62f6beab31863cf66ba318fb73b0be5b26/aiokafka-0.14.0-cp314-cp314-win32.whl", hash = "sha256:128127eb96dab98150b636bb5f480c80e15f02f82a118eec206a521c8cf7cf7c", size = 314107, upload-time = "2026-04-29T10:43:01.111Z" },
-    { url = "https://files.pythonhosted.org/packages/fa/18/fb46c65f758900c71d0f1c73b7802720f99cabcb1f4a11676573f9bc1b8f/aiokafka-0.14.0-cp314-cp314-win_amd64.whl", hash = "sha256:aa385039aa9b235359319bbdcf48c9c86a75d81c9c547d645056d00361238903", size = 333320, upload-time = "2026-04-29T10:43:02.424Z" },
-]
-
 [[package]]
 name = "aiolimiter"
 version = "1.2.1"
@@ -158,19 +120,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/f3/ba/df6e8e1045aebc4778d19b8a3a9bc1808adb1619ba94ca354d9ba17d86c3/aiolimiter-1.2.1-py3-none-any.whl", hash = "sha256:d3f249e9059a20badcb56b61601a83556133655c11d1eb3dd3e04ff069e5f3c7", size = 6711, upload-time = "2024-12-08T15:31:49.874Z" },
 ]
 
-[[package]]
-name = "aiormq"
-version = "6.9.4"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "pamqp" },
-    { name = "yarl" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/6c/0e/db90154d52d399108903fe603e5110a533c42065180265dd003788264080/aiormq-6.9.4.tar.gz", hash = "sha256:0e7c01b662804e1cc7ace9a17794e8c1192a27fc2afa96162362a6e61ae8e8ef", size = 49232, upload-time = "2026-03-23T09:18:19.493Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/6c/48/1ce3773f392f02ceda37aee168fade9d725483a9592c202d06044cd093ff/aiormq-6.9.4-py3-none-any.whl", hash = "sha256:726a8586695e863fba68cf88842065ab12348c9438dcebdfc9d0bddaf6083277", size = 32166, upload-time = "2026-03-23T09:18:17.523Z" },
-]
-
 [[package]]
 name = "aiosignal"
 version = "1.4.0"
@@ -241,15 +190,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/74/f5/9373290775639cb67a2fce7f629a1c240dce9f12fe927bc32b2736e16dfc/argcomplete-3.6.3-py3-none-any.whl", hash = "sha256:f5007b3a600ccac5d25bbce33089211dfd49eab4a7718da3f10e3082525a92ce", size = 43846, upload-time = "2025-10-20T03:33:33.021Z" },
 ]
 
-[[package]]
-name = "async-timeout"
-version = "5.0.1"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/a5/ae/136395dfbfe00dfc94da3f3e136d0b13f394cba8f4841120e34226265780/async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3", size = 9274, upload-time = "2024-11-06T16:41:39.6Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/fe/ba/e2081de779ca30d473f21f5b30e0e737c438205440784c7dfc81efc2b029/async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c", size = 6233, upload-time = "2024-11-06T16:41:37.9Z" },
-]
-
 [[package]]
 name = "asyncpg"
 version = "0.31.0"
@@ -1080,22 +1020,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/53/81/87d5241036046ea17c5c8db228f4c9e04e07e53b627015d4496a99449aaf/extract_msg-0.55.0-py3-none-any.whl", hash = "sha256:baf0cdee9a8d267b70c366bc57ceb03dbfa1e7ab2dca6824169a7fe623f0917c", size = 336033, upload-time = "2025-08-12T16:07:54.886Z" },
 ]
 
-[[package]]
-name = "fastapi"
-version = "0.136.1"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "annotated-doc" },
-    { name = "pydantic" },
-    { name = "starlette" },
-    { name = "typing-extensions" },
-    { name = "typing-inspection" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/5d/45/c130091c2dfa061bbfe3150f2a5091ef1adf149f2a8d2ae769ecaf6e99a2/fastapi-0.136.1.tar.gz", hash = "sha256:7af665ad7acfa0a3baf8983d393b6b471b9da10ede59c60045f49fbc89a0fa7f", size = 397448, upload-time = "2026-04-23T16:49:44.046Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/5a/ff/2e4eca3ade2c22fe1dea7043b8ee9dabe47753349eb1b56a202de8af6349/fastapi-0.136.1-py3-none-any.whl", hash = "sha256:a6e9d7eeada96c93a4d69cb03836b44fa34e2854accb7244a1ece36cd4781c3f", size = 117683, upload-time = "2026-04-23T16:49:42.437Z" },
-]
-
 [[package]]
 name = "fastavro"
 version = "1.12.2"
@@ -1211,7 +1135,7 @@ wheels = [
 
 [[package]]
 name = "fireflyframework-agentic"
-version = "26.5.31"
+version = "26.5.33"
 source = { editable = "." }
 dependencies = [
     { name = "genai-prices" },
@@ -1228,8 +1152,6 @@ dependencies = [
 
 [package.optional-dependencies]
 all = [
-    { name = "aio-pika" },
-    { name = "aiokafka" },
     { name = "asyncpg" },
     { name = "boto3" },
     { name = "cairosvg" },
@@ -1237,7 +1159,6 @@ all = [
     { name = "cohere" },
     { name = "cryptography" },
     { name = "extract-msg" },
-    { name = "fastapi" },
     { name = "google-generativeai" },
     { name = "httpx" },
     { name = "mistralai" },
@@ -1248,15 +1169,11 @@ all = [
     { name = "pillow-heif" },
     { name = "pinecone" },
     { name = "py7zr" },
-    { name = "pyjwt" },
     { name = "pymongo" },
     { name = "pypdf" },
     { name = "qdrant-client" },
-    { name = "redis" },
     { name = "sqlalchemy" },
     { name = "sqlite-vec" },
-    { name = "sse-starlette" },
-    { name = "uvicorn", extra = ["standard"] },
     { name = "voyageai" },
     { name = "watchfiles" },
 ]
@@ -1295,9 +1212,6 @@ embeddings = [
 google-embeddings = [
     { name = "google-generativeai" },
 ]
-kafka = [
-    { name = "aiokafka" },
-]
 mistral-embeddings = [
     { name = "mistralai" },
 ]
@@ -1315,29 +1229,12 @@ postgres = [
     { name = "asyncpg" },
     { name = "sqlalchemy" },
 ]
-queues = [
-    { name = "aio-pika" },
-    { name = "aiokafka" },
-    { name = "redis" },
-]
-rabbitmq = [
-    { name = "aio-pika" },
-]
 reasoning-eval = [
     { name = "numpy" },
     { name = "pandas" },
 ]
-redis = [
-    { name = "redis" },
-]
-rest = [
-    { name = "fastapi" },
-    { name = "sse-starlette" },
-    { name = "uvicorn", extra = ["standard"] },
-]
 security = [
     { name = "cryptography" },
-    { name = "pyjwt" },
 ]
 vectorstores-chroma = [
     { name = "chromadb" },
@@ -1363,8 +1260,6 @@ watch = [
 
 [package.metadata]
 requires-dist = [
-    { name = "aio-pika", marker = "extra == 'rabbitmq'", specifier = ">=9.5.0" },
-    { name = "aiokafka", marker = "extra == 'kafka'", specifier = ">=0.12.0" },
     { name = "asyncpg", marker = "extra == 'postgres'", specifier = ">=0.30.0" },
     { name = "asyncpg", marker = "extra == 'vectorstores-pgvector'", specifier = ">=0.30.0" },
     { name = "boto3", marker = "extra == 'bedrock-embeddings'", specifier = ">=1.35.0" },
@@ -1373,9 +1268,7 @@ requires-dist = [
     { name = "cohere", marker = "extra == 'cohere-embeddings'", specifier = ">=5.0.0" },
     { name = "cryptography", marker = "extra == 'security'", specifier = ">=44.0.0" },
     { name = "extract-msg", marker = "extra == 'binary'", specifier = ">=0.51" },
-    { name = "fastapi", marker = "extra == 'rest'", specifier = ">=0.115.0" },
-    { name = "fireflyframework-agentic", extras = ["kafka", "rabbitmq", "redis"], marker = "extra == 'queues'" },
-    { name = "fireflyframework-agentic", extras = ["rest", "queues", "postgres", "mongodb", "security", "embeddings", "openai-embeddings", "cohere-embeddings", "google-embeddings", "mistral-embeddings", "voyage-embeddings", "azure-embeddings", "bedrock-embeddings", "ollama-embeddings", "vectorstores-chroma", "vectorstores-pinecone", "vectorstores-qdrant", "vectorstores-pgvector", "vectorstores-sqlite-vec", "watch", "binary"], marker = "extra == 'all'" },
+    { name = "fireflyframework-agentic", extras = ["postgres", "mongodb", "security", "embeddings", "openai-embeddings", "cohere-embeddings", "google-embeddings", "mistral-embeddings", "voyage-embeddings", "azure-embeddings", "bedrock-embeddings", "ollama-embeddings", "vectorstores-chroma", "vectorstores-pinecone", "vectorstores-qdrant", "vectorstores-pgvector", "vectorstores-sqlite-vec", "watch", "binary"], marker = "extra == 'all'" },
     { name = "genai-prices", specifier = ">=0.0.1" },
     { name = "google-generativeai", marker = "extra == 'google-embeddings'", specifier = ">=0.8.0" },
     { name = "httpx", specifier = ">=0.28.0" },
@@ -1401,7 +1294,6 @@ requires-dist = [
     { name = "pydantic", specifier = ">=2.10.0" },
     { name = "pydantic-ai", specifier = ">=1.99.0" },
     { name = "pydantic-settings", specifier = ">=2.7.0" },
-    { name = "pyjwt", marker = "extra == 'security'", specifier = ">=2.10.0" },
     { name = "pymongo", marker = "extra == 'mongodb'", specifier = ">=4.10.0" },
     { name = "pypdf", marker = "extra == 'binary'", specifier = ">=4.3.0" },
     { name = "pyright", marker = "extra == 'dev'", specifier = ">=1.1.0" },
@@ -1411,17 +1303,14 @@ requires-dist = [
     { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=6.0.0" },
     { name = "python-dotenv", specifier = ">=1.0.0" },
     { name = "qdrant-client", marker = "extra == 'vectorstores-qdrant'", specifier = ">=1.12.0" },
-    { name = "redis", marker = "extra == 'redis'", specifier = ">=5.2.0" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.9.0" },
     { name = "sqlalchemy", marker = "extra == 'postgres'", specifier = ">=2.0.0" },
     { name = "sqlite-vec", marker = "extra == 'vectorstores-sqlite-vec'", specifier = ">=0.1.6" },
-    { name = "sse-starlette", marker = "extra == 'rest'", specifier = ">=2.0.0" },
     { name = "testcontainers", marker = "extra == 'dev'", specifier = ">=4.10.0" },
-    { name = "uvicorn", extras = ["standard"], marker = "extra == 'rest'", specifier = ">=0.34.0" },
     { name = "voyageai", marker = "extra == 'voyage-embeddings'", specifier = ">=0.3.0" },
     { name = "watchfiles", marker = "extra == 'watch'", specifier = ">=0.24.0" },
 ]
-provides-extras = ["rest", "kafka", "rabbitmq", "redis", "queues", "postgres", "mongodb", "security", "embeddings", "openai-embeddings", "cohere-embeddings", "google-embeddings", "mistral-embeddings", "voyage-embeddings", "azure-embeddings", "bedrock-embeddings", "ollama-embeddings", "reasoning-eval", "vectorstores-chroma", "vectorstores-sqlite-vec", "vectorstores-pinecone", "vectorstores-qdrant", "vectorstores-pgvector", "watch", "binary", "all", "dev"]
+provides-extras = ["postgres", "mongodb", "security", "embeddings", "openai-embeddings", "cohere-embeddings", "google-embeddings", "mistral-embeddings", "voyage-embeddings", "azure-embeddings", "bedrock-embeddings", "ollama-embeddings", "reasoning-eval", "vectorstores-chroma", "vectorstores-sqlite-vec", "vectorstores-pinecone", "vectorstores-qdrant", "vectorstores-pgvector", "watch", "binary", "all", "dev"]
 
 [[package]]
 name = "flatbuffers"
@@ -3094,15 +2983,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
 ]
 
-[[package]]
-name = "pamqp"
-version = "3.3.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/fb/62/35bbd3d3021e008606cd0a9532db7850c65741bbf69ac8a3a0d8cfeb7934/pamqp-3.3.0.tar.gz", hash = "sha256:40b8795bd4efcf2b0f8821c1de83d12ca16d5760f4507836267fd7a02b06763b", size = 30993, upload-time = "2024-01-12T20:37:25.085Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/ac/8d/c1e93296e109a320e508e38118cf7d1fc2a4d1c2ec64de78565b3c445eb5/pamqp-3.3.0-py2.py3-none-any.whl", hash = "sha256:c901a684794157ae39b52cbf700db8c9aae7a470f13528b9d7b4e5f7202f8eb0", size = 33848, upload-time = "2024-01-12T20:37:21.359Z" },
-]
-
 [[package]]
 name = "pandas"
 version = "3.0.3"
@@ -4342,15 +4222,6 @@ version = "1.22"
 source = { registry = "https://pypi.org/simple" }
 sdist = { url = "https://files.pythonhosted.org/packages/48/75/bfa342a2ebfc9623b701f1c6995b9906fd6dd2cedf6bce777d09e23303ac/red-black-tree-mod-1.22.tar.gz", hash = "sha256:38e3652903a2bf96379c27c2082ca0b7b905158662dd7ef0c97f4fd93a9aa908", size = 34173, upload-time = "2023-12-26T14:00:22.056Z" }
 
-[[package]]
-name = "redis"
-version = "7.4.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/7b/7f/3759b1d0d72b7c92f0d70ffd9dc962b7b7b5ee74e135f9d7d8ab06b8a318/redis-7.4.0.tar.gz", hash = "sha256:64a6ea7bf567ad43c964d2c30d82853f8df927c5c9017766c55a1d1ed95d18ad", size = 4943913, upload-time = "2026-03-24T09:14:37.53Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/74/3a/95deec7db1eb53979973ebd156f3369a72732208d1391cd2e5d127062a32/redis-7.4.0-py3-none-any.whl", hash = "sha256:a9c74a5c893a5ef8455a5adb793a31bb70feb821c86eccb62eebef5a19c429ec", size = 409772, upload-time = "2026-03-24T09:14:35.968Z" },
-]
-
 [[package]]
 name = "referencing"
 version = "0.37.0"