From a9cacf4eb71697351ee658a570636f04bbf31ad5 Mon Sep 17 00:00:00 2001
From: Declan Brady <declan.brady@scale.com>
Date: Mon, 22 Jun 2026 15:59:33 -0400
Subject: [PATCH 01/10] =?UTF-8?q?feat(harness):=20unified=20harness=20surf?=
 =?UTF-8?q?ace=20=E2=80=94=20foundation=20(span=20derivation,=20delivery?=
 =?UTF-8?q?=20adapters,=20emitter)=20(#412)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/harness-integration.yml     |  40 ++
 src/agentex/lib/core/harness/__init__.py      |  30 ++
 src/agentex/lib/core/harness/auto_send.py     | 156 ++++++
 src/agentex/lib/core/harness/emitter.py       |  80 +++
 .../lib/core/harness/span_derivation.py       | 154 ++++++
 src/agentex/lib/core/harness/tracer.py        |  88 ++++
 src/agentex/lib/core/harness/types.py         |  93 ++++
 .../lib/core/harness/yield_delivery.py        |  31 ++
 tests/lib/core/harness/__init__.py            |   0
 .../lib/core/harness/conformance/__init__.py  |   0
 tests/lib/core/harness/conformance/runner.py  |  48 ++
 .../harness/conformance/test_conformance.py   |  43 ++
 tests/lib/core/harness/test_auto_send.py      | 490 ++++++++++++++++++
 tests/lib/core/harness/test_emitter.py        | 148 ++++++
 .../lib/core/harness/test_span_derivation.py  | 286 ++++++++++
 tests/lib/core/harness/test_tracer.py         |  93 ++++
 tests/lib/core/harness/test_types.py          |  53 ++
 tests/lib/core/harness/test_yield_delivery.py |  89 ++++
 18 files changed, 1922 insertions(+)
 create mode 100644 .github/workflows/harness-integration.yml
 create mode 100644 src/agentex/lib/core/harness/__init__.py
 create mode 100644 src/agentex/lib/core/harness/auto_send.py
 create mode 100644 src/agentex/lib/core/harness/emitter.py
 create mode 100644 src/agentex/lib/core/harness/span_derivation.py
 create mode 100644 src/agentex/lib/core/harness/tracer.py
 create mode 100644 src/agentex/lib/core/harness/types.py
 create mode 100644 src/agentex/lib/core/harness/yield_delivery.py
 create mode 100644 tests/lib/core/harness/__init__.py
 create mode 100644 tests/lib/core/harness/conformance/__init__.py
 create mode 100644 tests/lib/core/harness/conformance/runner.py
 create mode 100644 tests/lib/core/harness/conformance/test_conformance.py
 create mode 100644 tests/lib/core/harness/test_auto_send.py
 create mode 100644 tests/lib/core/harness/test_emitter.py
 create mode 100644 tests/lib/core/harness/test_span_derivation.py
 create mode 100644 tests/lib/core/harness/test_tracer.py
 create mode 100644 tests/lib/core/harness/test_types.py
 create mode 100644 tests/lib/core/harness/test_yield_delivery.py

diff --git a/.github/workflows/harness-integration.yml b/.github/workflows/harness-integration.yml
new file mode 100644
index 000000000..51893f10f
--- /dev/null
+++ b/.github/workflows/harness-integration.yml
@@ -0,0 +1,40 @@
+name: Harness Integration
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    paths:
+      - "src/agentex/lib/core/harness/**"
+      - "src/agentex/lib/adk/_modules/**"
+      - ".github/workflows/harness-integration.yml"
+
+jobs:
+  conformance:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2
+        with:
+          version: '0.10.2'
+
+      - name: Bootstrap
+        run: ./scripts/bootstrap
+
+      # Defer to scripts/test so the harness suite runs under the exact same
+      # invocation as the main CI test job: DEFER_PYDANTIC_BUILD=false and
+      # `uv run --isolated --all-packages --all-extras pytest`, across the
+      # min/max supported Python versions. Running `uv run pytest` directly
+      # would risk an all-extras-only dep passing locally but failing in CI.
+      - name: Conformance suite
+        run: ./scripts/test tests/lib/core/harness/ -v
+
+  # Live integration matrix (harness x {sync, async, temporal}) is added per-harness
+  # in the migration plans. Placeholder job keeps the workflow valid until then.
+  live-matrix:
+    runs-on: ubuntu-latest
+    if: false  # enabled once the first harness's test agents land
+    steps:
+      - run: echo "populated by migration PRs"  # TODO(harness-migration): enable per-harness; see migration PRs 4-8
diff --git a/src/agentex/lib/core/harness/__init__.py b/src/agentex/lib/core/harness/__init__.py
new file mode 100644
index 000000000..067751d63
--- /dev/null
+++ b/src/agentex/lib/core/harness/__init__.py
@@ -0,0 +1,30 @@
+"""Shared, harness-independent machinery for the unified harness surface.
+
+The Agentex StreamTaskMessage* stream is the single source of truth; this
+package derives spans from it and delivers it (yield or auto-send), so every
+harness tap gets streaming + tracing + turn usage uniformly.
+"""
+
+from agentex.lib.core.harness.types import (
+    OpenSpan,
+    CloseSpan,
+    TurnUsage,
+    SpanSignal,
+    TurnResult,
+    HarnessTurn,
+    StreamTaskMessage,
+)
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+
+__all__ = [
+    "UnifiedEmitter",
+    "SpanTracer",
+    "OpenSpan",
+    "CloseSpan",
+    "SpanSignal",
+    "StreamTaskMessage",
+    "TurnUsage",
+    "TurnResult",
+    "HarnessTurn",
+]
diff --git a/src/agentex/lib/core/harness/auto_send.py b/src/agentex/lib/core/harness/auto_send.py
new file mode 100644
index 000000000..2ecd6b583
--- /dev/null
+++ b/src/agentex/lib/core/harness/auto_send.py
@@ -0,0 +1,156 @@
+"""Auto-send delivery: canonical stream -> adk.streaming side effects + tracing."""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+from datetime import datetime
+
+from agentex.types.text_delta import TextDelta
+from agentex.types.text_content import TextContent
+from agentex.lib.core.harness.types import TurnUsage, TurnResult, StreamTaskMessage
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.lib.core.harness.span_derivation import SpanDeriver
+
+try:
+    from agentex.lib.utils.logging import make_logger
+
+    logger = make_logger(__name__)
+except Exception:  # ddtrace may be absent in some envs; fall back to stdlib
+    import logging
+
+    logger = logging.getLogger(__name__)
+
+
+async def auto_send(
+    events: AsyncIterator[StreamTaskMessage],
+    task_id: str,
+    tracer: SpanTracer | None = None,
+    streaming: Any = None,
+    usage: TurnUsage | None = None,
+    created_at: datetime | None = None,
+) -> TurnResult:
+    """Push the canonical stream to the task stream via adk.streaming.
+
+    Opens a streaming context per message (keyed by index), streams deltas via
+    ctx.stream_update, and closes via ctx.close() on Done. Posts tool
+    request/response full messages by opening a context with the content and
+    closing it immediately (no deltas). Derives and traces spans from the same
+    stream. Returns the last text segment's text + usage.
+
+    Index-keyed routing: each Start(index=i) opens a context stored in
+    ctx_map[i]; Delta(index=i) routes to ctx_map.get(i); Done(index=i) closes
+    and removes ctx_map[i]. Events with index is None are skipped. The finally
+    block closes all remaining open contexts.
+
+    final_text last-segment semantics: a new Start(TextContent) resets
+    final_text_parts so that multi-step turns return the LAST text segment.
+    Full(TextContent) also overwrites final_text_parts (same semantics).
+
+    AGX1-378: created_at is forwarded to every streaming_task_message_context
+    call so callers can back-date message timestamps.
+
+    Mirrors the open/close/stream_update pattern from
+    src/agentex/lib/adk/_modules/_langgraph_async.py:
+      - context opened via streaming_task_message_context(...).__aenter__()
+      - context closed via ctx.close() (not __aexit__)
+      - deltas pushed as StreamTaskMessageDelta with parent_task_message set
+        from ctx.task_message
+
+    For async + temporal agents (call from inside an activity).
+    """
+    if streaming is None:
+        from agentex.lib import adk
+
+        streaming = adk.streaming
+
+    deriver = SpanDeriver() if tracer is not None else None
+    final_text_parts: list[str] = []
+    ctx_map: dict[int, Any] = {}
+
+    async def _close_all() -> None:
+        # Guard each close independently: a failure on one context (e.g. a
+        # backend hiccup during teardown) must not abandon the remaining open
+        # contexts, otherwise their task messages would never be finalized.
+        for ctx in list(ctx_map.values()):
+            try:
+                await ctx.close()
+            except Exception as exc:
+                logger.warning("[harness.auto_send] context close failed during teardown: %s", exc)
+        ctx_map.clear()
+
+    try:
+        async for event in events:
+            if deriver is not None and tracer is not None:
+                for signal in deriver.observe(event):
+                    await tracer.handle(signal)
+
+            if isinstance(event, StreamTaskMessageStart):
+                if event.index is None:
+                    continue
+                i = event.index
+                # Reset final_text_parts when a new text segment starts
+                if isinstance(event.content, TextContent):
+                    final_text_parts = []
+                ctx = streaming.streaming_task_message_context(
+                    task_id=task_id,
+                    initial_content=event.content,
+                    created_at=created_at,
+                )
+                ctx_map[i] = await ctx.__aenter__()
+
+            elif isinstance(event, StreamTaskMessageDelta):
+                if event.index is None:
+                    continue
+                ctx = ctx_map.get(event.index)
+                if ctx is not None and event.delta is not None:
+                    # Reconstruct the delta with parent_task_message set from
+                    # the context's task_message (mirrors _langgraph_async.py
+                    # lines 72-78 and 117-127).
+                    delta_with_parent = StreamTaskMessageDelta(
+                        parent_task_message=ctx.task_message,
+                        delta=event.delta,
+                        type="delta",
+                        index=event.index,
+                    )
+                    await ctx.stream_update(delta_with_parent)
+                    if isinstance(event.delta, TextDelta) and event.delta.text_delta:
+                        final_text_parts.append(event.delta.text_delta)
+
+            elif isinstance(event, StreamTaskMessageDone):
+                if event.index is None:
+                    continue
+                ctx = ctx_map.pop(event.index, None)
+                if ctx is not None:
+                    await ctx.close()
+
+            elif isinstance(event, StreamTaskMessageFull):
+                # Full messages: post the full message by opening a context
+                # with the content and closing it immediately (no deltas;
+                # StreamingTaskMessageContext.close() persists initial_content
+                # when the accumulator is empty). Use async with so the context
+                # is closed even if close() raises (__aexit__ delegates to
+                # close()).
+                # Full(TextContent) also resets final_text_parts for
+                # last-segment semantics.
+                if isinstance(event.content, TextContent):
+                    final_text_parts = [event.content.content]
+                async with streaming.streaming_task_message_context(
+                    task_id=task_id,
+                    initial_content=event.content,
+                    created_at=created_at,
+                ):
+                    pass
+
+    finally:
+        await _close_all()
+        if deriver is not None and tracer is not None:
+            for signal in deriver.flush():
+                await tracer.handle(signal)
+
+    return TurnResult(final_text="".join(final_text_parts), usage=usage or TurnUsage())
diff --git a/src/agentex/lib/core/harness/emitter.py b/src/agentex/lib/core/harness/emitter.py
new file mode 100644
index 000000000..5b56793bf
--- /dev/null
+++ b/src/agentex/lib/core/harness/emitter.py
@@ -0,0 +1,80 @@
+"""UnifiedEmitter: the single facade agent authors use for either delivery mode."""
+
+from __future__ import annotations
+
+from typing import AsyncGenerator
+from datetime import datetime
+
+from agentex.lib.core.harness.types import TurnResult, HarnessTurn, StreamTaskMessage
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.lib.core.harness.auto_send import auto_send
+from agentex.lib.core.harness.yield_delivery import yield_events
+
+
+class UnifiedEmitter:
+    """Ties trace context + chosen delivery together.
+
+    Tracing modes (the `tracer` arg):
+    - tracer=None (default): auto-construct a SpanTracer if `trace_id` is present.
+    - tracer=False: disable tracing entirely, regardless of `trace_id`.
+    - tracer=<SpanTracer>: use the supplied instance.
+
+    `tracing` and `streaming` are injection escape-hatches for tests/advanced
+    use; leave them None in production so the real adk modules are used.
+    """
+
+    tracer: SpanTracer | None
+
+    def __init__(
+        self,
+        task_id: str,
+        trace_id: str | None,
+        parent_span_id: str | None,
+        tracer: SpanTracer | bool | None = None,
+        tracing: object | None = None,
+        streaming: object | None = None,
+    ):
+        self.task_id = task_id
+        self.trace_id = trace_id
+        self.parent_span_id = parent_span_id
+        self._streaming = streaming
+        if tracer is False:
+            self.tracer = None
+        elif isinstance(tracer, SpanTracer):
+            self.tracer = tracer
+        elif trace_id:
+            self.tracer = SpanTracer(
+                trace_id=trace_id,
+                parent_span_id=parent_span_id,
+                task_id=task_id,
+                tracing=tracing,
+            )
+        else:
+            self.tracer = None
+
+    async def yield_turn(self, turn: HarnessTurn) -> AsyncGenerator[StreamTaskMessage, None]:
+        """Sync HTTP ACP delivery: forward events, trace as side effect."""
+        async for event in yield_events(turn.events, tracer=self.tracer):
+            yield event
+
+    async def auto_send_turn(self, turn: HarnessTurn, created_at: datetime | None = None) -> TurnResult:
+        """Async/temporal delivery: push to the task stream, return TurnResult.
+
+        Pass `created_at` (e.g. `workflow.now()` under Temporal) to stamp the
+        turn's messages with a deterministic timestamp; it is forwarded to the
+        streaming contexts. Default None preserves server-side timestamps.
+        """
+        # `turn.usage()` is only valid AFTER `turn.events` is exhausted (the
+        # HarnessTurn single-pass contract: real turns populate usage while the
+        # stream is consumed). So drive delivery first, then read usage — do NOT
+        # pass `usage=turn.usage()` eagerly here (that would capture the empty
+        # default before the stream runs).
+        result = await auto_send(
+            turn.events,
+            task_id=self.task_id,
+            tracer=self.tracer,
+            streaming=self._streaming,
+            created_at=created_at,
+        )
+        result.usage = turn.usage()
+        return result
diff --git a/src/agentex/lib/core/harness/span_derivation.py b/src/agentex/lib/core/harness/span_derivation.py
new file mode 100644
index 000000000..cecb24bcc
--- /dev/null
+++ b/src/agentex/lib/core/harness/span_derivation.py
@@ -0,0 +1,154 @@
+"""Pure reducer: canonical StreamTaskMessage* stream -> span open/close signals.
+
+Has no dependency on adk; unit-testable in isolation. Delivery adapters feed it
+every event and act on the returned signals.
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+
+from agentex.lib.core.harness.types import OpenSpan, CloseSpan, SpanSignal, StreamTaskMessage
+from agentex.types.tool_request_delta import ToolRequestDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+
+
+@dataclass
+class _ToolReqMeta:
+    tool_call_id: str
+    name: str
+    arguments: dict[str, object]
+    args_buf: str = ""  # accumulated streamed argument fragments
+
+
+class SpanDeriver:
+    """Stateful reducer over the canonical stream.
+
+    Tool span: open on Done of a ToolRequestContent index; close on matching
+    ToolResponseContent by tool_call_id. Reasoning span: open on
+    Start(ReasoningContent); close on that index's Done.
+
+    Deliberate contracts:
+      - A `Full(ToolResponseContent)` whose tool_call_id was never opened is
+        ignored (no CloseSpan emitted).
+      - A `Done` for an index that was never a tool_request/reasoning Start is
+        ignored (no signal emitted).
+      - Events with `index is None` are skipped entirely; without a stable index
+        they cannot be reliably paired, and aliasing them to a sentinel would
+        let unrelated None-indexed events cross-match.
+      - `flush()` closes anything still open as incomplete; unclosed tool spans
+        are emitted in the order they were opened.
+    """
+
+    def __init__(self) -> None:
+        self._tool_by_index: dict[int, _ToolReqMeta] = {}
+        self._reasoning_index_open: set[int] = set()
+        # insertion-ordered set of open tool_call_ids (dict keys preserve order)
+        self._open_tool_ids: dict[str, None] = {}
+
+    def observe(self, event: StreamTaskMessage) -> list[SpanSignal]:
+        if isinstance(event, StreamTaskMessageStart):
+            return self._on_start(event)
+        if isinstance(event, StreamTaskMessageDelta):
+            return self._on_delta(event)
+        if isinstance(event, StreamTaskMessageFull):
+            return self._on_full(event)
+        if isinstance(event, StreamTaskMessageDone):
+            return self._on_done(event)
+        return []
+
+    def flush(self) -> list[SpanSignal]:
+        """Close anything still open at end of stream, marked incomplete."""
+        signals: list[SpanSignal] = []
+        for tcid in list(self._open_tool_ids):
+            signals.append(CloseSpan(key=tcid, output=None, is_complete=False))
+        self._open_tool_ids.clear()
+        for idx in sorted(self._reasoning_index_open):
+            signals.append(CloseSpan(key=f"reasoning:{idx}", output=None, is_complete=False))
+        self._reasoning_index_open.clear()
+        return signals
+
+    def _on_start(self, event: StreamTaskMessageStart) -> list[SpanSignal]:
+        if event.index is None:
+            return []
+        idx = event.index
+        content = event.content
+        if isinstance(content, ToolRequestContent):
+            self._tool_by_index[idx] = _ToolReqMeta(
+                tool_call_id=content.tool_call_id,
+                name=content.name,
+                arguments=dict(content.arguments or {}),
+            )
+            return []
+        if content.type == "reasoning":
+            self._reasoning_index_open.add(idx)
+            return [OpenSpan(key=f"reasoning:{idx}", kind="reasoning", name="reasoning", input={})]
+        return []
+
+    def _on_delta(self, event: StreamTaskMessageDelta) -> list[SpanSignal]:
+        if event.index is None:
+            return []
+        idx = event.index
+        delta = event.delta
+        if isinstance(delta, ToolRequestDelta):
+            meta = self._tool_by_index.get(idx)
+            if meta is not None and delta.arguments_delta:
+                meta.args_buf += delta.arguments_delta
+        return []
+
+    def _on_full(self, event: StreamTaskMessageFull) -> list[SpanSignal]:
+        """Handle a Full event.
+
+        A `Full(ToolRequestContent)` opens a tool span (keyed by tool_call_id)
+        if it is not already open; the matching `Full(ToolResponseContent)`
+        closes it. This handles harnesses (e.g. LangGraph) that emit tool calls
+        as a single Full rather than Start+Done.
+        """
+        content = event.content
+        if isinstance(content, ToolRequestContent):
+            tcid = content.tool_call_id
+            if tcid not in self._open_tool_ids:
+                self._open_tool_ids[tcid] = None
+                args = dict(content.arguments or {})
+                return [OpenSpan(key=tcid, kind="tool", name=content.name, input=args)]
+            return []
+        if isinstance(content, ToolResponseContent):
+            tcid = content.tool_call_id
+            if tcid in self._open_tool_ids:
+                self._open_tool_ids.pop(tcid, None)
+                return [
+                    CloseSpan(
+                        key=tcid,
+                        output=content.content,
+                        is_complete=True,
+                        is_error=content.is_error,
+                    )
+                ]
+        return []
+
+    def _on_done(self, event: StreamTaskMessageDone) -> list[SpanSignal]:
+        if event.index is None:
+            return []
+        idx = event.index
+        meta = self._tool_by_index.pop(idx, None)
+        if meta is not None:
+            args = meta.arguments
+            if meta.args_buf:
+                try:
+                    args = json.loads(meta.args_buf)
+                except json.JSONDecodeError:
+                    args = {"_raw": meta.args_buf}
+            self._open_tool_ids[meta.tool_call_id] = None
+            return [OpenSpan(key=meta.tool_call_id, kind="tool", name=meta.name, input=args)]
+        if idx in self._reasoning_index_open:
+            self._reasoning_index_open.discard(idx)
+            return [CloseSpan(key=f"reasoning:{idx}", output=None, is_complete=True)]
+        return []
diff --git a/src/agentex/lib/core/harness/tracer.py b/src/agentex/lib/core/harness/tracer.py
new file mode 100644
index 000000000..4ca4d628b
--- /dev/null
+++ b/src/agentex/lib/core/harness/tracer.py
@@ -0,0 +1,88 @@
+"""Adapter from SpanSignals to adk.tracing spans (best-effort, overridable)."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from agentex.lib.core.harness.types import OpenSpan, CloseSpan, SpanSignal
+
+try:
+    from agentex.lib.utils.logging import make_logger
+
+    logger = make_logger(__name__)
+except Exception:  # ddtrace may be absent in some envs; fall back to stdlib
+    import logging
+
+    logger = logging.getLogger(__name__)
+
+
+class SpanTracer:
+    """Opens/closes adk.tracing child spans in response to span signals.
+
+    `tracing` defaults to the real `adk.tracing` module; inject a fake in tests
+    or a custom tracer to override. No-op when `trace_id` is falsy. Never raises.
+
+    The real TracingModule.end_span does NOT accept an output kwarg — output is
+    recorded by mutating span.output before calling end_span, matching the pattern
+    used throughout the codebase (see _langgraph_tracing.py on_tool_end etc.).
+
+    Span-lifecycle contract: the `_open` dict (span key -> span object) is scoped
+    to a single turn. Pairing is by `key`:
+    - A duplicate OpenSpan for a key already in `_open` silently replaces the
+      earlier span; the earlier span is then orphaned (never closed / leaked).
+    - A CloseSpan for an unknown key is a no-op.
+    - Unpaired opens accumulate in `_open` for the lifetime of the tracer; since
+      a tracer is expected to live for one turn, this is bounded and acceptable.
+    """
+
+    def __init__(
+        self,
+        trace_id: str | None,
+        parent_span_id: str | None,
+        tracing: Any = None,
+        task_id: str | None = None,
+    ):
+        self.trace_id = trace_id
+        self.parent_span_id = parent_span_id
+        self.task_id = task_id
+        if tracing is None:
+            from agentex.lib import adk
+
+            tracing = adk.tracing
+        self._tracing = tracing
+        self._open: dict[str, Any] = {}  # span key -> span object
+
+    async def handle(self, signal: SpanSignal) -> None:
+        if not self.trace_id:
+            return
+        try:
+            if isinstance(signal, OpenSpan):
+                span = await self._tracing.start_span(
+                    trace_id=self.trace_id,
+                    name=signal.name,
+                    input=signal.input,
+                    parent_id=self.parent_span_id,
+                    task_id=self.task_id,
+                )
+                if span is not None:
+                    self._open[signal.key] = span
+            elif isinstance(signal, CloseSpan):
+                span = self._open.pop(signal.key, None)
+                if span is not None:
+                    # Output is recorded by mutating span.output before end_span.
+                    # The real TracingModule.end_span signature is:
+                    #   end_span(trace_id, span, start_to_close_timeout, heartbeat_timeout, retry_policy)
+                    # It does not accept an output= kwarg.
+                    span.output = signal.output
+                    # Tool failure status (ToolResponseContent.is_error) is recorded
+                    # on span.data when the harness reports one; Span has no dedicated
+                    # error field. None means no status was reported, so leave data alone.
+                    if signal.is_error is not None:
+                        data = span.data if isinstance(span.data, dict) else {}
+                        span.data = {**data, "is_error": signal.is_error}
+                    await self._tracing.end_span(
+                        trace_id=self.trace_id,
+                        span=span,
+                    )
+        except Exception as exc:  # best-effort: tracing never breaks delivery
+            logger.warning("[harness.tracer] span signal failed: %s", exc)
diff --git a/src/agentex/lib/core/harness/types.py b/src/agentex/lib/core/harness/types.py
new file mode 100644
index 000000000..b37dc1e51
--- /dev/null
+++ b/src/agentex/lib/core/harness/types.py
@@ -0,0 +1,93 @@
+"""Types for the unified harness surface."""
+
+from __future__ import annotations
+
+from typing import Any, Union, Literal, Protocol, AsyncIterator, runtime_checkable
+from dataclasses import field, dataclass
+
+from pydantic import BaseModel, ConfigDict
+
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+
+# The canonical stream element. Taps yield these; delivery adapters consume them.
+StreamTaskMessage = Union[
+    StreamTaskMessageStart,
+    StreamTaskMessageDelta,
+    StreamTaskMessageFull,
+    StreamTaskMessageDone,
+]
+
+SpanKind = Literal["tool", "reasoning", "subagent"]
+
+
+@dataclass
+class OpenSpan:
+    """Signal to open a child span. `key` pairs an open with its close."""
+
+    key: str
+    kind: SpanKind
+    name: str
+    input: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class CloseSpan:
+    """Signal to close the span previously opened with the same `key`."""
+
+    key: str
+    output: Any = None
+    is_complete: bool = True  # False when closed by flush() without a result
+    is_error: bool | None = None  # tool failure status; None when the harness reports no status
+
+
+SpanSignal = Union[OpenSpan, CloseSpan]
+
+
+class TurnUsage(BaseModel):
+    """Harness-independent turn usage/cost, attached to the turn span.
+
+    Token field names align with agentex.lib.core.observability.llm_metrics.
+    """
+
+    model_config = ConfigDict(from_attributes=True, populate_by_name=True)
+
+    model: str | None = None
+    input_tokens: int | None = None
+    output_tokens: int | None = None
+    cached_input_tokens: int | None = None
+    reasoning_tokens: int | None = None
+    total_tokens: int | None = None
+    cost_usd: float | None = None
+    duration_ms: int | None = None
+    num_llm_calls: int = 0
+    num_tool_calls: int = 0
+    num_reasoning_blocks: int = 0
+
+
+class TurnResult(BaseModel):
+    """Returned to the caller after a turn is delivered."""
+
+    model_config = ConfigDict(from_attributes=True, populate_by_name=True)
+
+    final_text: str = ""
+    usage: TurnUsage = TurnUsage()
+
+
+@runtime_checkable
+class HarnessTurn(Protocol):
+    """A single harness turn: a canonical stream plus its normalized usage.
+
+    Python async generators cannot cleanly return a value to their consumer, so
+    a tap exposes usage via `usage()` (valid only after `events` is exhausted)
+    rather than via StopAsyncIteration.
+    """
+
+    @property
+    def events(self) -> AsyncIterator[StreamTaskMessage]: ...
+
+    def usage(self) -> TurnUsage: ...
diff --git a/src/agentex/lib/core/harness/yield_delivery.py b/src/agentex/lib/core/harness/yield_delivery.py
new file mode 100644
index 000000000..69b39f152
--- /dev/null
+++ b/src/agentex/lib/core/harness/yield_delivery.py
@@ -0,0 +1,31 @@
+"""Yield delivery: pass the canonical stream through, tracing as a side effect."""
+
+from __future__ import annotations
+
+from typing import AsyncIterator, AsyncGenerator
+
+from agentex.lib.core.harness.types import StreamTaskMessage
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.lib.core.harness.span_derivation import SpanDeriver
+
+
+async def yield_events(
+    events: AsyncIterator[StreamTaskMessage],
+    tracer: SpanTracer | None = None,
+) -> AsyncGenerator[StreamTaskMessage, None]:
+    """Forward each event to the caller; derive + trace spans as a side effect.
+
+    For sync HTTP ACP agents that yield events back over the response. When
+    `tracer` is None, this is a pure passthrough.
+    """
+    deriver = SpanDeriver() if tracer is not None else None
+    try:
+        async for event in events:
+            if deriver is not None and tracer is not None:
+                for signal in deriver.observe(event):
+                    await tracer.handle(signal)
+            yield event
+    finally:
+        if deriver is not None and tracer is not None:
+            for signal in deriver.flush():
+                await tracer.handle(signal)
diff --git a/tests/lib/core/harness/__init__.py b/tests/lib/core/harness/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/lib/core/harness/conformance/__init__.py b/tests/lib/core/harness/conformance/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/lib/core/harness/conformance/runner.py b/tests/lib/core/harness/conformance/runner.py
new file mode 100644
index 000000000..81a74860c
--- /dev/null
+++ b/tests/lib/core/harness/conformance/runner.py
@@ -0,0 +1,48 @@
+"""Shared conformance engine: every harness tap registers fixtures here.
+
+A fixture is (name, list[StreamTaskMessage]). The runner asserts that span
+derivation over the events is identical regardless of delivery channel, which is
+the cross-channel guarantee from the spec.
+
+Registry shared-state hazard: `_REGISTRY` is process-global. Every `test_*.py`
+module that calls `register()` at import time contributes to it, so a module
+that parametrizes over `all_fixtures()` will see fixtures registered by ANY
+other conformance module imported earlier in the same pytest process (collection
+order is not guaranteed). To stay deterministic, each future harness conformance
+module should register and parametrize over its OWN fixtures (e.g. keep a
+module-local list it both registers and parametrizes), rather than relying on
+cross-module global accumulation via `all_fixtures()`.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from agentex.lib.core.harness.types import SpanSignal, StreamTaskMessage
+from agentex.lib.core.harness.span_derivation import SpanDeriver
+
+
+@dataclass
+class Fixture:
+    name: str
+    events: list[StreamTaskMessage]
+
+
+_REGISTRY: list[Fixture] = []
+
+
+def register(fixture: Fixture) -> None:
+    _REGISTRY.append(fixture)
+
+
+def all_fixtures() -> list[Fixture]:
+    return list(_REGISTRY)
+
+
+def derive_all(events: list[StreamTaskMessage]) -> list[SpanSignal]:
+    d = SpanDeriver()
+    out: list[SpanSignal] = []
+    for e in events:
+        out.extend(d.observe(e))
+    out.extend(d.flush())
+    return out
diff --git a/tests/lib/core/harness/conformance/test_conformance.py b/tests/lib/core/harness/conformance/test_conformance.py
new file mode 100644
index 000000000..d9eec1c15
--- /dev/null
+++ b/tests/lib/core/harness/conformance/test_conformance.py
@@ -0,0 +1,43 @@
+import pytest
+
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+
+from .runner import Fixture, register, derive_all, all_fixtures
+
+register(
+    Fixture(
+        name="builtin-single-tool",
+        events=[
+            StreamTaskMessageStart(
+                type="start",
+                index=0,
+                content=ToolRequestContent(
+                    type="tool_request", author="agent", tool_call_id="c", name="Bash", arguments={}
+                ),
+            ),
+            StreamTaskMessageDone(type="done", index=0),
+            StreamTaskMessageFull(
+                type="full",
+                index=1,
+                content=ToolResponseContent(
+                    type="tool_response", author="agent", tool_call_id="c", name="Bash", content="ok"
+                ),
+            ),
+        ],
+    )
+)
+
+
+@pytest.mark.parametrize("fixture", all_fixtures(), ids=lambda f: f.name)
+def test_span_derivation_is_deterministic(fixture):
+    """Exercises the cross-channel guarantee: yield and auto-send observe the
+    same event stream, so span derivation must be deterministic/idempotent."""
+    # Deriving twice over the same events yields identical signals (the property
+    # that makes yield vs auto-send equivalent, since both observe the same stream).
+    assert derive_all(fixture.events) == derive_all(fixture.events)
diff --git a/tests/lib/core/harness/test_auto_send.py b/tests/lib/core/harness/test_auto_send.py
new file mode 100644
index 000000000..1948e9196
--- /dev/null
+++ b/tests/lib/core/harness/test_auto_send.py
@@ -0,0 +1,490 @@
+"""Tests for auto_send delivery adapter.
+
+The fake mirrors the real StreamingTaskMessageContext API exactly:
+- streaming_task_message_context(...) returns a context object (synchronously)
+- open the context via __aenter__ (returns self after creating the task message)
+- stream deltas via ctx.stream_update(StreamTaskMessageDelta(...))
+- close via ctx.close() (NOT __aexit__)
+
+This mirrors _langgraph_async.py lines 62-78 and 100-127.
+"""
+
+import types as _types
+from datetime import datetime
+
+import pytest
+
+from agentex.types.task_message import TaskMessage
+from agentex.types.text_content import TextContent
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.tool_request_delta import ToolRequestDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.lib.core.harness.auto_send import auto_send
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+
+
+class _FakeCtx:
+    """Mirrors StreamingTaskMessageContext: __aenter__ opens (returns self with task_message set),
+    close() closes. stream_update records the call.
+
+    task_message is a real TaskMessage instance so that auto_send can use it
+    as parent_task_message in StreamTaskMessageDelta without Pydantic validation errors.
+    """
+
+    def __init__(self, sink, content_type, initial_content):
+        self.sink = sink
+        self.content_type = content_type
+        # Real TaskMessage so StreamTaskMessageDelta(parent_task_message=...) passes validation
+        self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content)
+
+    async def __aenter__(self):
+        self.sink.append(("open", self.content_type))
+        return self
+
+    async def __aexit__(self, *a):
+        # __aexit__ delegates to close in the real impl; keep for safety
+        await self.close()
+        return False
+
+    async def close(self):
+        self.sink.append(("close", self.content_type))
+
+    async def stream_update(self, update):
+        self.sink.append(("update", update))
+        return update
+
+
+class _FakeStreaming:
+    """Mirrors StreamingService: streaming_task_message_context returns a context object."""
+
+    def __init__(self):
+        self.sink = []
+        self.recorded_created_at: list[datetime | None] = []
+
+    def streaming_task_message_context(self, task_id, initial_content, streaming_mode="coalesced", created_at=None):
+        ctype = getattr(initial_content, "type", None)
+        self.sink.append(("ctx", ctype))
+        self.recorded_created_at.append(created_at)
+        return _FakeCtx(self.sink, ctype, initial_content)
+
+
+async def _gen(events):
+    for e in events:
+        yield e
+
+
+# ---------------------------------------------------------------------------
+# Test 1: text streaming — open, stream deltas, close; return accumulated text
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_auto_send_streams_text_and_returns_final_text():
+    streaming = _FakeStreaming()
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=TextDelta(type="text", text_delta="Hel"),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=TextDelta(type="text", text_delta="lo"),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    result = await auto_send(_gen(events), task_id="task1", tracer=None, streaming=streaming)
+
+    assert result.final_text == "Hello"
+
+    kinds = [s[0] for s in streaming.sink]
+    # A context was created for the text content
+    assert kinds[0] == "ctx"
+    # It was opened and closed
+    assert "open" in kinds
+    assert "close" in kinds
+    # Exactly two updates were streamed (one per delta)
+    updates = [s for s in streaming.sink if s[0] == "update"]
+    assert len(updates) == 2
+
+
+# ---------------------------------------------------------------------------
+# Test 2: tool_request Full + tool_response Full — each posts one full message
+# (open context with the content, no deltas, close immediately)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_auto_send_posts_full_tool_messages():
+    streaming = _FakeStreaming()
+    events = [
+        # Two Full events post two messages (open+close immediately, no deltas).
+        StreamTaskMessageFull(
+            type="full",
+            index=0,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="c1",
+                name="Bash",
+                arguments={"cmd": "ls"},
+            ),
+        ),
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolResponseContent(
+                type="tool_response",
+                author="agent",
+                tool_call_id="c1",
+                name="Bash",
+                content="file.py",
+            ),
+        ),
+    ]
+    result = await auto_send(_gen(events), task_id="task1", tracer=None, streaming=streaming)
+
+    assert result.final_text == ""
+
+    # Each Full event opens and closes exactly one context.
+    ctx_events = [s for s in streaming.sink if s[0] == "ctx"]
+    assert len(ctx_events) == 2
+    content_types = [s[1] for s in ctx_events]
+    assert content_types == ["tool_request", "tool_response"]
+
+    # Each context is opened and closed
+    opens = [s for s in streaming.sink if s[0] == "open"]
+    closes = [s for s in streaming.sink if s[0] == "close"]
+    assert len(opens) == 2
+    assert len(closes) == 2
+
+    # No stream_update calls (full messages have no deltas)
+    updates = [s for s in streaming.sink if s[0] == "update"]
+    assert len(updates) == 0
+
+
+# ---------------------------------------------------------------------------
+# Test 3: tracing — spans are derived and handed to the tracer
+# ---------------------------------------------------------------------------
+
+
+class _RecordTracing:
+    def __init__(self):
+        self.started, self.ended = [], []
+
+    async def start_span(self, *, trace_id, name, input=None, parent_id=None, data=None, task_id=None):
+        self.started.append(name)
+        return _types.SimpleNamespace()
+
+    async def end_span(self, *, trace_id, span):
+        self.ended.append(getattr(span, "output", None))
+
+
+@pytest.mark.asyncio
+async def test_auto_send_derives_tool_spans_via_tracer():
+    fake_tracing = _RecordTracing()
+    tracer = SpanTracer(trace_id="t", parent_span_id="p", tracing=fake_tracing)
+    streaming = _FakeStreaming()
+
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="c1",
+                name="Bash",
+                arguments={},
+            ),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolResponseContent(
+                type="tool_response",
+                author="agent",
+                tool_call_id="c1",
+                name="Bash",
+                content="ok",
+            ),
+        ),
+    ]
+
+    result = await auto_send(_gen(events), task_id="task1", tracer=tracer, streaming=streaming)
+
+    assert result.final_text == ""
+    assert fake_tracing.started == ["Bash"]
+    assert fake_tracing.ended == ["ok"]
+
+
+# ---------------------------------------------------------------------------
+# Test 4: text followed by a tool Full — text context is closed before Full
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_auto_send_closes_text_context_before_full_message():
+    streaming = _FakeStreaming()
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=TextDelta(type="text", text_delta="Hi"),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="c2",
+                name="read_file",
+                arguments={},
+            ),
+        ),
+    ]
+    result = await auto_send(_gen(events), task_id="task1", tracer=None, streaming=streaming)
+    assert result.final_text == "Hi"
+
+    # Verify ordering: text ctx opens, updates, closes; then tool_request ctx opens, closes
+    event_sequence = [(s[0], s[1]) for s in streaming.sink]
+    text_open_idx = next(i for i, s in enumerate(event_sequence) if s == ("open", "text"))
+    text_close_idx = next(i for i, s in enumerate(event_sequence) if s == ("close", "text"))
+    tool_open_idx = next(i for i, s in enumerate(event_sequence) if s == ("open", "tool_request"))
+    assert text_open_idx < text_close_idx < tool_open_idx
+
+
+# ---------------------------------------------------------------------------
+# Test 5: midstream error — propagates AND the open context is closed (finally)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_open_context_closed_on_midstream_error():
+    streaming = _FakeStreaming()
+
+    async def _exploding_gen():
+        yield StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=TextContent(type="text", author="agent", content=""),
+        )
+        raise RuntimeError("boom")
+
+    with pytest.raises(RuntimeError, match="boom"):
+        await auto_send(_exploding_gen(), task_id="task1", tracer=None, streaming=streaming)
+
+    # The text context that was opened mid-stream was closed by the finally block.
+    assert ("open", "text") in [(s[0], s[1]) for s in streaming.sink]
+    assert ("close", "text") in [(s[0], s[1]) for s in streaming.sink]
+
+
+# ---------------------------------------------------------------------------
+# Test 6: streamed tool_request delivered (AGX1-377 core)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_auto_send_streams_tool_request():
+    """A Start(ToolRequestContent) MUST open a streaming context (AGX1-377)."""
+    streaming = _FakeStreaming()
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="c_tool",
+                name="Bash",
+                arguments={},
+            ),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=ToolRequestDelta(
+                type="tool_request",
+                tool_call_id="c_tool",
+                name="Bash",
+                arguments_delta='{"cmd": "ls"}',
+            ),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    result = await auto_send(_gen(events), task_id="task1", tracer=None, streaming=streaming)
+
+    assert result.final_text == ""
+
+    ctx_events = [s for s in streaming.sink if s[0] == "ctx"]
+    assert len(ctx_events) == 1
+    assert ctx_events[0][1] == "tool_request"
+
+    opens = [s for s in streaming.sink if s[0] == "open"]
+    closes = [s for s in streaming.sink if s[0] == "close"]
+    assert len(opens) == 1
+    assert len(closes) == 1
+
+    updates = [s for s in streaming.sink if s[0] == "update"]
+    assert len(updates) == 1
+
+
+# ---------------------------------------------------------------------------
+# Test 7: interleaved indexes route correctly
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_auto_send_interleaved_indexes_route_correctly():
+    """Deltas must be routed to the correct index-keyed context."""
+    streaming = _FakeStreaming()
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageStart(
+            type="start",
+            index=1,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=TextDelta(type="text", text_delta="A"),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=1,
+            delta=TextDelta(type="text", text_delta="B"),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageDone(type="done", index=1),
+    ]
+    result = await auto_send(_gen(events), task_id="task1", tracer=None, streaming=streaming)
+
+    ctx_events = [s for s in streaming.sink if s[0] == "ctx"]
+    assert len(ctx_events) == 2
+
+    opens = [s for s in streaming.sink if s[0] == "open"]
+    assert len(opens) == 2
+
+    updates = [s for s in streaming.sink if s[0] == "update"]
+    assert len(updates) == 2
+
+    update_deltas = [s[1].delta for s in streaming.sink if s[0] == "update"]
+    text_deltas = [d.text_delta for d in update_deltas if isinstance(d, TextDelta)]
+    assert set(text_deltas) == {"A", "B"}
+
+
+# ---------------------------------------------------------------------------
+# Test 8: final_text returns last text segment for multi-step
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_auto_send_final_text_last_segment():
+    """final_text must be the LAST text segment, not accumulated across all turns."""
+    streaming = _FakeStreaming()
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=TextDelta(type="text", text_delta="First"),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageStart(
+            type="start",
+            index=1,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=1,
+            delta=TextDelta(type="text", text_delta="Second"),
+        ),
+        StreamTaskMessageDone(type="done", index=1),
+    ]
+    result = await auto_send(_gen(events), task_id="task1", tracer=None, streaming=streaming)
+    assert result.final_text == "Second"
+
+
+# ---------------------------------------------------------------------------
+# Test 9: Full(TextContent) contributes to final_text
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_auto_send_full_text_content_sets_final_text():
+    """A Full(TextContent) must contribute its text to final_text."""
+    streaming = _FakeStreaming()
+    events = [
+        StreamTaskMessageFull(
+            type="full",
+            index=0,
+            content=TextContent(type="text", author="agent", content="hello"),
+        ),
+    ]
+    result = await auto_send(_gen(events), task_id="task1", tracer=None, streaming=streaming)
+    assert result.final_text == "hello"
+
+
+# ---------------------------------------------------------------------------
+# Test 10: created_at is forwarded to streaming context (AGX1-378)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_auto_send_created_at_forwarded():
+    """created_at must be forwarded to every streaming_task_message_context call."""
+    streaming = _FakeStreaming()
+    dt = datetime(2025, 1, 15, 12, 0, 0)
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="c_ts",
+                name="Bash",
+                arguments={},
+            ),
+        ),
+    ]
+    await auto_send(_gen(events), task_id="task1", tracer=None, streaming=streaming, created_at=dt)
+
+    assert all(ts == dt for ts in streaming.recorded_created_at)
diff --git a/tests/lib/core/harness/test_emitter.py b/tests/lib/core/harness/test_emitter.py
new file mode 100644
index 000000000..df155ec44
--- /dev/null
+++ b/tests/lib/core/harness/test_emitter.py
@@ -0,0 +1,148 @@
+import pytest
+
+from agentex.types.task_message import TaskMessage
+from agentex.types.text_content import TextContent
+from agentex.lib.core.harness.types import TurnUsage
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+
+
+class _FakeTracing:
+    async def start_span(self, **kw):
+        return None
+
+    async def end_span(self, **kw):
+        pass
+
+
+class _FakeCtx:
+    """Minimal StreamingTaskMessageContext fake (see test_auto_send.py)."""
+
+    def __init__(self, sink, content_type, initial_content):
+        self.sink = sink
+        self.content_type = content_type
+        self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content)
+
+    async def __aenter__(self):
+        self.sink.append(("open", self.content_type))
+        return self
+
+    async def __aexit__(self, *a):
+        await self.close()
+        return False
+
+    async def close(self):
+        self.sink.append(("close", self.content_type))
+
+    async def stream_update(self, update):
+        self.sink.append(("update", update))
+        return update
+
+
+class _FakeStreaming:
+    def __init__(self):
+        self.sink = []
+
+    def streaming_task_message_context(self, task_id, initial_content, streaming_mode="coalesced", created_at=None):
+        ctype = getattr(initial_content, "type", None)
+        self.sink.append(("ctx", ctype))
+        return _FakeCtx(self.sink, ctype, initial_content)
+
+
+class _Turn:
+    def __init__(self, events_list, usage):
+        self._events_list = events_list
+        self._usage = usage
+
+    @property
+    async def events(self):
+        for e in self._events_list:
+            yield e
+
+    def usage(self):
+        return self._usage
+
+
+@pytest.mark.asyncio
+async def test_emitter_yield_mode_passes_through():
+    events = [
+        StreamTaskMessageStart(type="start", index=0, content=TextContent(type="text", author="agent", content="hi")),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    turn = _Turn(events, TurnUsage(model="m"))
+    emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+    out = [e async for e in emitter.yield_turn(turn)]
+    assert out == events
+
+
+@pytest.mark.asyncio
+async def test_emitter_tracing_default_on_when_trace_id_present():
+    # Inject a fake tracing backend so the test env doesn't need temporalio.
+    # This exercises the default-on path (tracer=None) when trace_id is truthy.
+    emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id="p", tracing=_FakeTracing())
+    assert emitter.tracer is not None
+
+
+@pytest.mark.asyncio
+async def test_emitter_tracing_overridable_off():
+    emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id="p", tracer=False)
+    assert emitter.tracer is None
+
+
+@pytest.mark.asyncio
+async def test_emitter_auto_send_turn_returns_usage():
+    usage = TurnUsage(model="m", input_tokens=5)
+    events = [
+        StreamTaskMessageStart(type="start", index=0, content=TextContent(type="text", author="agent", content="")),
+        StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="Hello")),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    turn = _Turn(events, usage)
+    fake = _FakeStreaming()
+    emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None, streaming=fake)
+    result = await emitter.auto_send_turn(turn)
+    assert result.usage == usage
+    assert result.final_text == "Hello"
+
+
+class _ContractTurn:
+    """A turn that honors the single-pass contract: usage() is the empty default
+    UNTIL `events` is exhausted, then the real usage (this is how real harness
+    turns behave — they populate usage while the stream is consumed)."""
+
+    def __init__(self, events_list, real_usage):
+        self._events_list = events_list
+        self._real_usage = real_usage
+        self._exhausted = False
+
+    @property
+    async def events(self):
+        for e in self._events_list:
+            yield e
+        self._exhausted = True
+
+    def usage(self):
+        return self._real_usage if self._exhausted else TurnUsage(model="m")
+
+
+@pytest.mark.asyncio
+async def test_emitter_auto_send_turn_reads_usage_after_exhaustion():
+    # Regression: auto_send_turn must read turn.usage() AFTER consuming the
+    # stream, not eagerly when building the auto_send call (which would capture
+    # the empty default and lose real token usage on the auto_send path).
+    real_usage = TurnUsage(model="m", input_tokens=11, output_tokens=22, total_tokens=33, num_llm_calls=2)
+    events = [
+        StreamTaskMessageStart(type="start", index=0, content=TextContent(type="text", author="agent", content="")),
+        StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="hi")),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    turn = _ContractTurn(events, real_usage)
+    emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None, streaming=_FakeStreaming())
+    result = await emitter.auto_send_turn(turn)
+    assert result.usage == real_usage
+    assert result.usage.input_tokens == 11 and result.usage.total_tokens == 33
diff --git a/tests/lib/core/harness/test_span_derivation.py b/tests/lib/core/harness/test_span_derivation.py
new file mode 100644
index 000000000..51e2ede2c
--- /dev/null
+++ b/tests/lib/core/harness/test_span_derivation.py
@@ -0,0 +1,286 @@
+from agentex.types.text_content import TextContent
+from agentex.lib.core.harness.types import OpenSpan, CloseSpan
+from agentex.types.reasoning_content import ReasoningContent
+from agentex.types.tool_request_delta import ToolRequestDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.core.harness.span_derivation import SpanDeriver
+
+
+def _signals(deriver, events):
+    out = []
+    for e in events:
+        out.extend(deriver.observe(e))
+    out.extend(deriver.flush())
+    return out
+
+
+def _tool_req(idx, tcid, name, args):
+    return StreamTaskMessageStart(
+        type="start",
+        index=idx,
+        content=ToolRequestContent(type="tool_request", author="agent", tool_call_id=tcid, name=name, arguments=args),
+    )
+
+
+def test_text_only_yields_no_spans():
+    d = SpanDeriver()
+    events = [
+        StreamTaskMessageStart(type="start", index=0, content=TextContent(type="text", author="agent", content="")),
+        StreamTaskMessageDelta(type="delta", index=0, delta=None),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    assert _signals(d, events) == []
+
+
+def test_single_tool_opens_on_done_closes_on_response():
+    d = SpanDeriver()
+    events = [
+        _tool_req(0, "call_1", "Bash", {"cmd": "ls"}),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolResponseContent(
+                type="tool_response", author="agent", tool_call_id="call_1", name="Bash", content="files"
+            ),
+        ),
+    ]
+    sigs = _signals(d, events)
+    assert sigs == [
+        OpenSpan(key="call_1", kind="tool", name="Bash", input={"cmd": "ls"}),
+        CloseSpan(key="call_1", output="files", is_complete=True),
+    ]
+    # No status reported -> CloseSpan carries is_error=None.
+    assert sigs[1].is_error is None
+
+
+def test_tool_response_is_error_propagates_to_close_span():
+    """ToolResponseContent.is_error flows onto the CloseSpan so a derived tool
+    span can be marked as a failure (AGX1-371)."""
+    d = SpanDeriver()
+    events = [
+        _tool_req(0, "call_err", "Bash", {"cmd": "false"}),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolResponseContent(
+                type="tool_response",
+                author="agent",
+                tool_call_id="call_err",
+                name="Bash",
+                content="boom",
+                is_error=True,
+            ),
+        ),
+    ]
+    sigs = _signals(d, events)
+    assert sigs == [
+        OpenSpan(key="call_err", kind="tool", name="Bash", input={"cmd": "false"}),
+        CloseSpan(key="call_err", output="boom", is_complete=True, is_error=True),
+    ]
+
+
+def test_reasoning_opens_on_start_closes_on_done():
+    d = SpanDeriver()
+    events = [
+        StreamTaskMessageStart(
+            type="start", index=0, content=ReasoningContent(type="reasoning", author="agent", summary=[], content=[])
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    sigs = _signals(d, events)
+    assert sigs[0] == OpenSpan(key="reasoning:0", kind="reasoning", name="reasoning", input={})
+    assert sigs[1] == CloseSpan(key="reasoning:0", output=None, is_complete=True)
+
+
+def test_parallel_tools_pair_by_tool_call_id():
+    d = SpanDeriver()
+    events = [
+        _tool_req(0, "a", "T1", {}),
+        _tool_req(1, "b", "T2", {}),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageDone(type="done", index=1),
+        StreamTaskMessageFull(
+            type="full",
+            index=2,
+            content=ToolResponseContent(
+                type="tool_response", author="agent", tool_call_id="b", name="T2", content="rb"
+            ),
+        ),
+        StreamTaskMessageFull(
+            type="full",
+            index=3,
+            content=ToolResponseContent(
+                type="tool_response", author="agent", tool_call_id="a", name="T1", content="ra"
+            ),
+        ),
+    ]
+    sigs = _signals(d, events)
+    opens = [s for s in sigs if isinstance(s, OpenSpan)]
+    closes = [s for s in sigs if isinstance(s, CloseSpan)]
+    assert {o.key for o in opens} == {"a", "b"}
+    assert [c.key for c in closes] == ["b", "a"]
+    assert all(c.is_complete for c in closes)
+
+
+def test_streamed_args_accumulate_into_open_input():
+    d = SpanDeriver()
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=ToolRequestContent(
+                type="tool_request", author="agent", tool_call_id="c", name="Bash", arguments={}
+            ),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=ToolRequestDelta(type="tool_request", tool_call_id="c", name="Bash", arguments_delta='{"cmd":'),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=ToolRequestDelta(type="tool_request", tool_call_id="c", name="Bash", arguments_delta='"ls"}'),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    sigs = _signals(d, events)
+    assert sigs[0] == OpenSpan(key="c", kind="tool", name="Bash", input={"cmd": "ls"})
+
+
+def test_unclosed_tool_closed_incomplete_on_flush():
+    d = SpanDeriver()
+    events = [
+        _tool_req(0, "x", "Bash", {}),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    sigs = _signals(d, events)
+    assert sigs[0] == OpenSpan(key="x", kind="tool", name="Bash", input={})
+    assert sigs[1] == CloseSpan(key="x", output=None, is_complete=False)
+
+
+def test_none_index_is_skipped():
+    d = SpanDeriver()
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=None,
+            content=ToolRequestContent(
+                type="tool_request", author="agent", tool_call_id="n", name="Bash", arguments={}
+            ),
+        ),
+        StreamTaskMessageDone(type="done", index=None),
+    ]
+    assert _signals(d, events) == []
+
+
+def test_orphan_tool_response_ignored():
+    d = SpanDeriver()
+    events = [
+        StreamTaskMessageFull(
+            type="full",
+            index=0,
+            content=ToolResponseContent(
+                type="tool_response", author="agent", tool_call_id="z", name="Bash", content="r"
+            ),
+        ),
+    ]
+    assert _signals(d, events) == []
+
+
+def test_full_tool_request_opens_span():
+    """Full(ToolRequestContent) must open a tool span (for LangGraph-style harnesses)."""
+    d = SpanDeriver()
+    events = [
+        StreamTaskMessageFull(
+            type="full",
+            index=0,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="call_x",
+                name="Bash",
+                arguments={"cmd": "ls"},
+            ),
+        ),
+    ]
+    sigs = _signals(d, events)
+    assert sigs[0] == OpenSpan(key="call_x", kind="tool", name="Bash", input={"cmd": "ls"})
+    assert sigs[1] == CloseSpan(key="call_x", output=None, is_complete=False)
+
+
+def test_full_tool_request_and_response_paired():
+    """Full(ToolRequestContent) + Full(ToolResponseContent) produces a complete span pair."""
+    d = SpanDeriver()
+    events = [
+        StreamTaskMessageFull(
+            type="full",
+            index=0,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="call_y",
+                name="Grep",
+                arguments={},
+            ),
+        ),
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolResponseContent(
+                type="tool_response",
+                author="agent",
+                tool_call_id="call_y",
+                name="Grep",
+                content="result",
+            ),
+        ),
+    ]
+    sigs = _signals(d, events)
+    assert sigs == [
+        OpenSpan(key="call_y", kind="tool", name="Grep", input={}),
+        CloseSpan(key="call_y", output="result", is_complete=True),
+    ]
+
+
+def test_full_tool_request_does_not_double_open():
+    """A Full(ToolRequestContent) for an already-open tool_call_id is a no-op."""
+    d = SpanDeriver()
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="call_z",
+                name="X",
+                arguments={},
+            ),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="call_z",
+                name="X",
+                arguments={},
+            ),
+        ),
+    ]
+    sigs = _signals(d, events)
+    opens = [s for s in sigs if isinstance(s, OpenSpan)]
+    assert len(opens) == 1
+    assert opens[0].key == "call_z"
diff --git a/tests/lib/core/harness/test_tracer.py b/tests/lib/core/harness/test_tracer.py
new file mode 100644
index 000000000..ed40cf595
--- /dev/null
+++ b/tests/lib/core/harness/test_tracer.py
@@ -0,0 +1,93 @@
+from typing import override
+
+import pytest
+
+from agentex.lib.core.harness.types import OpenSpan, CloseSpan
+from agentex.lib.core.harness.tracer import SpanTracer
+
+
+class _FakeSpan:
+    def __init__(self, name):
+        self.name = name
+        self.output = None
+        self.data = None
+
+
+class _FakeTracing:
+    def __init__(self):
+        self.started = []
+        self.ended = []
+        self.ended_spans = []
+
+    async def start_span(self, *, trace_id, name, input=None, parent_id=None, data=None, task_id=None):
+        self.started.append((name, parent_id, input))
+        return _FakeSpan(name)
+
+    async def end_span(self, *, trace_id, span):
+        self.ended.append((span.name, span.output))
+        self.ended_spans.append(span)
+
+
+@pytest.mark.asyncio
+async def test_open_then_close_starts_and_ends_span():
+    fake = _FakeTracing()
+    tracer = SpanTracer(trace_id="t1", parent_span_id="p1", tracing=fake)
+    await tracer.handle(OpenSpan(key="call_1", kind="tool", name="Bash", input={"cmd": "ls"}))
+    await tracer.handle(CloseSpan(key="call_1", output="files", is_complete=True))
+    assert fake.started == [("Bash", "p1", {"cmd": "ls"})]
+    assert fake.ended == [("Bash", "files")]
+
+
+@pytest.mark.asyncio
+async def test_close_records_is_error_on_span_data():
+    """A CloseSpan carrying is_error records the status on span.data (AGX1-371)."""
+    fake = _FakeTracing()
+    tracer = SpanTracer(trace_id="t1", parent_span_id="p1", tracing=fake)
+    await tracer.handle(OpenSpan(key="call_err", kind="tool", name="Bash", input={}))
+    await tracer.handle(CloseSpan(key="call_err", output="boom", is_complete=True, is_error=True))
+    assert fake.ended_spans[0].data == {"is_error": True}
+
+
+@pytest.mark.asyncio
+async def test_close_without_status_leaves_span_data_untouched():
+    """is_error=None (no status reported) must not write to span.data."""
+    fake = _FakeTracing()
+    tracer = SpanTracer(trace_id="t1", parent_span_id="p1", tracing=fake)
+    await tracer.handle(OpenSpan(key="call_1", kind="tool", name="Bash", input={}))
+    await tracer.handle(CloseSpan(key="call_1", output="files", is_complete=True))
+    assert fake.ended_spans[0].data is None
+
+
+@pytest.mark.asyncio
+async def test_no_trace_id_is_noop():
+    fake = _FakeTracing()
+    tracer = SpanTracer(trace_id="", parent_span_id=None, tracing=fake)
+    await tracer.handle(OpenSpan(key="k", kind="tool", name="X"))
+    await tracer.handle(CloseSpan(key="k"))
+    assert fake.started == [] and fake.ended == []
+
+
+@pytest.mark.asyncio
+async def test_tracing_failure_is_swallowed():
+    class _Boom(_FakeTracing):
+        @override
+        async def start_span(self, **kw):
+            raise RuntimeError("backend down")
+
+    tracer = SpanTracer(trace_id="t1", parent_span_id="p1", tracing=_Boom())
+    # Must not raise.
+    await tracer.handle(OpenSpan(key="k", kind="tool", name="X"))
+    await tracer.handle(CloseSpan(key="k"))
+    assert tracer._open == {}
+
+
+@pytest.mark.asyncio
+async def test_duplicate_open_replaces_silently():
+    fake = _FakeTracing()
+    tracer = SpanTracer(trace_id="t1", parent_span_id="p1", tracing=fake)
+    await tracer.handle(OpenSpan(key="k", kind="tool", name="A"))
+    await tracer.handle(OpenSpan(key="k", kind="tool", name="B"))
+    await tracer.handle(CloseSpan(key="k"))
+    # Both opens started spans, but only the second ("B") is closed.
+    assert [name for name, _, _ in fake.started] == ["A", "B"]
+    assert fake.ended == [("B", None)]
diff --git a/tests/lib/core/harness/test_types.py b/tests/lib/core/harness/test_types.py
new file mode 100644
index 000000000..68bc89ce2
--- /dev/null
+++ b/tests/lib/core/harness/test_types.py
@@ -0,0 +1,53 @@
+from typing import AsyncIterator
+
+from agentex.lib.core.harness.types import (
+    OpenSpan,
+    CloseSpan,
+    TurnUsage,
+    TurnResult,
+    HarnessTurn,
+    StreamTaskMessage,
+)
+
+
+def test_open_close_span_construct():
+    o = OpenSpan(key="call_1", kind="tool", name="Bash", input={"cmd": "ls"})
+    c = CloseSpan(key="call_1", output="files", is_complete=True)
+    assert o.key == c.key == "call_1"
+    assert o.kind == "tool"
+    assert c.is_complete is True
+
+
+def test_turn_usage_defaults_are_none():
+    u = TurnUsage(model="claude-opus-4-6")
+    assert u.model == "claude-opus-4-6"
+    assert u.input_tokens is None
+    assert u.num_tool_calls == 0
+
+
+def test_turn_result_wraps_usage():
+    r = TurnResult(final_text="hi", usage=TurnUsage(model="m"))
+    assert r.final_text == "hi"
+    assert r.usage.model == "m"
+
+
+def test_close_span_defaults():
+    c = CloseSpan(key="x")
+    assert c.output is None
+    assert c.is_complete is True
+
+
+def test_harness_turn_runtime_check():
+    class _Turn:
+        @property
+        def events(self) -> AsyncIterator[StreamTaskMessage]:
+            async def _gen() -> AsyncIterator[StreamTaskMessage]:
+                if False:
+                    yield  # pragma: no cover
+
+            return _gen()
+
+        def usage(self) -> TurnUsage:
+            return TurnUsage(model="m")
+
+    assert isinstance(_Turn(), HarnessTurn) is True
diff --git a/tests/lib/core/harness/test_yield_delivery.py b/tests/lib/core/harness/test_yield_delivery.py
new file mode 100644
index 000000000..f3f491d84
--- /dev/null
+++ b/tests/lib/core/harness/test_yield_delivery.py
@@ -0,0 +1,89 @@
+import types as _types
+
+import pytest
+
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.core.harness.yield_delivery import yield_events
+
+
+class _RecordTracing:
+    def __init__(self):
+        self.started, self.ended = [], []
+
+    async def start_span(self, *, trace_id, name, input=None, parent_id=None, data=None, task_id=None):
+        self.started.append(name)
+        return _types.SimpleNamespace()  # supports arbitrary attribute assignment (span.output = ...)
+
+    async def end_span(self, *, trace_id, span):
+        self.ended.append(getattr(span, "output", None))
+
+
+async def _gen(events):
+    for e in events:
+        yield e
+
+
+@pytest.mark.asyncio
+async def test_yield_passes_events_through_and_traces():
+    fake = _RecordTracing()
+    tracer = SpanTracer(trace_id="t", parent_span_id="p", tracing=fake)
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=ToolRequestContent(
+                type="tool_request", author="agent", tool_call_id="c", name="Bash", arguments={}
+            ),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolResponseContent(
+                type="tool_response", author="agent", tool_call_id="c", name="Bash", content="ok"
+            ),
+        ),
+    ]
+    out = [e async for e in yield_events(_gen(events), tracer=tracer)]
+    assert out == events  # passthrough unchanged
+    assert fake.started == ["Bash"]  # span derived + opened
+    assert fake.ended == ["ok"]  # span closed with response
+
+
+@pytest.mark.asyncio
+async def test_yield_without_tracer_is_pure_passthrough():
+    events = [
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    out = [e async for e in yield_events(_gen(events), tracer=None)]
+    assert out == events
+
+
+@pytest.mark.asyncio
+async def test_flush_runs_on_early_close():
+    fake = _RecordTracing()
+    tracer = SpanTracer(trace_id="t", parent_span_id="p", tracing=fake)
+    events = [
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=ToolRequestContent(
+                type="tool_request", author="agent", tool_call_id="c", name="Bash", arguments={}
+            ),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+        # response intentionally never arrives
+    ]
+    gen = yield_events(_gen(events), tracer=tracer)
+    first = await gen.__anext__()  # Start
+    second = await gen.__anext__()  # Done -> tool span opens here
+    await gen.aclose()  # triggers the finally -> flush()
+    assert fake.started == ["Bash"]
+    assert fake.ended == [None]  # flush closed the unpaired span (incomplete, no output)

From c8de1d4c9c3b5b3c16ad4aaf9644c1ba0d618757 Mon Sep 17 00:00:00 2001
From: Vijay Kalmath <158184866+vkalmathscale@users.noreply.github.com>
Date: Mon, 22 Jun 2026 16:02:41 -0400
Subject: [PATCH 02/10] feat(streaming): stream tool call argument deltas in
 TemporalStreamingModel (#355)

Co-authored-by: stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
Co-authored-by: Declan Brady <declan.brady@scale.com>
Co-authored-by: Nitesh Dhanpal <NiteshDhanpal@users.noreply.github.com>
Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../models/temporal_streaming_model.py        |  93 ++++++++-
 .../tests/test_streaming_model.py             | 194 ++++++++++++++++++
 2 files changed, 280 insertions(+), 7 deletions(-)

diff --git a/src/agentex/lib/core/temporal/plugins/openai_agents/models/temporal_streaming_model.py b/src/agentex/lib/core/temporal/plugins/openai_agents/models/temporal_streaming_model.py
index 7ccc6627a..75dc0f053 100644
--- a/src/agentex/lib/core/temporal/plugins/openai_agents/models/temporal_streaming_model.py
+++ b/src/agentex/lib/core/temporal/plugins/openai_agents/models/temporal_streaming_model.py
@@ -1,6 +1,7 @@
 """Custom Temporal Model Provider with streaming support for OpenAI agents."""
 from __future__ import annotations
 
+import json
 import time
 import uuid
 from typing import Any, List, Union, Optional, override
@@ -63,9 +64,9 @@
 from agentex.lib import adk
 from agentex.lib.utils.logging import make_logger
 from agentex.lib.core.tracing.tracer import AsyncTracer
-from agentex.types.task_message_delta import TextDelta, ReasoningContentDelta, ReasoningSummaryDelta
+from agentex.types.task_message_delta import TextDelta, ToolRequestDelta, ReasoningContentDelta, ReasoningSummaryDelta
 from agentex.types.task_message_update import StreamTaskMessageFull, StreamTaskMessageDelta
-from agentex.types.task_message_content import TextContent, ReasoningContent
+from agentex.types.task_message_content import TextContent, ReasoningContent, ToolRequestContent
 from agentex.lib.adk.utils._modules.client import create_async_agentex_client
 from agentex.lib.core.temporal.plugins.openai_agents.interceptors.context_interceptor import (
     streaming_task_id,
@@ -722,12 +723,27 @@ async def get_response(
                                     streaming_mode=self.streaming_mode,
                                 ).__aenter__()
                         elif item and getattr(item, 'type', None) == 'function_call':
-                            # Track the function call being streamed
+                            # Open a streaming context per function call so argument
+                            # deltas can be published incrementally. Coalescing and
+                            # mode dispatch are handled by the streaming layer.
+                            call_id = getattr(item, 'call_id', '')
+                            tool_name = getattr(item, 'name', '')
+                            call_context = await adk.streaming.streaming_task_message_context(
+                                task_id=task_id,
+                                initial_content=ToolRequestContent(
+                                    author="agent",
+                                    tool_call_id=call_id,
+                                    name=tool_name,
+                                    arguments={},
+                                ),
+                                streaming_mode=self.streaming_mode,
+                            ).__aenter__()
                             function_calls_in_progress[output_index] = {
                                 'id': getattr(item, 'id', ''),
-                                'call_id': getattr(item, 'call_id', ''),
-                                'name': getattr(item, 'name', ''),
+                                'call_id': call_id,
+                                'name': tool_name,
                                 'arguments': getattr(item, 'arguments', ''),
+                                'context': call_context,
                             }
                             logger.debug(f"[TemporalStreamingModel] Starting function call: {item.name}")
 
@@ -748,8 +764,24 @@ async def get_response(
                         output_index = getattr(event, 'output_index', 0)
                         delta = getattr(event, 'delta', '')
 
-                        if output_index in function_calls_in_progress:
-                            function_calls_in_progress[output_index]['arguments'] += delta
+                        call_data = function_calls_in_progress.get(output_index)
+                        if call_data is not None:
+                            call_data['arguments'] += delta
+                            call_context = call_data.get('context')
+                            if call_context is not None:
+                                try:
+                                    await call_context.stream_update(StreamTaskMessageDelta(
+                                        parent_task_message=call_context.task_message,
+                                        delta=ToolRequestDelta(
+                                            tool_call_id=call_data['call_id'],
+                                            name=call_data['name'],
+                                            arguments_delta=delta,
+                                            type="tool_request",
+                                        ),
+                                        type="delta",
+                                    ))
+                                except Exception as e:
+                                    logger.warning(f"Failed to send tool request delta: {e}")
                             logger.debug(f"[TemporalStreamingModel] Function call args delta: {delta[:50]}...")
 
                     elif isinstance(event, ResponseFunctionCallArgumentsDoneEvent):
@@ -874,6 +906,42 @@ async def get_response(
                                 )
                                 output_items.append(tool_call)
 
+                                # Emit the final ToolRequestContent and close the
+                                # per-call streaming context. If the model produced
+                                # invalid JSON args (truncation, hallucination), fall
+                                # back to an empty dict so the streaming layer can
+                                # still persist a message.
+                                call_context = call_data.get('context')
+                                if call_context is not None:
+                                    raw_args = call_data['arguments'] or ''
+                                    try:
+                                        parsed_args = json.loads(raw_args) if raw_args else {}
+                                    except json.JSONDecodeError:
+                                        logger.warning(
+                                            f"Failed to parse tool call arguments for {call_data['name']} "
+                                            f"(raw_args_bytes={len(raw_args)})"
+                                        )
+                                        parsed_args = {}
+                                    try:
+                                        await call_context.stream_update(StreamTaskMessageFull(
+                                            parent_task_message=call_context.task_message,
+                                            content=ToolRequestContent(
+                                                author="agent",
+                                                tool_call_id=call_data['call_id'],
+                                                name=call_data['name'],
+                                                arguments=parsed_args,
+                                            ),
+                                            type="full",
+                                        ))
+                                    except Exception as e:
+                                        logger.warning(f"Failed to send tool request full update: {e}")
+                                    try:
+                                        await call_context.close()
+                                    except Exception as e:
+                                        logger.warning(f"Failed to close tool request context: {e}")
+                                    finally:
+                                        call_data['context'] = None
+
                     elif isinstance(event, ResponseReasoningSummaryPartAddedEvent):
                         # New reasoning part/summary started - reset accumulator
                         part = getattr(event, 'part', None)
@@ -907,6 +975,17 @@ async def get_response(
                     await streaming_context.close()
                     streaming_context = None
 
+                # Defensive: close any function call contexts that didn't see a
+                # ResponseOutputItemDoneEvent (truncated stream, error mid-call).
+                for call_data in function_calls_in_progress.values():
+                    call_context = call_data.get('context')
+                    if call_context is not None:
+                        try:
+                            await call_context.close()
+                        except Exception as e:
+                            logger.warning(f"Failed to close orphaned tool request context: {e}")
+                        call_data['context'] = None
+
                 # Build the response from output items collected during streaming
                 # Create output from the items we collected
                 response_output = []
diff --git a/src/agentex/lib/core/temporal/plugins/openai_agents/tests/test_streaming_model.py b/src/agentex/lib/core/temporal/plugins/openai_agents/tests/test_streaming_model.py
index 97dda0e61..26c0b7c4b 100644
--- a/src/agentex/lib/core/temporal/plugins/openai_agents/tests/test_streaming_model.py
+++ b/src/agentex/lib/core/temporal/plugins/openai_agents/tests/test_streaming_model.py
@@ -12,8 +12,11 @@
 from openai.types.responses import (
     ResponseCompletedEvent,
     ResponseTextDeltaEvent,
+    ResponseOutputItemDoneEvent,
     ResponseOutputItemAddedEvent,
+    ResponseFunctionCallArgumentsDoneEvent,
     ResponseReasoningSummaryTextDeltaEvent,
+    ResponseFunctionCallArgumentsDeltaEvent,
 )
 
 
@@ -851,6 +854,197 @@ async def test_missing_task_id_error(self, streaming_model):
             )
 
 
+class TestStreamingModelFunctionCallArgsStreaming:
+    """Verify ``ResponseFunctionCallArgumentsDeltaEvent``s are surfaced as
+    ``ToolRequestDelta`` updates and that a final ``ToolRequestContent`` Full is
+    emitted on ``ResponseOutputItemDoneEvent``.
+
+    Without this, write-heavy tools (``write_file``, ``apply_patch``) buffer their
+    entire argument body inside ``invoke_model_activity`` and the UI sees a
+    multi-second freeze while the model is actively producing tokens.
+    """
+
+    @staticmethod
+    def _build_function_call_stream(arguments_text: str):
+        """Construct a streaming event sequence for a single function_call.
+
+        Mirrors the production order: Added → N × ArgumentsDelta → ArgumentsDone
+        → OutputItemDone → ResponseCompleted. ``spec=...`` makes ``isinstance``
+        dispatch in production work without triggering pydantic validation.
+        """
+        call_item = MagicMock()
+        call_item.type = "function_call"
+        call_item.id = "fc_abc"
+        call_item.call_id = "call_abc"
+        call_item.name = "write_file"
+        call_item.arguments = ""
+
+        item_added = MagicMock(spec=ResponseOutputItemAddedEvent)
+        item_added.item = call_item
+        item_added.output_index = 0
+
+        # Split the argument text into a few chunks to exercise the per-delta loop
+        chunk_size = max(1, len(arguments_text) // 3) if arguments_text else 1
+        chunks = [arguments_text[i:i + chunk_size] for i in range(0, len(arguments_text), chunk_size)] or [""]
+        delta_events = []
+        for chunk in chunks:
+            ev = MagicMock(spec=ResponseFunctionCallArgumentsDeltaEvent)
+            ev.delta = chunk
+            ev.output_index = 0
+            delta_events.append(ev)
+
+        args_done = MagicMock(spec=ResponseFunctionCallArgumentsDoneEvent)
+        args_done.arguments = arguments_text
+        args_done.output_index = 0
+
+        item_done = MagicMock(spec=ResponseOutputItemDoneEvent)
+        item_done.item = call_item
+        item_done.output_index = 0
+
+        completed = MagicMock(spec=ResponseCompletedEvent)
+        completed.response = MagicMock(output=[], usage=MagicMock(), id=None)
+
+        return [item_added, *delta_events, args_done, item_done, completed], chunks
+
+    @staticmethod
+    def _install_real_task_message(mock_adk_streaming, task_id: str):
+        """Replace the autouse fixture's MagicMock ``task_message`` with a real
+        ``TaskMessage`` so production's ``StreamTaskMessageDelta(parent_task_message=...)``
+        construction passes pydantic validation. The default mock works for tests
+        that only assert on the context's ``__aenter__`` call but breaks tests
+        that exercise ``stream_update`` end-to-end.
+        """
+        from agentex.types.task_message import TaskMessage
+        from agentex.types.task_message_content import ToolRequestContent
+
+        ctx = mock_adk_streaming.streaming_task_message_context.return_value
+        ctx.task_message = TaskMessage(
+            id="msg_test",
+            task_id=task_id,
+            content=ToolRequestContent(
+                author="agent",
+                tool_call_id="call_abc",
+                name="write_file",
+                arguments={},
+            ),
+            streaming_status="IN_PROGRESS",
+        )
+        return ctx
+
+    @pytest.mark.asyncio
+    async def test_function_call_emits_argument_deltas_and_final_full(
+        self, streaming_model, mock_adk_streaming, _streaming_context_vars, sample_task_id
+    ):
+        """A function_call with well-formed JSON args should produce:
+        (1) one streaming context opened with ``ToolRequestContent`` initial_content,
+        (2) one ``StreamTaskMessageDelta`` per ``ArgumentsDelta`` event carrying a
+            ``ToolRequestDelta`` with the right ``tool_call_id`` and ``arguments_delta``,
+        (3) one final ``StreamTaskMessageFull`` with ``ToolRequestContent`` whose
+            ``arguments`` is the parsed JSON dict.
+        """
+        from agentex.types.task_message_delta import ToolRequestDelta
+        from agentex.types.task_message_update import StreamTaskMessageFull, StreamTaskMessageDelta
+        from agentex.types.task_message_content import ToolRequestContent
+
+        ctx = self._install_real_task_message(mock_adk_streaming, sample_task_id)
+
+        args_text = '{"path": "/tmp/foo.txt", "contents": "hello world"}'
+        events, chunks = self._build_function_call_stream(args_text)
+
+        mock_stream = AsyncMock()
+        mock_stream.__aiter__.return_value = iter(events)
+        streaming_model.client.responses.create = AsyncMock(return_value=mock_stream)
+
+        await streaming_model.get_response(
+            system_instructions=None,
+            input="please write foo",
+            model_settings=ModelSettings(),
+            tools=[],
+            output_schema=None,
+            handoffs=[],
+            tracing=None,
+        )
+
+        # 1. A streaming context was opened with ToolRequestContent.
+        opens = [
+            c for c in mock_adk_streaming.streaming_task_message_context.call_args_list
+            if isinstance(c.kwargs.get("initial_content"), ToolRequestContent)
+        ]
+        assert len(opens) == 1, f"expected one ToolRequest context, got {len(opens)}"
+        initial = opens[0].kwargs["initial_content"]
+        assert initial.tool_call_id == "call_abc"
+        assert initial.name == "write_file"
+
+        # 2. One StreamTaskMessageDelta(ToolRequestDelta) was streamed per
+        #    ArgumentsDelta event, preserving the delta text exactly.
+        delta_updates = [
+            call.args[0] if call.args else call.kwargs.get("update")
+            for call in ctx.stream_update.call_args_list
+            if (call.args and isinstance(call.args[0], StreamTaskMessageDelta)
+                and isinstance(call.args[0].delta, ToolRequestDelta))
+        ]
+        assert len(delta_updates) == len(chunks)
+        for update, expected_chunk in zip(delta_updates, chunks):
+            assert update.delta.tool_call_id == "call_abc"
+            assert update.delta.name == "write_file"
+            assert update.delta.arguments_delta == expected_chunk
+
+        # 3. A final StreamTaskMessageFull(ToolRequestContent) was streamed with
+        #    parsed args.
+        full_updates = [
+            call.args[0] if call.args else call.kwargs.get("update")
+            for call in ctx.stream_update.call_args_list
+            if (call.args and isinstance(call.args[0], StreamTaskMessageFull)
+                and isinstance(call.args[0].content, ToolRequestContent))
+        ]
+        assert len(full_updates) == 1
+        final = full_updates[0].content
+        assert final.tool_call_id == "call_abc"
+        assert final.name == "write_file"
+        assert final.arguments == {"path": "/tmp/foo.txt", "contents": "hello world"}
+
+    @pytest.mark.asyncio
+    async def test_function_call_malformed_args_fall_back_to_empty_dict(
+        self, streaming_model, mock_adk_streaming, _streaming_context_vars, sample_task_id, caplog
+    ):
+        """If the model produces invalid JSON for the args, the final
+        ``ToolRequestContent`` should carry ``arguments={}`` and a warning should
+        be logged. The raw delta stream is preserved either way.
+        """
+        from agentex.types.task_message_update import StreamTaskMessageFull
+        from agentex.types.task_message_content import ToolRequestContent
+
+        ctx = self._install_real_task_message(mock_adk_streaming, sample_task_id)
+
+        # Missing closing brace — invalid JSON.
+        events, _ = self._build_function_call_stream('{"path": "/tmp/foo.txt", "contents":')
+
+        mock_stream = AsyncMock()
+        mock_stream.__aiter__.return_value = iter(events)
+        streaming_model.client.responses.create = AsyncMock(return_value=mock_stream)
+
+        with caplog.at_level("WARNING"):
+            await streaming_model.get_response(
+                system_instructions=None,
+                input="please write foo",
+                model_settings=ModelSettings(),
+                tools=[],
+                output_schema=None,
+                handoffs=[],
+                tracing=None,
+            )
+
+        full_updates = [
+            call.args[0] if call.args else call.kwargs.get("update")
+            for call in ctx.stream_update.call_args_list
+            if (call.args and isinstance(call.args[0], StreamTaskMessageFull)
+                and isinstance(call.args[0].content, ToolRequestContent))
+        ]
+        assert len(full_updates) == 1
+        assert full_updates[0].content.arguments == {}
+        assert any("Failed to parse tool call arguments" in r.getMessage() for r in caplog.records)
+
+
 class TestStreamingModelUsageResponseIdAndCacheKey:
     """Cover real-Usage capture, real response_id, span emission, and opt-in prompt_cache_key."""
 

From 694960f913b8ba521d9236e876e5e00f57a3a3ff Mon Sep 17 00:00:00 2001
From: Declan Brady <declan.brady@scale.com>
Date: Mon, 22 Jun 2026 16:09:20 -0400
Subject: [PATCH 03/10] fix(harness): assert cross-channel (yield vs auto-send)
 conformance equivalence [AGX1-373] (#414)

---
 .github/workflows/agentex-tutorials-test.yml  |  28 ++
 tests/lib/core/harness/conformance/runner.py  | 472 +++++++++++++++++-
 .../harness/conformance/test_conformance.py   | 258 +++++++++-
 3 files changed, 747 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/agentex-tutorials-test.yml b/.github/workflows/agentex-tutorials-test.yml
index f19c58d4d..41b495d71 100644
--- a/.github/workflows/agentex-tutorials-test.yml
+++ b/.github/workflows/agentex-tutorials-test.yml
@@ -49,6 +49,29 @@ jobs:
           curl -LsSf https://astral.sh/uv/install.sh | sh
           echo "$HOME/.local/bin" >> $GITHUB_PATH
 
+      # Subprocess-CLI harnesses: install the relevant CLI only for the
+      # claude-code / codex tutorials (no-op for every other tutorial). npm is
+      # preinstalled on ubuntu runners. Versions mirror the golden agent's
+      # sandbox image (teams/sgp/agents/golden_agent/sandbox/Dockerfile): claude-code
+      # is pinned to the same CLAUDE_CODE_VERSION; codex is left unpinned there,
+      # so it is left unpinned here too. Bump CLAUDE_CODE_VERSION in lockstep
+      # with the sandbox Dockerfile.
+      - name: Install harness CLI (claude-code / codex only)
+        if: ${{ contains(matrix.tutorial, 'claude_code') || contains(matrix.tutorial, 'codex') }}
+        env:
+          CLAUDE_CODE_VERSION: "2.1.142"
+        run: |
+          if [[ "${{ matrix.tutorial }}" == *claude_code* ]]; then
+            echo "📦 Installing Claude Code CLI (v${CLAUDE_CODE_VERSION})..."
+            npm install -g "@anthropic-ai/claude-code@${CLAUDE_CODE_VERSION}"
+            claude --version || true
+          fi
+          if [[ "${{ matrix.tutorial }}" == *codex* ]]; then
+            echo "📦 Installing Codex CLI..."
+            npm install -g @openai/codex
+            codex --version || true
+          fi
+
       - name: Pull latest AgentEx image
         run: |
           echo "🐳 Pulling latest Scale AgentEx Docker image..."
@@ -136,6 +159,11 @@ jobs:
         working-directory: ./examples/tutorials
         env:
           OPENAI_API_KEY: ${{ secrets.TUTORIAL_OPENAI_API_KEY }}
+          ANTHROPIC_API_KEY: ${{ secrets.TUTORIAL_ANTHROPIC_API_KEY }}
+          # Enable the gated live tests only for the matching subprocess-CLI
+          # harness tutorial (the CLI is installed for it in the step above).
+          CLAUDE_LIVE_TESTS: ${{ contains(matrix.tutorial, 'claude_code') && '1' || '' }}
+          CODEX_LIVE_TESTS: ${{ contains(matrix.tutorial, 'codex') && '1' || '' }}
           HEALTH_CHECK_PORT: 8080 # Use non-privileged port for temporal worker health checks
         run: |
           echo "Testing tutorial: ${{ matrix.tutorial }}"
diff --git a/tests/lib/core/harness/conformance/runner.py b/tests/lib/core/harness/conformance/runner.py
index 81a74860c..84e84fa51 100644
--- a/tests/lib/core/harness/conformance/runner.py
+++ b/tests/lib/core/harness/conformance/runner.py
@@ -1,8 +1,30 @@
 """Shared conformance engine: every harness tap registers fixtures here.
 
-A fixture is (name, list[StreamTaskMessage]). The runner asserts that span
-derivation over the events is identical regardless of delivery channel, which is
-the cross-channel guarantee from the spec.
+A fixture is (name, list[StreamTaskMessage]). The runner asserts two things:
+
+1. **Cross-channel logical equivalence**: yield_events and auto_send produce the
+   same *logical* sequence of delivered message contents. "Logical" means we
+   normalise away the streaming-envelope difference:
+   - yield channel delivers StreamTaskMessageFull(ToolResponseContent) verbatim.
+   - auto_send channel delivers the same tool-response by opening a streaming
+     context with the full content and closing it immediately (Start+Done on the
+     wire), not a Full event.
+   Both reduce to the same LogicalDelivery(type, identity, payload) tuple; the
+   conformance test compares those normalised sequences.
+
+   `payload` carries the content that callers actually consume:
+   - text: initial_content.content prepended, then accumulated delta string
+   - reasoning: initial_content.summary joined, then accumulated delta string
+   - tool_request: the arguments dict (JSON-sorted), from Start content
+   - tool_response: the content value (str)
+   This catches a channel that delivers the right structural shape but corrupts,
+   drops, or omits initial_content (including reasoning summary) or payload.
+
+2. **Span signal equivalence**: each channel is driven with its own recording
+   tracer that captures every SpanSignal it actually receives in handle(); the
+   two channels' recorded signal lists must be identical. Comparing what each
+   channel genuinely emitted (rather than re-deriving from the events) catches a
+   regression where a channel skips deriver.observe() for some event type.
 
 Registry shared-state hazard: `_REGISTRY` is process-global. Every `test_*.py`
 module that calls `register()` at import time contributes to it, so a module
@@ -12,13 +34,51 @@
 module should register and parametrize over its OWN fixtures (e.g. keep a
 module-local list it both registers and parametrizes), rather than relying on
 cross-module global accumulation via `all_fixtures()`.
+
+Design decision — Full-message handling in auto_send
+----------------------------------------------------
+auto_send posts a StreamTaskMessageFull (tool_request or tool_response) by
+opening a streaming context with the full content and closing it immediately,
+rather than calling adk.messages.create. This open+close approach is retained
+because:
+  - StreamingTaskMessageContext.close() persists initial_content when no deltas
+    have been streamed, so the message IS correctly persisted.
+  - It mirrors the pattern already used by the real _langgraph_async.py harness,
+    keeping behavioural parity.
+  - Switching to adk.messages.create would require an additional injectable
+    dependency, adding surface area for no observable benefit.
+The conformance test treats this as an ACCEPTABLE envelope difference: at the
+logical-content level, Full(ToolResponseContent) from yield and
+Start(content)+Done from auto_send are equivalent. The recorded span signals are
+identical because both adapters drive the same SpanDeriver.observe() call
+sequence and forward every signal to their tracer.
+
+AGX1-377 fix: auto_send now DELIVERS streamed tool-request messages (Start+Done)
+instead of dropping them. The conformance normaliser previously suppressed the
+delivery for Start(tool_request)+Done on the yield channel to match auto_send's
+old drop behaviour. That suppression is now removed: both channels produce a
+LogicalDelivery for a streamed tool_request, and the cross-channel assertion
+verifies it is delivered on both.
 """
 
 from __future__ import annotations
 
+import json
+import types as _types
+from typing import Any, NamedTuple, override
 from dataclasses import dataclass
 
+from agentex.types.text_delta import TextDelta
+from agentex.types.task_message import TaskMessage
 from agentex.lib.core.harness.types import SpanSignal, StreamTaskMessage
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.reasoning_content_delta import ReasoningContentDelta
 from agentex.lib.core.harness.span_derivation import SpanDeriver
 
 
@@ -46,3 +106,409 @@ def derive_all(events: list[StreamTaskMessage]) -> list[SpanSignal]:
         out.extend(d.observe(e))
     out.extend(d.flush())
     return out
+
+
+# ---------------------------------------------------------------------------
+# Logical delivery normalisation
+# ---------------------------------------------------------------------------
+
+
+class LogicalDelivery(NamedTuple):
+    """A single logically-delivered message, channel-agnostic.
+
+    `content_type` is the .type of the content (e.g. "text", "reasoning",
+    "tool_request", "tool_response"). `identity` is a frozenset of key=value
+    pairs that uniquely identify the content (e.g. tool_call_id for tool
+    messages, or index for text/reasoning). `payload` is a stable string
+    representation of the content callers actually consume:
+    - text: initial_content.content prepended to accumulated delta strings
+    - reasoning: initial_content.summary joined, prepended to accumulated
+      reasoning-content delta strings
+    - tool_request: JSON-sorted arguments from Start content
+    - tool_response: str(content) from Full event
+    """
+
+    content_type: str
+    identity: frozenset[tuple[str, Any]]
+    payload: str = ""
+
+
+def _yield_logical_deliveries(events: list[StreamTaskMessage]) -> list[LogicalDelivery]:
+    """Extract logical deliveries from the yield channel's event list.
+
+    The yield channel forwards events verbatim. A logical delivery is:
+    - A Full event (tool_request / tool_response): content delivered as-is.
+    - A Start + ... + Done sequence for text/reasoning/tool_request content.
+
+    The `payload` field captures the content callers consume:
+    - text: initial_content.content (from Start) prepended to accumulated deltas
+    - reasoning: initial_content.summary joined (from Start) prepended to
+      accumulated reasoning-content deltas (this catches a channel that drops
+      the summary)
+    - tool_request: JSON-sorted arguments from the Start content (AGX1-377: now
+      delivered on both channels, no longer suppressed)
+    - tool_response: str(content) from Full event
+    """
+    from agentex.types.text_content import TextContent
+    from agentex.types.reasoning_content import ReasoningContent
+    from agentex.types.tool_request_content import ToolRequestContent
+
+    deliveries: list[LogicalDelivery] = []
+    # Track which indices had a Start so we can pair with Done
+    started: dict[int, Any] = {}  # index -> initial content
+    # Accumulate delta text per index (seed with initial_content text if present)
+    accumulated: dict[int, list[str]] = {}  # index -> list of delta strings
+
+    for event in events:
+        if isinstance(event, StreamTaskMessageStart):
+            if event.index is not None:
+                started[event.index] = event.content
+                # Seed accumulator with initial_content so a channel that drops
+                # initial_content but delivers deltas correctly will fail.
+                seed: list[str] = []
+                if isinstance(event.content, TextContent) and event.content.content:
+                    seed = [event.content.content]
+                elif isinstance(event.content, ReasoningContent) and event.content.summary:
+                    seed = list(event.content.summary)
+                accumulated[event.index] = seed
+        elif isinstance(event, StreamTaskMessageDelta):
+            if event.index is not None and event.delta is not None:
+                if isinstance(event.delta, TextDelta) and event.delta.text_delta:
+                    accumulated.setdefault(event.index, []).append(event.delta.text_delta)
+                elif isinstance(event.delta, ReasoningContentDelta) and event.delta.content_delta:
+                    accumulated.setdefault(event.index, []).append(event.delta.content_delta)
+        elif isinstance(event, StreamTaskMessageDone):
+            if event.index is not None and event.index in started:
+                content = started.pop(event.index)
+                deltas = accumulated.pop(event.index, [])
+                ctype = getattr(content, "type", None) or ""
+                if ctype in ("text", "reasoning"):
+                    deliveries.append(
+                        LogicalDelivery(
+                            content_type=ctype,
+                            identity=frozenset({("index", event.index)}),
+                            payload="".join(deltas),
+                        )
+                    )
+                elif ctype == "tool_request" and isinstance(content, ToolRequestContent):
+                    # AGX1-377 fix: auto_send now delivers streamed tool-request
+                    # messages. Emit a delivery here so the cross-channel
+                    # assertion verifies it is present on both channels.
+                    deliveries.append(
+                        LogicalDelivery(
+                            content_type=ctype,
+                            identity=frozenset(
+                                {
+                                    ("tool_call_id", content.tool_call_id),
+                                    ("name", content.name),
+                                }
+                            ),
+                            payload=json.dumps(content.arguments, sort_keys=True),
+                        )
+                    )
+        elif isinstance(event, StreamTaskMessageFull):
+            content = event.content
+            ctype = getattr(content, "type", None) or ""
+            if ctype == "tool_response":
+                from agentex.types.tool_response_content import ToolResponseContent
+
+                if isinstance(content, ToolResponseContent):
+                    deliveries.append(
+                        LogicalDelivery(
+                            content_type=ctype,
+                            identity=frozenset(
+                                {
+                                    ("tool_call_id", content.tool_call_id),
+                                    ("name", content.name),
+                                }
+                            ),
+                            payload=str(content.content),
+                        )
+                    )
+            elif ctype == "tool_request":
+                from agentex.types.tool_request_content import ToolRequestContent
+
+                if isinstance(content, ToolRequestContent):
+                    deliveries.append(
+                        LogicalDelivery(
+                            content_type=ctype,
+                            identity=frozenset(
+                                {
+                                    ("tool_call_id", content.tool_call_id),
+                                    ("name", content.name),
+                                }
+                            ),
+                            payload=json.dumps(content.arguments, sort_keys=True),
+                        )
+                    )
+
+    return deliveries
+
+
+# ---------------------------------------------------------------------------
+# Fake streaming backend for auto_send conformance runner
+# ---------------------------------------------------------------------------
+
+
+class _FakeCtx:
+    """Mirrors StreamingTaskMessageContext: __aenter__ opens, close() closes."""
+
+    def __init__(self, sink: list[Any], content_type: str, initial_content: Any) -> None:
+        self.sink = sink
+        self.content_type = content_type
+        self.task_message = TaskMessage(
+            id="msg-conformance",
+            task_id="conformance-task",
+            content=initial_content,
+        )
+
+    async def __aenter__(self) -> "_FakeCtx":
+        self.sink.append(("open", self.content_type, self.task_message.content))
+        return self
+
+    async def __aexit__(self, *args: Any) -> bool:
+        await self.close()
+        return False
+
+    async def close(self) -> None:
+        self.sink.append(("close", self.content_type))
+
+    async def stream_update(self, update: Any) -> Any:
+        self.sink.append(("update", update))
+        return update
+
+
+class _FakeStreaming:
+    """Fake streaming backend; records every context lifecycle event."""
+
+    def __init__(self) -> None:
+        self.sink: list[Any] = []
+
+    def streaming_task_message_context(
+        self,
+        task_id: str,
+        initial_content: Any,
+        streaming_mode: str = "coalesced",
+        created_at: Any = None,
+    ) -> _FakeCtx:
+        ctype = getattr(initial_content, "type", None) or ""
+        self.sink.append(("ctx", ctype, initial_content))
+        return _FakeCtx(self.sink, ctype, initial_content)
+
+
+class _FakeTracing:
+    """Minimal tracing backend: records started/ended span names + outputs."""
+
+    def __init__(self) -> None:
+        self.started: list[str] = []
+        self.ended: list[Any] = []
+
+    async def start_span(
+        self,
+        *,
+        trace_id: str,
+        name: str,
+        input: Any = None,
+        parent_id: Any = None,
+        data: Any = None,
+        task_id: Any = None,
+    ) -> Any:
+        self.started.append(name)
+        return _types.SimpleNamespace()
+
+    async def end_span(self, *, trace_id: str, span: Any) -> None:
+        self.ended.append(getattr(span, "output", None))
+
+
+class _RecordingTracer(SpanTracer):
+    """SpanTracer that records every SpanSignal it actually receives.
+
+    Each delivery channel calls `tracer.handle(signal)` for every signal it
+    derives from the stream, so `received_signals` captures what the channel
+    genuinely emitted — not a re-derivation. Comparing the two channels'
+    recorded lists catches regressions where a channel skips
+    `deriver.observe(event)` for some event type.
+    """
+
+    def __init__(self, tracing: Any) -> None:
+        super().__init__(
+            trace_id="conformance-trace",
+            parent_span_id="conformance-parent",
+            tracing=tracing,
+        )
+        self.received_signals: list[SpanSignal] = []
+
+    @override
+    async def handle(self, signal: SpanSignal) -> None:
+        self.received_signals.append(signal)
+        await super().handle(signal)
+
+
+async def _gen(events: list[StreamTaskMessage]):  # type: ignore[return]
+    for e in events:
+        yield e
+
+
+def _auto_send_logical_deliveries(sink: list[Any]) -> list[LogicalDelivery]:
+    """Extract logical deliveries from the auto_send fake streaming sink.
+
+    Each context lifecycle in the sink looks like:
+      ("ctx", ctype, content)  -- context created
+      ("open", ctype, content) -- context __aenter__
+      [("update", delta), ...]  -- optional deltas (StreamTaskMessageDelta)
+      ("close", ctype)          -- context closed
+
+    A logical delivery corresponds to each open+close pair. For text/reasoning
+    we identify by sequential position and build the payload by prepending the
+    initial_content text (TextContent.content) or summary (ReasoningContent.summary)
+    to accumulated deltas. This matches _yield_logical_deliveries so a channel
+    that drops initial_content or reasoning summary fails the comparison.
+    For tool messages we use tool_call_id + name and capture arguments/content.
+    """
+    from agentex.types.text_content import TextContent
+    from agentex.types.reasoning_content import ReasoningContent
+    from agentex.types.tool_request_content import ToolRequestContent
+    from agentex.types.tool_response_content import ToolResponseContent
+
+    deliveries: list[LogicalDelivery] = []
+    open_idx = 0
+    while open_idx < len(sink):
+        entry = sink[open_idx]
+        if entry[0] == "ctx":
+            ctype: str = entry[1]
+            content: Any = entry[2]
+            found_open = False
+            delta_parts: list[str] = []
+            # Seed delta_parts with initial_content so payload comparison
+            # catches a channel that drops initial_content but delivers deltas.
+            if isinstance(content, TextContent) and content.content:
+                delta_parts = [content.content]
+            elif isinstance(content, ReasoningContent) and content.summary:
+                delta_parts = list(content.summary)
+            for j in range(open_idx + 1, len(sink)):
+                if sink[j][0] == "open" and sink[j][1] == ctype and not found_open:
+                    found_open = True
+                elif found_open and sink[j][0] == "update":
+                    # Accumulate delta content from StreamTaskMessageDelta
+                    update = sink[j][1]
+                    if isinstance(update, StreamTaskMessageDelta) and update.delta is not None:
+                        if isinstance(update.delta, TextDelta) and update.delta.text_delta:
+                            delta_parts.append(update.delta.text_delta)
+                        elif isinstance(update.delta, ReasoningContentDelta) and update.delta.content_delta:
+                            delta_parts.append(update.delta.content_delta)
+                elif sink[j][0] == "close" and sink[j][1] == ctype and found_open:
+                    # Matched open+close: emit logical delivery with payload
+                    if ctype in ("text", "reasoning"):
+                        count = sum(1 for k in range(open_idx) if sink[k][0] == "ctx" and sink[k][1] == ctype)
+                        deliveries.append(
+                            LogicalDelivery(
+                                content_type=ctype,
+                                identity=frozenset({("seq", count)}),
+                                payload="".join(delta_parts),
+                            )
+                        )
+                    elif ctype == "tool_response":
+                        if isinstance(content, ToolResponseContent):
+                            deliveries.append(
+                                LogicalDelivery(
+                                    content_type=ctype,
+                                    identity=frozenset(
+                                        {
+                                            ("tool_call_id", content.tool_call_id),
+                                            ("name", content.name),
+                                        }
+                                    ),
+                                    payload=str(content.content),
+                                )
+                            )
+                    elif ctype == "tool_request":
+                        if isinstance(content, ToolRequestContent):
+                            deliveries.append(
+                                LogicalDelivery(
+                                    content_type=ctype,
+                                    identity=frozenset(
+                                        {
+                                            ("tool_call_id", content.tool_call_id),
+                                            ("name", content.name),
+                                        }
+                                    ),
+                                    payload=json.dumps(content.arguments, sort_keys=True),
+                                )
+                            )
+                    open_idx = j + 1
+                    break
+            else:
+                open_idx += 1
+        else:
+            open_idx += 1
+
+    return deliveries
+
+
+def _yield_text_reasoning_seq(deliveries: list[LogicalDelivery]) -> list[LogicalDelivery]:
+    """Re-key text/reasoning deliveries from index-based to seq-based identity.
+
+    The yield channel uses event.index as identity; auto_send uses a sequential
+    counter. To compare across channels, normalise both to sequential position
+    within each content type.
+    """
+    result: list[LogicalDelivery] = []
+    counts: dict[str, int] = {}
+    for d in deliveries:
+        if d.content_type in ("text", "reasoning"):
+            seq = counts.get(d.content_type, 0)
+            counts[d.content_type] = seq + 1
+            result.append(
+                LogicalDelivery(
+                    content_type=d.content_type,
+                    identity=frozenset({("seq", seq)}),
+                    payload=d.payload,
+                )
+            )
+        else:
+            result.append(d)
+    return result
+
+
+async def run_cross_channel_conformance(
+    fixture: Fixture,
+) -> tuple[list[LogicalDelivery], list[LogicalDelivery], list[SpanSignal], list[SpanSignal]]:
+    """Run both channels over a fixture; return (yield_deliveries, auto_deliveries,
+    yield_spans, auto_spans).
+
+    The caller asserts yield_deliveries == auto_deliveries and
+    yield_spans == auto_spans. The span signals are the ones each channel's
+    tracer ACTUALLY recorded while delivering (not a re-derivation), so a
+    regression where a channel skips deriver.observe() for some event type is
+    caught.
+    """
+    from agentex.lib.core.harness.auto_send import auto_send
+    from agentex.lib.core.harness.yield_delivery import yield_events
+
+    # --- yield channel ---
+    tracer_yield = _RecordingTracer(tracing=_FakeTracing())
+    yield_out = [e async for e in yield_events(_gen(fixture.events), tracer=tracer_yield)]
+
+    # Span signals the yield channel actually emitted to its tracer
+    yield_spans = tracer_yield.received_signals
+
+    # Logical deliveries from yield output
+    yield_deliveries = _yield_text_reasoning_seq(_yield_logical_deliveries(yield_out))
+
+    # --- auto_send channel ---
+    tracer_auto = _RecordingTracer(tracing=_FakeTracing())
+    fake_streaming = _FakeStreaming()
+    await auto_send(
+        _gen(fixture.events),
+        task_id="conformance-task",
+        tracer=tracer_auto,
+        streaming=fake_streaming,
+    )
+
+    # Span signals the auto_send channel actually emitted to its tracer
+    auto_spans = tracer_auto.received_signals
+
+    # Logical deliveries from what the streaming backend received
+    auto_deliveries = _auto_send_logical_deliveries(fake_streaming.sink)
+
+    return yield_deliveries, auto_deliveries, yield_spans, auto_spans
diff --git a/tests/lib/core/harness/conformance/test_conformance.py b/tests/lib/core/harness/conformance/test_conformance.py
index d9eec1c15..6d5f8ca66 100644
--- a/tests/lib/core/harness/conformance/test_conformance.py
+++ b/tests/lib/core/harness/conformance/test_conformance.py
@@ -1,16 +1,68 @@
+"""Cross-channel conformance tests: yield_events vs auto_send.
+
+What is asserted
+----------------
+For each fixture the conformance runner drives BOTH delivery channels and
+verifies two guarantees:
+
+1. **Logical-delivery equivalence**: the sequence of logically-delivered
+   messages is identical across channels. "Logical" normalises away the
+   streaming-envelope difference:
+   - yield channel delivers StreamTaskMessageFull(ToolResponseContent) as-is.
+   - auto_send delivers the same tool-response by opening a streaming context
+     with the full content and closing it immediately.
+   Both collapse to LogicalDelivery(content_type, identity, payload) tuples
+   that compare equal. The payload includes initial_content (TextContent.content
+   and ReasoningContent.summary) so a channel that drops initial content fails.
+
+2. **Span signal equivalence**: both channels feed the same pure SpanDeriver
+   over the same event sequence, so the derived span signals must be identical.
+
+What is NOT asserted
+--------------------
+Raw wire-level event shapes are NOT compared (that would fail by design: the
+Full vs Start+Done envelope difference is a documented, acceptable choice in
+auto_send — see runner.py for the rationale).
+
+AGX1-377 fix: auto_send now delivers streamed tool-request messages. The
+suppression that previously prevented the yield normaliser from emitting a
+LogicalDelivery for Start(tool_request)+Done is removed. Both channels now
+produce a delivery for streamed tool_request, verified by the
+"streamed-tool-request" fixture.
+"""
+
+from __future__ import annotations
+
 import pytest
 
+from agentex.types.text_delta import TextDelta
+from agentex.types.text_content import TextContent
+from agentex.types.reasoning_content import ReasoningContent
 from agentex.types.task_message_update import (
     StreamTaskMessageDone,
     StreamTaskMessageFull,
+    StreamTaskMessageDelta,
     StreamTaskMessageStart,
 )
 from agentex.types.tool_request_content import ToolRequestContent
 from agentex.types.tool_response_content import ToolResponseContent
+from agentex.types.reasoning_content_delta import ReasoningContentDelta
+
+from .runner import (
+    Fixture,
+    register,
+    derive_all,
+    all_fixtures,
+    run_cross_channel_conformance,
+)
 
-from .runner import Fixture, register, derive_all, all_fixtures
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
 
-register(
+_FIXTURES: list[Fixture] = [
+    # fixture 1: single tool call — tool_request delivered via Full (classic path)
+    # plus a streamed tool_response via Full. Both channels should deliver both.
     Fixture(
         name="builtin-single-tool",
         events=[
@@ -30,14 +82,204 @@
                 ),
             ),
         ],
+    ),
+    # fixture 2: streaming text — exercises the text start/delta/done path.
+    # Uses non-empty initial_content so the payload comparison catches a channel
+    # that drops StreamTaskMessageStart.content (Greptile id 3438655533, P1).
+    Fixture(
+        name="streaming-text",
+        events=[
+            StreamTaskMessageStart(
+                type="start",
+                index=0,
+                content=TextContent(type="text", author="agent", content="Init"),
+            ),
+            StreamTaskMessageDelta(
+                type="delta",
+                index=0,
+                delta=TextDelta(type="text", text_delta="Hello"),
+            ),
+            StreamTaskMessageDelta(
+                type="delta",
+                index=0,
+                delta=TextDelta(type="text", text_delta=" world"),
+            ),
+            StreamTaskMessageDone(type="done", index=0),
+        ],
+    ),
+    # fixture 3: reasoning block — exercises reasoning span open/close + delivery.
+    # ReasoningContent.summary is included in the payload so a channel that drops
+    # the reasoning-summary fails (Greptile id 3438655533, P1).
+    Fixture(
+        name="reasoning-block",
+        events=[
+            StreamTaskMessageStart(
+                type="start",
+                index=0,
+                content=ReasoningContent(
+                    type="reasoning",
+                    author="agent",
+                    summary=["Thinking..."],
+                ),
+            ),
+            StreamTaskMessageDelta(
+                type="delta",
+                index=0,
+                delta=ReasoningContentDelta(
+                    type="reasoning_content",
+                    content_index=0,
+                    content_delta="step 1",
+                ),
+            ),
+            StreamTaskMessageDone(type="done", index=0),
+        ],
+    ),
+    # fixture 4: streamed tool_request (AGX1-377 fix) — tool_request delivered
+    # via Start+Done (no Full). auto_send now delivers this instead of dropping
+    # it. Both channels must produce a LogicalDelivery for this fixture.
+    Fixture(
+        name="streamed-tool-request",
+        events=[
+            StreamTaskMessageStart(
+                type="start",
+                index=0,
+                content=ToolRequestContent(
+                    type="tool_request",
+                    author="agent",
+                    tool_call_id="tr-1",
+                    name="Read",
+                    arguments={"path": "/tmp/foo"},
+                ),
+            ),
+            StreamTaskMessageDone(type="done", index=0),
+            StreamTaskMessageFull(
+                type="full",
+                index=1,
+                content=ToolResponseContent(
+                    type="tool_response",
+                    author="agent",
+                    tool_call_id="tr-1",
+                    name="Read",
+                    content="file contents",
+                ),
+            ),
+        ],
+    ),
+    # fixture 5: parallel tool calls + a tool that errors (AGX1-373 review,
+    # danielmillerp). The earlier fixtures only exercise one tool at a time, so
+    # equivalence is proven over trivially-orderable streams. This stresses the
+    # representative case: two tool spans open SIMULTANEOUSLY (p-ls opens via the
+    # streamed Start+Done path, p-read opens via Full while p-ls is still open),
+    # then close in a different order than they opened, and one of them returns
+    # an error. It guards against the two channels agreeing with each other while
+    # both mishandling interleaved/parallel spans or a failing tool.
+    #
+    # The failing tool sets ToolResponseContent.is_error=True (AGX1-371), which
+    # the span deriver threads onto the closed tool span's CloseSpan.is_error.
+    # Both channels feed the same deriver, so the recorded span signals — error
+    # status included — must match.
+    Fixture(
+        name="parallel-tools-with-error",
+        events=[
+            # p-ls: streamed tool_request (opens its span at Done).
+            StreamTaskMessageStart(
+                type="start",
+                index=0,
+                content=ToolRequestContent(
+                    type="tool_request",
+                    author="agent",
+                    tool_call_id="p-ls",
+                    name="Bash",
+                    arguments={"command": "ls /nope"},
+                ),
+            ),
+            StreamTaskMessageDone(type="done", index=0),
+            # p-read: Full tool_request opens a second span while p-ls is open.
+            StreamTaskMessageFull(
+                type="full",
+                index=1,
+                content=ToolRequestContent(
+                    type="tool_request",
+                    author="agent",
+                    tool_call_id="p-read",
+                    name="Read",
+                    arguments={"path": "/etc/hosts"},
+                ),
+            ),
+            # p-ls errors and closes first (close order != open order).
+            StreamTaskMessageFull(
+                type="full",
+                index=2,
+                content=ToolResponseContent(
+                    type="tool_response",
+                    author="agent",
+                    tool_call_id="p-ls",
+                    name="Bash",
+                    content="Error: ls: /nope: No such file or directory",
+                    is_error=True,
+                ),
+            ),
+            # p-read succeeds and closes second.
+            StreamTaskMessageFull(
+                type="full",
+                index=3,
+                content=ToolResponseContent(
+                    type="tool_response",
+                    author="agent",
+                    tool_call_id="p-read",
+                    name="Read",
+                    content="127.0.0.1 localhost",
+                ),
+            ),
+        ],
+    ),
+]
+
+# Register all fixtures for backward-compatible use via all_fixtures()
+for _f in _FIXTURES:
+    register(_f)
+
+
+# ---------------------------------------------------------------------------
+# Cross-channel conformance: logical equivalence + span equivalence
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("fixture", _FIXTURES, ids=lambda f: f.name)
+@pytest.mark.asyncio
+async def test_cross_channel_equivalence(fixture: Fixture) -> None:
+    """Assert that yield_events and auto_send produce equivalent logical
+    deliveries and identical span signals for every fixture.
+
+    This is the real cross-channel guarantee: the two delivery adapters
+    agree on WHAT was delivered (logical content) and HOW spans were derived,
+    even though their streaming-envelope shapes differ (Full vs Start+Done for
+    tool messages).
+
+    The span signals are the ones each channel's tracer ACTUALLY recorded while
+    delivering, not a re-derivation, so a regression where one channel skips
+    deriver.observe() for some event type is caught here.
+    """
+    yield_deliveries, auto_deliveries, yield_spans, auto_spans = await run_cross_channel_conformance(fixture)
+
+    assert yield_deliveries == auto_deliveries, (
+        f"[{fixture.name}] logical deliveries differ:\n  yield:     {yield_deliveries}\n  auto_send: {auto_deliveries}"
     )
-)
+    assert yield_spans == auto_spans, (
+        f"[{fixture.name}] span signals differ:\n  yield:     {yield_spans}\n  auto_send: {auto_spans}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Backward-compatible determinism test (kept for regression coverage)
+# ---------------------------------------------------------------------------
 
 
 @pytest.mark.parametrize("fixture", all_fixtures(), ids=lambda f: f.name)
-def test_span_derivation_is_deterministic(fixture):
-    """Exercises the cross-channel guarantee: yield and auto-send observe the
-    same event stream, so span derivation must be deterministic/idempotent."""
-    # Deriving twice over the same events yields identical signals (the property
-    # that makes yield vs auto-send equivalent, since both observe the same stream).
+def test_span_derivation_is_deterministic(fixture: Fixture) -> None:
+    """Span derivation over the same event list is idempotent.
+
+    Retained as a lightweight regression guard. The primary cross-channel
+    guarantee is asserted in test_cross_channel_equivalence above.
+    """
     assert derive_all(fixture.events) == derive_all(fixture.events)

From 5ec62c20781d24fc3e0b92734fcd444b1e791d70 Mon Sep 17 00:00:00 2001
From: Declan Brady <declan.brady@scale.com>
Date: Mon, 22 Jun 2026 18:21:13 -0400
Subject: [PATCH 04/10] feat(pydantic-ai): migrate onto unified harness surface
 (PR4) (#415)

---
 .github/workflows/harness-integration.yml     |  27 +-
 ...unified-harness-surface-pr4-pydantic-ai.md | 246 +++++++++++
 .../00_sync/harness_pydantic_ai/.dockerignore |  43 ++
 .../00_sync/harness_pydantic_ai/Dockerfile    |  50 +++
 .../00_sync/harness_pydantic_ai/README.md     |  54 +++
 .../00_sync/harness_pydantic_ai/manifest.yaml |  58 +++
 .../harness_pydantic_ai/project/__init__.py   |   0
 .../harness_pydantic_ai/project/acp.py        |  92 +++++
 .../harness_pydantic_ai/project/agent.py      |  39 ++
 .../harness_pydantic_ai/project/tools.py      |  20 +
 .../harness_pydantic_ai/pyproject.toml        |  36 ++
 .../harness_pydantic_ai/tests/test_agent.py   | 138 +++++++
 .../00_base/harness_pydantic_ai/.dockerignore |  43 ++
 .../00_base/harness_pydantic_ai/Dockerfile    |  50 +++
 .../00_base/harness_pydantic_ai/README.md     |  54 +++
 .../00_base/harness_pydantic_ai/manifest.yaml |  58 +++
 .../harness_pydantic_ai/project/__init__.py   |   0
 .../harness_pydantic_ai/project/acp.py        | 159 +++++++
 .../harness_pydantic_ai/project/agent.py      |  39 ++
 .../harness_pydantic_ai/project/tools.py      |  20 +
 .../harness_pydantic_ai/pyproject.toml        |  36 ++
 .../harness_pydantic_ai/tests/test_agent.py   | 118 ++++++
 .../harness_pydantic_ai/.dockerignore         |  43 ++
 .../harness_pydantic_ai/Dockerfile            |  43 ++
 .../10_temporal/harness_pydantic_ai/README.md |  61 +++
 .../harness_pydantic_ai/manifest.yaml         |  62 +++
 .../harness_pydantic_ai/project/__init__.py   |   0
 .../harness_pydantic_ai/project/acp.py        |  35 ++
 .../harness_pydantic_ai/project/agent.py      | 111 +++++
 .../harness_pydantic_ai/project/run_worker.py |  48 +++
 .../harness_pydantic_ai/project/tools.py      |  24 ++
 .../harness_pydantic_ai/project/workflow.py   | 137 +++++++
 .../harness_pydantic_ai/pyproject.toml        |  38 ++
 .../harness_pydantic_ai/tests/test_agent.py   | 114 +++++
 .../lib/adk/_modules/_pydantic_ai_async.py    | 249 +----------
 .../lib/adk/_modules/_pydantic_ai_sync.py     |  33 +-
 .../lib/adk/_modules/_pydantic_ai_tracing.py  |  39 ++
 .../lib/adk/_modules/_pydantic_ai_turn.py     | 134 ++++++
 tests/lib/adk/test_pydantic_ai_async.py       | 311 +++++++++++---
 tests/lib/adk/test_pydantic_ai_sync.py        |  74 ++++
 .../lib/adk/test_pydantic_ai_sync_unified.py  | 209 ++++++++++
 tests/lib/adk/test_pydantic_ai_turn.py        | 276 +++++++++++++
 .../test_pydantic_ai_conformance.py           | 194 +++++++++
 .../harness/test_harness_pydantic_ai_async.py | 361 ++++++++++++++++
 .../harness/test_harness_pydantic_ai_sync.py  | 388 ++++++++++++++++++
 .../test_harness_pydantic_ai_temporal.py      | 370 +++++++++++++++++
 46 files changed, 4439 insertions(+), 295 deletions(-)
 create mode 100644 docs/superpowers/plans/2026-06-18-unified-harness-surface-pr4-pydantic-ai.md
 create mode 100644 examples/tutorials/00_sync/harness_pydantic_ai/.dockerignore
 create mode 100644 examples/tutorials/00_sync/harness_pydantic_ai/Dockerfile
 create mode 100644 examples/tutorials/00_sync/harness_pydantic_ai/README.md
 create mode 100644 examples/tutorials/00_sync/harness_pydantic_ai/manifest.yaml
 create mode 100644 examples/tutorials/00_sync/harness_pydantic_ai/project/__init__.py
 create mode 100644 examples/tutorials/00_sync/harness_pydantic_ai/project/acp.py
 create mode 100644 examples/tutorials/00_sync/harness_pydantic_ai/project/agent.py
 create mode 100644 examples/tutorials/00_sync/harness_pydantic_ai/project/tools.py
 create mode 100644 examples/tutorials/00_sync/harness_pydantic_ai/pyproject.toml
 create mode 100644 examples/tutorials/00_sync/harness_pydantic_ai/tests/test_agent.py
 create mode 100644 examples/tutorials/10_async/00_base/harness_pydantic_ai/.dockerignore
 create mode 100644 examples/tutorials/10_async/00_base/harness_pydantic_ai/Dockerfile
 create mode 100644 examples/tutorials/10_async/00_base/harness_pydantic_ai/README.md
 create mode 100644 examples/tutorials/10_async/00_base/harness_pydantic_ai/manifest.yaml
 create mode 100644 examples/tutorials/10_async/00_base/harness_pydantic_ai/project/__init__.py
 create mode 100644 examples/tutorials/10_async/00_base/harness_pydantic_ai/project/acp.py
 create mode 100644 examples/tutorials/10_async/00_base/harness_pydantic_ai/project/agent.py
 create mode 100644 examples/tutorials/10_async/00_base/harness_pydantic_ai/project/tools.py
 create mode 100644 examples/tutorials/10_async/00_base/harness_pydantic_ai/pyproject.toml
 create mode 100644 examples/tutorials/10_async/00_base/harness_pydantic_ai/tests/test_agent.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/.dockerignore
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/Dockerfile
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/README.md
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/manifest.yaml
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/__init__.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/acp.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/agent.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/run_worker.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/tools.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/workflow.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/pyproject.toml
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/tests/test_agent.py
 create mode 100644 src/agentex/lib/adk/_modules/_pydantic_ai_turn.py
 create mode 100644 tests/lib/adk/test_pydantic_ai_sync_unified.py
 create mode 100644 tests/lib/adk/test_pydantic_ai_turn.py
 create mode 100644 tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py
 create mode 100644 tests/lib/core/harness/test_harness_pydantic_ai_async.py
 create mode 100644 tests/lib/core/harness/test_harness_pydantic_ai_sync.py
 create mode 100644 tests/lib/core/harness/test_harness_pydantic_ai_temporal.py

diff --git a/.github/workflows/harness-integration.yml b/.github/workflows/harness-integration.yml
index 51893f10f..11b5239dc 100644
--- a/.github/workflows/harness-integration.yml
+++ b/.github/workflows/harness-integration.yml
@@ -7,6 +7,7 @@ on:
     paths:
       - "src/agentex/lib/core/harness/**"
       - "src/agentex/lib/adk/_modules/**"
+      - "tests/lib/core/harness/test_harness_pydantic_ai_*.py"
       - ".github/workflows/harness-integration.yml"
 
 jobs:
@@ -31,10 +32,28 @@ jobs:
       - name: Conformance suite
         run: ./scripts/test tests/lib/core/harness/ -v
 
-  # Live integration matrix (harness x {sync, async, temporal}) is added per-harness
-  # in the migration plans. Placeholder job keeps the workflow valid until then.
+  # Offline pydantic-ai integration tests (sync / async / temporal channels).
+  # These use pydantic-ai TestModel + fake streaming/tracing and require no live
+  # infrastructure. Enabled here for PR 4 (pydantic-ai migration). Future harness
+  # migration PRs (5-8) should add their integration-test paths to this matrix.
   live-matrix:
     runs-on: ubuntu-latest
-    if: false  # enabled once the first harness's test agents land
+    strategy:
+      matrix:
+        channel: [sync, async, temporal]
+      fail-fast: false
+    name: pydantic-ai-${{ matrix.channel }}
     steps:
-      - run: echo "populated by migration PRs"  # TODO(harness-migration): enable per-harness; see migration PRs 4-8
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2
+        with:
+          version: '0.10.2'
+
+      - name: Bootstrap
+        run: ./scripts/bootstrap
+
+      - name: pydantic-ai ${{ matrix.channel }} integration tests (offline, TestModel)
+        run: |
+          ./scripts/test tests/lib/core/harness/test_harness_pydantic_ai_${{ matrix.channel }}.py -v
diff --git a/docs/superpowers/plans/2026-06-18-unified-harness-surface-pr4-pydantic-ai.md b/docs/superpowers/plans/2026-06-18-unified-harness-surface-pr4-pydantic-ai.md
new file mode 100644
index 000000000..2fa1892fe
--- /dev/null
+++ b/docs/superpowers/plans/2026-06-18-unified-harness-surface-pr4-pydantic-ai.md
@@ -0,0 +1,246 @@
+# Unified Harness Surface — PR 4: pydantic-ai Migration Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Migrate the pydantic-ai harness onto the unified harness surface so it emits streaming + persisted messages + tracing + turn usage through ONE source of truth, over both delivery channels (yield + auto-send), with no public regression — and ship its 3 integration test agents (sync/async/temporal).
+
+**Architecture:** Wrap a pydantic-ai run as a `HarnessTurn` (canonical `StreamTaskMessage*` stream + normalized `TurnUsage`). Reuse the existing `convert_pydantic_ai_to_agentex_events` mapping as the tap. Reimplement the existing public auto-send helper on top of `UnifiedEmitter.auto_send_turn`, and route sync ACP agents through `UnifiedEmitter.yield_turn`. Retire the bespoke `_pydantic_ai_tracing` handler in favor of the surface's derived spans (keep the old symbol as a deprecated shim).
+
+**Tech Stack:** Python 3, pydantic-ai (`pydantic_ai`), pydantic v2, pytest + pytest-asyncio, the `agentex.lib.core.harness` package from PRs 1-3.
+
+**Foundation:** `src/agentex/lib/core/harness/` (`UnifiedEmitter`, `SpanTracer`, `SpanDeriver`, `HarnessTurn`, `TurnUsage`, `TurnResult`, `yield_events`, `auto_send`, conformance scaffold). Design: `docs/superpowers/specs/2026-06-18-unified-harness-surface-design.md`.
+
+---
+
+## Dependencies (must land first)
+
+- **AGX1-373** — cross-channel conformance equivalence + `Full` wire reconciliation. PR 4's conformance fixtures register into the upgraded cross-channel runner. **Do not start Task 6 until 373 is merged into the foundation branch.**
+- **AGX1-375** — public `adk` import path for the harness surface. If merged, import the surface via the public path in this PR; if not, import from `agentex.lib.core.harness` and add a follow-up note. (Tasks below assume `from agentex.lib.core.harness import UnifiedEmitter, TurnUsage, ...`; swap to the public path if 375 landed.)
+
+This is one PR (target < 1000 lines code, excluding any recorded fixtures). The 3 test agents are the largest chunk; if the diff exceeds budget, split the test agents into a follow-up PR 4b (note in the PR description).
+
+---
+
+## File Structure
+
+- Modify `src/agentex/lib/adk/_modules/_pydantic_ai_sync.py` — add an optional `on_result` callback to `convert_pydantic_ai_to_agentex_events` (additive) so usage can be captured. Behavior unchanged when omitted.
+- Create `src/agentex/lib/adk/_modules/_pydantic_ai_turn.py` — `PydanticAITurn(HarnessTurn)` + `pydantic_ai_usage_to_turn_usage(...)`.
+- Modify `src/agentex/lib/adk/_modules/_pydantic_ai_async.py` — reimplement `stream_pydantic_ai_events` on `UnifiedEmitter.auto_send_turn`, preserving signature + return.
+- Modify `src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py` — mark `create_pydantic_ai_tracing_handler` / `AgentexPydanticAITracingHandler` deprecated (docstring + `DeprecationWarning`); keep importable.
+- Create `tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py` — register pydantic-ai fixtures into the cross-channel conformance runner.
+- Create `examples/tutorials/harness-pydantic-ai-{sync,async,temporal}/` — 3 test agents (modeled on the `sync-pydantic-ai` / `default-pydantic-ai` / `temporal-pydantic-ai` CLI templates) using the unified surface.
+- Modify `.github/workflows/harness-integration.yml` — enable the pydantic-ai rows of the `live-matrix` job.
+- Modify `.github/workflows/agentex-tutorials-test.yml` (or its agent list) — include the 3 new test agents if that workflow enumerates agents.
+
+---
+
+## Task 1: Expose the pydantic-ai run result for usage capture
+
+**Files:**
+- Modify: `src/agentex/lib/adk/_modules/_pydantic_ai_sync.py`
+- Test: `tests/lib/adk/test_pydantic_ai_sync.py` (create if absent)
+
+The converter already iterates the pydantic-ai event stream and currently *ignores* `AgentRunResultEvent` (the terminal event carrying the run result + usage). Add an optional callback so a caller can capture it without changing existing behavior.
+
+- [ ] **Step 1: Write the failing test.**
+
+```python
+import pytest
+from agentex.lib.adk._modules._pydantic_ai_sync import convert_pydantic_ai_to_agentex_events
+
+
+class _FakeResultEvent:  # stand-in for pydantic_ai.run.AgentRunResultEvent
+    def __init__(self, result):
+        self.result = result
+
+
+async def _stream(events):
+    for e in events:
+        yield e
+
+
+@pytest.mark.asyncio
+async def test_on_result_callback_receives_terminal_event(monkeypatch):
+    # When the stream ends with an AgentRunResultEvent, on_result is invoked with it,
+    # and the converter still yields no extra events for it.
+    captured = {}
+    # Use a real AgentRunResultEvent if constructable; otherwise patch isinstance check.
+    # (Implementer: see Step 3 note — match the real terminal event type.)
+    ...
+```
+
+Implementer note: the exact terminal event type is `pydantic_ai.run.AgentRunResultEvent` (already imported in `_pydantic_ai_sync.py`). Write the test to feed a stream ending in a real `AgentRunResultEvent` (construct it as the installed pydantic-ai version requires; inspect `python -c "import pydantic_ai.run, inspect; print(inspect.signature(pydantic_ai.run.AgentRunResultEvent))"`). Assert `on_result` is called once with that event and that the converter yields the same `StreamTaskMessage*` sequence as without the callback (no behavior change for the streaming output).
+
+- [ ] **Step 2: Run** `uv run pytest tests/lib/adk/test_pydantic_ai_sync.py -v` — expect FAIL (no `on_result` param).
+
+- [ ] **Step 3: Implement.** Add `on_result: Callable[[AgentRunResultEvent], None] | None = None` (and an async-callable variant if needed) to `convert_pydantic_ai_to_agentex_events`. In the existing `elif isinstance(event, (FunctionToolCallEvent, FinalResultEvent, AgentRunResultEvent))` branch, when the event is an `AgentRunResultEvent` and `on_result` is set, call it (await if it's a coroutine). Keep yielding nothing for it. No other change.
+
+- [ ] **Step 4: Run** the test — expect PASS, plus run the existing `_pydantic_ai_sync` tests if any to confirm no regression.
+
+- [ ] **Step 5: Commit** `feat(pydantic-ai): optional on_result callback to expose run result for usage capture`.
+
+---
+
+## Task 2: Normalize pydantic-ai usage to `TurnUsage`
+
+**Files:**
+- Create: `src/agentex/lib/adk/_modules/_pydantic_ai_turn.py`
+- Test: `tests/lib/adk/test_pydantic_ai_turn.py`
+
+- [ ] **Step 1: Verify the real usage shape FIRST.** Run `uv run python -c "from pydantic_ai.usage import RunUsage; import inspect; print([f for f in RunUsage.model_fields])"` (the type/name may be `RunUsage` or `Usage` depending on the installed version). Record the exact field names (commonly: `input_tokens`, `output_tokens`, `total_tokens`, `requests`, and a cache/`details` field). The mapping in Step 3 MUST use the real field names.
+
+- [ ] **Step 2: Write the failing test.**
+
+```python
+from agentex.lib.adk._modules._pydantic_ai_turn import pydantic_ai_usage_to_turn_usage
+
+
+def test_usage_normalization_maps_fields():
+    # Build a usage object matching the installed pydantic-ai RunUsage shape
+    # (see Task 2 Step 1 for the real fields), then assert the mapping.
+    usage_obj = ...  # construct RunUsage(input_tokens=10, output_tokens=20, requests=2, ...)
+    tu = pydantic_ai_usage_to_turn_usage(usage_obj, model="openai:gpt-4o")
+    assert tu.model == "openai:gpt-4o"
+    assert tu.input_tokens == 10
+    assert tu.output_tokens == 20
+    assert tu.num_llm_calls == 2
+```
+
+- [ ] **Step 3: Implement** `pydantic_ai_usage_to_turn_usage(usage, model) -> TurnUsage` mapping the verified RunUsage fields onto `TurnUsage` (`input_tokens`, `output_tokens`, `total_tokens`, `cached_input_tokens` if available, `num_llm_calls` ← `requests`). Use `getattr(usage, "<field>", None)` defensively so a version field rename degrades to `None` rather than crashing. Then implement `PydanticAITurn`:
+
+```python
+class PydanticAITurn:
+    """A pydantic-ai run as a HarnessTurn: canonical event stream + normalized usage."""
+
+    def __init__(self, stream, model: str | None = None):
+        self._stream = stream
+        self._model = model
+        self._usage = TurnUsage(model=model)
+
+    @property
+    async def events(self):
+        def _capture(result_event):
+            run_result = getattr(result_event, "result", None)
+            usage_obj = run_result.usage() if run_result is not None else None
+            if usage_obj is not None:
+                self._usage = pydantic_ai_usage_to_turn_usage(usage_obj, self._model)
+        async for ev in convert_pydantic_ai_to_agentex_events(self._stream, on_result=_capture):
+            yield ev
+
+    def usage(self) -> TurnUsage:
+        return self._usage
+```
+
+(Verify `run_result.usage()` is the correct accessor for the installed version; adjust if it's an attribute.)
+
+- [ ] **Step 4: Add a `PydanticAITurn` test** that feeds a small stream ending in an `AgentRunResultEvent` whose `result.usage()` returns a known usage, drives `turn.events` to exhaustion, then asserts `turn.usage()` reflects the normalized values and that `events` yielded the expected `StreamTaskMessage*`. Confirm `usage()` BEFORE exhaustion returns the default (documented single-pass contract).
+
+- [ ] **Step 5: Run** the tests — expect PASS.
+
+- [ ] **Step 6: Commit** `feat(pydantic-ai): PydanticAITurn HarnessTurn + usage normalization`.
+
+---
+
+## Task 3: Reimplement the auto-send helper on the unified surface
+
+**Files:**
+- Modify: `src/agentex/lib/adk/_modules/_pydantic_ai_async.py`
+- Test: `tests/lib/adk/test_pydantic_ai_async.py`
+
+`stream_pydantic_ai_events(stream, task_id, ...)` currently hand-drives `adk.streaming`. Reimplement it to delegate to `UnifiedEmitter.auto_send_turn(PydanticAITurn(stream, model))`, preserving its signature and return value (the accumulated final text). Feature-add: traces by default.
+
+- [ ] **Step 1: Capture current behavior as a characterization test.** Before changing anything, write a test that runs the CURRENT `stream_pydantic_ai_events` over a fixture stream with a fake `adk.streaming` and records the messages produced (text, tool request/response). This is the backward-compat baseline ("equivalent messages before/after" from the design).
+
+- [ ] **Step 2: Run** it green against the current implementation. Commit the test alone: `test(pydantic-ai): characterize stream_pydantic_ai_events output`.
+
+- [ ] **Step 3: Reimplement** `stream_pydantic_ai_events` to build a `PydanticAITurn` and call `UnifiedEmitter(task_id=task_id, trace_id=<resolved>, parent_span_id=<resolved>, streaming=<injected or None>).auto_send_turn(turn)`, returning `result.final_text`. Resolve `trace_id`/`parent_span_id` the same way the module does today (from the streaming/tracing context vars it already reads). Preserve the exact public signature and return type.
+
+- [ ] **Step 4: Run** the characterization test — it must still pass (same messages). Adjust the test only if AGX1-373 deliberately changed the tool-message wire shape; in that case assert the post-373 shape and note it. Confirm tracing now occurs by default (assert spans via a fake tracer).
+
+- [ ] **Step 5: Commit** `refactor(pydantic-ai): reimplement stream_pydantic_ai_events on UnifiedEmitter (default tracing)`.
+
+---
+
+## Task 4: Route sync ACP delivery through the surface + deprecate the bespoke tracing handler
+
+**Files:**
+- Modify: `src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py`
+- (Reference) the sync ACP usage pattern in the pydantic-ai docs/templates.
+
+- [ ] **Step 1: Deprecate the bespoke tracing handler.** Add a `DeprecationWarning` (via `warnings.warn(...)`) and a docstring note to `create_pydantic_ai_tracing_handler` / `AgentexPydanticAITracingHandler` stating the unified surface (`UnifiedEmitter`, which derives spans from the canonical stream) supersedes it. Keep the symbols importable and functional (no removal — backward compat).
+
+- [ ] **Step 2: Confirm the sync path.** The sync tap remains `convert_pydantic_ai_to_agentex_events`. Document (in the module docstring of `_pydantic_ai_sync.py`) the recommended sync ACP usage:
+
+```python
+turn = PydanticAITurn(agent.run_stream_events(...), model=...)
+async for event in emitter.yield_turn(turn):
+    yield event
+```
+
+No code change beyond the docstring (the sync converter already yields the canonical stream; `yield_turn` adds tracing). Add a test that `emitter.yield_turn(PydanticAITurn(...))` forwards the same events the bare converter would and derives spans.
+
+- [ ] **Step 3: Run** tests; **Commit** `refactor(pydantic-ai): deprecate bespoke tracing handler; document unified sync path`.
+
+---
+
+## Task 5: pydantic-ai cross-channel conformance fixtures
+
+**Files:**
+- Create: `tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py`
+
+**Blocked by AGX1-373** (the cross-channel conformance runner). Once 373 is merged into the foundation branch:
+
+- [ ] **Step 1: Record canonical fixtures.** For 3-4 representative pydantic-ai runs (text-only; single tool; reasoning/thinking; multi-step text+tool), capture the `StreamTaskMessage*` sequence the tap produces (run `convert_pydantic_ai_to_agentex_events` over recorded `AgentStreamEvent` inputs, or hand-author the canonical sequences). Store as `Fixture(name=..., events=[...])`.
+
+- [ ] **Step 2: Register** each fixture with the conformance runner and let the cross-channel parametrized test (from AGX1-373) assert yield-vs-auto-send equivalence + span equivalence for each. Register/parametrize within THIS module (per the runner's documented per-module registry semantics).
+
+- [ ] **Step 3: Run** `./scripts/test tests/lib/core/harness/ -v` — all green. **Commit** `test(pydantic-ai): cross-channel conformance fixtures`.
+
+---
+
+## Task 6: Three integration test agents (sync / async / temporal)
+
+**Files:**
+- Create: `examples/tutorials/harness-pydantic-ai-sync/` , `…-async/` , `…-temporal/` (each a minimal Agentex agent).
+- Modify: `.github/workflows/harness-integration.yml` (enable pydantic-ai `live-matrix` rows).
+- Modify: `.github/workflows/agentex-tutorials-test.yml` if it enumerates agents.
+
+Each agent is the smallest agent that exercises one delivery channel through the unified surface with the pydantic-ai harness.
+
+- [ ] **Step 1: Scaffold from the existing templates.** Base each agent on the corresponding CLI template: `sync-pydantic-ai`, `default-pydantic-ai` (async), `temporal-pydantic-ai` (under `src/agentex/lib/cli/templates/`). In each, the message handler builds `PydanticAITurn(agent.run_stream_events(params.content.content), model=...)` and:
+  - sync agent: `async for ev in emitter.yield_turn(turn): yield ev`
+  - async + temporal agents: `await emitter.auto_send_turn(turn)` (temporal: inside the activity, as the template already structures it).
+  Use a tiny pydantic-ai agent with ONE trivial tool so the run exercises text + a tool call + tool response.
+
+- [ ] **Step 2: Write an integration test per agent** that drives it with a fixed prompt and asserts: valid ordered messages (text + tool request + tool response) and a well-formed span tree. Use the repo's existing tutorial-agent test harness pattern (see `agentex-tutorials-test.yml` and how current tutorial agents are tested).
+
+- [ ] **Step 3: Wire CI.** In `.github/workflows/harness-integration.yml`, replace the `if: false` placeholder `live-matrix` job (or add a real matrix) with the pydantic-ai × {sync, async, temporal} entries, each running its agent's integration test. If `agentex-tutorials-test.yml` enumerates agents, add the three there too. `log`/document any agent-type not covered (none expected for pydantic-ai).
+
+- [ ] **Step 4: Run** the integration tests locally (as far as the env allows) and the conformance + unit suites. **Commit** `test(pydantic-ai): sync/async/temporal integration agents + enable CI live-matrix rows`.
+
+---
+
+## Task 7: Full suite, type check, and backward-compat audit
+
+- [ ] **Step 1:** `./scripts/test tests/lib/core/harness/ tests/lib/adk/ -v` — all green on 3.12 + 3.13.
+- [ ] **Step 2:** `uv run pyright src/agentex/lib/` (or the harness + pydantic modules) — 0 new errors.
+- [ ] **Step 3: Backward-compat audit.** Confirm the public signatures are unchanged: `convert_pydantic_ai_to_agentex_events` (only gained an optional kwarg), `stream_pydantic_ai_events` (same signature + return), `create_pydantic_ai_tracing_handler` (still importable, now warns). Grep the repo + templates for callers and confirm none broke.
+- [ ] **Step 4:** If any fix was needed, **Commit** `chore(pydantic-ai): type/back-compat fixes`.
+
+---
+
+## Self-Review checklist (run before opening the PR)
+
+- Every public symbol that existed before still exists with the same signature (additive-only): `convert_pydantic_ai_to_agentex_events`, `stream_pydantic_ai_events`, `create_pydantic_ai_tracing_handler`.
+- The auto-send helper returns the same final text as before (characterization test passes, or the post-373 shape is asserted with a note).
+- Tracing is now on by default for both channels and is overridable (emitter `tracer=False`).
+- Usage normalization uses the REAL pydantic-ai usage field names (verified in Task 2 Step 1), with defensive `getattr`.
+- Conformance fixtures register per-module and pass the cross-channel assertion from AGX1-373.
+- 3 test agents exist and their CI rows are enabled.
+- No `# type: ignore` added without justification.
+
+## Notes for the PR description
+
+- Link AGX1-373 (dependency) and AGX1-375 (import path); note AGX1-374 (reasoning/mixed-ordering auto_send tests) is foundation-level and orthogonal.
+- State the diff size; if test agents pushed it over budget, note the PR 4b split.
+- This is the template the langgraph (PR 5) and openai (PR 6) migrations follow.
diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/.dockerignore b/examples/tutorials/00_sync/harness_pydantic_ai/.dockerignore
new file mode 100644
index 000000000..c49489471
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_pydantic_ai/.dockerignore
@@ -0,0 +1,43 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Environments
+.env**
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Git
+.git
+.gitignore
+
+# Misc
+.DS_Store
diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/Dockerfile b/examples/tutorials/00_sync/harness_pydantic_ai/Dockerfile
new file mode 100644
index 000000000..3a9412fa9
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_pydantic_ai/Dockerfile
@@ -0,0 +1,50 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+ENV UV_HTTP_TIMEOUT=1000
+
+# Copy pyproject.toml and README.md to install dependencies
+COPY 00_sync/harness_pydantic_ai/pyproject.toml /app/harness_pydantic_ai/pyproject.toml
+COPY 00_sync/harness_pydantic_ai/README.md /app/harness_pydantic_ai/README.md
+
+WORKDIR /app/harness_pydantic_ai
+
+# Copy the project code
+COPY 00_sync/harness_pydantic_ai/project /app/harness_pydantic_ai/project
+
+# Copy the test files
+COPY 00_sync/harness_pydantic_ai/tests /app/harness_pydantic_ai/tests
+
+# Copy shared test utilities
+COPY test_utils /app/test_utils
+
+# Install the required Python packages with dev dependencies
+RUN uv pip install --system .[dev]
+
+# Set environment variables
+ENV PYTHONPATH=/app
+
+# Set test environment variables
+ENV AGENT_NAME=s-harness-pydantic-ai
+
+# Run the agent using uvicorn
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/README.md b/examples/tutorials/00_sync/harness_pydantic_ai/README.md
new file mode 100644
index 000000000..1466bc4e7
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_pydantic_ai/README.md
@@ -0,0 +1,54 @@
+# Sync Pydantic AI Harness Test Agent
+
+A minimal **synchronous** Pydantic AI agent that drives the **unified harness
+surface** (`UnifiedEmitter.yield_turn` + `PydanticAITurn`) on the sync
+(HTTP-yield) channel.
+
+## Why this agent exists
+
+The `00_sync/040_pydantic_ai` tutorial streams via the bare
+`convert_pydantic_ai_to_agentex_events` converter and does **not** exercise the
+unified `yield_turn` path. This harness test agent is the sync coverage for the
+unified surface: it proves an agent author can wire the sync channel through
+`UnifiedEmitter` and get automatic span derivation (tool spans nested under the
+per-turn span) for free, exactly like the async/temporal channels.
+
+## How it wires the unified surface
+
+In `project/acp.py`:
+
+```python
+emitter = UnifiedEmitter(
+    task_id=task_id,
+    trace_id=task_id,
+    parent_span_id=turn_span.id if turn_span else None,
+)
+async with agent.run_stream_events(user_message) as stream:
+    turn = PydanticAITurn(stream, model=MODEL_NAME)  # coalesce off: stream tool-call arg tokens
+    async for ev in emitter.yield_turn(turn):
+        yield ev
+```
+
+- `coalesce_tool_requests=False` (the default) preserves token-by-token
+  tool-call argument streaming on the sync channel.
+- The `UnifiedEmitter` is constructed from the ACP/streaming context
+  (`task_id` + `trace_id` + `parent_span_id`) so tool spans nest under the
+  per-turn `AGENT_WORKFLOW` span automatically.
+
+## Files
+
+- `project/acp.py` — sync ACP handler using `emitter.yield_turn(...)`.
+- `project/agent.py` — builds the `pydantic_ai.Agent` with one tool.
+- `project/tools.py` — `get_weather(city)` returning a constant.
+- `tests/test_agent.py` — live integration test (requires a running agent).
+
+## Tools
+
+- `get_weather(city: str) -> str`: returns a fixed "sunny and 72°F" string so a
+  run deterministically exercises text + a tool call + a tool response.
+
+## Offline coverage
+
+Offline integration tests for the same wiring (pydantic-ai `TestModel` + fake
+streaming/tracing, no network) live in the SDK repo at
+`tests/lib/core/harness/test_harness_pydantic_ai_sync.py`.
diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/manifest.yaml b/examples/tutorials/00_sync/harness_pydantic_ai/manifest.yaml
new file mode 100644
index 000000000..55d8f5d2b
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_pydantic_ai/manifest.yaml
@@ -0,0 +1,58 @@
+build:
+  context:
+    root: ../../
+    include_paths:
+      - 00_sync/harness_pydantic_ai
+      - test_utils
+    dockerfile: 00_sync/harness_pydantic_ai/Dockerfile
+    dockerignore: 00_sync/harness_pydantic_ai/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+
+agent:
+  acp_type: sync
+  name: s-harness-pydantic-ai
+  description: A sync Pydantic AI harness test agent using the unified emitter surface
+
+  temporal:
+    enabled: false
+
+  credentials:
+    - env_var_name: OPENAI_API_KEY
+      secret_name: openai-api-key
+      secret_key: api-key
+    - env_var_name: REDIS_URL
+      secret_name: redis-url-secret
+      secret_key: url
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "s-harness-pydantic-ai"
+      description: "A sync Pydantic AI harness test agent using the unified emitter surface"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/project/__init__.py b/examples/tutorials/00_sync/harness_pydantic_ai/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/project/acp.py b/examples/tutorials/00_sync/harness_pydantic_ai/project/acp.py
new file mode 100644
index 000000000..f23cd7960
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_pydantic_ai/project/acp.py
@@ -0,0 +1,92 @@
+"""ACP handler for the sync harness Pydantic AI test agent.
+
+This agent exercises the UNIFIED HARNESS SURFACE on the sync (HTTP-yield)
+channel — ``UnifiedEmitter.yield_turn(PydanticAITurn(...))`` — rather than the
+bare ``convert_pydantic_ai_to_agentex_events`` converter used by the
+``040_pydantic_ai`` tutorial. The unified surface gives the sync channel the
+same tracing (span derivation) the async/temporal channels get for free.
+
+Flow:
+1. Open a per-turn AGENT_WORKFLOW span via ``adk.tracing.span``.
+2. Construct a ``UnifiedEmitter`` from the ACP/streaming context (task_id +
+   trace_id + parent_span_id) so tool spans nest under the turn span.
+3. Wrap ``agent.run_stream_events(...)`` in a ``PydanticAITurn`` and forward
+   events with ``emitter.yield_turn(turn)`` — yielding each to the client.
+"""
+
+from __future__ import annotations
+
+import os
+from typing import AsyncGenerator
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import agentex.lib.adk as adk
+from project.agent import MODEL_NAME, create_agent
+from agentex.lib.types.acp import SendMessageParams
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.types.task_message_update import TaskMessageUpdate
+from agentex.types.task_message_content import TaskMessageContent
+from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+logger = make_logger(__name__)
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+acp = FastACP.create(acp_type="sync")
+
+_agent = None
+
+
+def get_agent():
+    """Get or create the Pydantic AI agent instance."""
+    global _agent
+    if _agent is None:
+        _agent = create_agent()
+    return _agent
+
+
+@acp.on_message_send
+async def handle_message_send(
+    params: SendMessageParams,
+) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]:
+    """Handle incoming messages, streaming events through the unified surface."""
+    agent = get_agent()
+    task_id = params.task.id
+
+    user_message = params.content.content
+    logger.info(f"Processing message for task {task_id}")
+
+    async with adk.tracing.span(
+        trace_id=task_id,
+        task_id=task_id,
+        name="message",
+        input={"message": user_message},
+        data={"__span_type__": "AGENT_WORKFLOW"},
+    ) as turn_span:
+        # Construct the UnifiedEmitter from the ACP/streaming context so tracing
+        # is automatic: tool spans nest under this turn's span.
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+
+        async with agent.run_stream_events(user_message) as stream:
+            # PydanticAITurn preserves token-by-token tool-call argument
+            # streaming (Start+Delta+Done) on the sync/HTTP channel.
+            turn = PydanticAITurn(stream, model=MODEL_NAME)
+            async for ev in emitter.yield_turn(turn):
+                yield ev
diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/project/agent.py b/examples/tutorials/00_sync/harness_pydantic_ai/project/agent.py
new file mode 100644
index 000000000..72fd74173
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_pydantic_ai/project/agent.py
@@ -0,0 +1,39 @@
+"""Pydantic AI agent definition for the sync harness test agent.
+
+The Agent is the boundary between this module and the API layer (acp.py).
+Pydantic AI handles its own tool-call loop internally — no graph required.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+
+from pydantic_ai import Agent
+
+from project.tools import get_weather
+
+__all__ = ["create_agent", "MODEL_NAME"]
+
+MODEL_NAME = "openai:gpt-4o-mini"
+SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools.
+
+Current date and time: {timestamp}
+
+Guidelines:
+- Be concise and helpful
+- Use tools when they would help answer the user's question
+- If you're unsure, ask clarifying questions
+- Always provide accurate information
+"""
+
+
+def create_agent() -> Agent:
+    """Build and return the Pydantic AI agent with tools registered."""
+    agent = Agent(
+        MODEL_NAME,
+        system_prompt=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
+    )
+
+    agent.tool_plain(get_weather)
+
+    return agent
diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/project/tools.py b/examples/tutorials/00_sync/harness_pydantic_ai/project/tools.py
new file mode 100644
index 000000000..d649c75f1
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_pydantic_ai/project/tools.py
@@ -0,0 +1,20 @@
+"""Tool definitions for the sync harness Pydantic AI agent.
+
+Pydantic AI tools are registered directly on the Agent via decorators
+(see project.agent). This module hosts the bare function so it is easy to
+unit-test in isolation.
+"""
+
+from __future__ import annotations
+
+
+def get_weather(city: str) -> str:
+    """Get the current weather for a city.
+
+    Args:
+        city: The name of the city to get weather for.
+
+    Returns:
+        A string describing the weather conditions.
+    """
+    return f"The weather in {city} is sunny and 72°F"
diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/pyproject.toml b/examples/tutorials/00_sync/harness_pydantic_ai/pyproject.toml
new file mode 100644
index 000000000..08f709a4a
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_pydantic_ai/pyproject.toml
@@ -0,0 +1,36 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "s-harness-pydantic-ai"
+version = "0.1.0"
+description = "A sync Pydantic AI harness test agent using the unified emitter surface"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+    "pydantic-ai-slim[openai]>=1.0,<2",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+    "black",
+    "isort",
+    "flake8",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
+
+[tool.black]
+line-length = 88
+target-version = ['py312']
+
+[tool.isort]
+profile = "black"
+line_length = 88
diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/tests/test_agent.py b/examples/tutorials/00_sync/harness_pydantic_ai/tests/test_agent.py
new file mode 100644
index 000000000..96da95fdc
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_pydantic_ai/tests/test_agent.py
@@ -0,0 +1,138 @@
+"""Live tests for the sync harness Pydantic AI agent.
+
+These tests require a running agent (server + deployed agent) and exercise the
+unified-surface sync handler end-to-end over the wire. They mirror the
+``040_pydantic_ai`` tutorial tests but target this harness agent.
+
+Offline coverage of the same wiring (TestModel + fake streaming/tracing) lives
+in ``tests/lib/core/harness/test_harness_pydantic_ai_sync.py`` in the SDK repo.
+
+To run these tests:
+1. Make sure the agent is running (via docker-compose or `agentex agents run`)
+2. Set the AGENTEX_API_BASE_URL environment variable if not using default
+3. Run: pytest test_agent.py -v
+
+Configuration:
+- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003)
+- AGENT_NAME: Name of the agent to test (default: s-harness-pydantic-ai)
+"""
+
+import os
+
+import pytest
+from test_utils.sync import validate_text_in_string, collect_streaming_response
+
+from agentex import Agentex
+from agentex.types import TextContentParam
+from agentex.types.agent_rpc_params import ParamsSendMessageRequest
+
+AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
+AGENT_NAME = os.environ.get("AGENT_NAME", "s-harness-pydantic-ai")
+
+
+@pytest.fixture
+def client():
+    """Create an AgentEx client instance for testing."""
+    return Agentex(base_url=AGENTEX_API_BASE_URL)
+
+
+@pytest.fixture
+def agent_name():
+    """Return the agent name for testing."""
+    return AGENT_NAME
+
+
+@pytest.fixture
+def agent_id(client, agent_name):
+    """Retrieve the agent ID based on the agent name."""
+    agents = client.agents.list()
+    for agent in agents:
+        if agent.name == agent_name:
+            return agent.id
+    raise ValueError(f"Agent with name {agent_name} not found.")
+
+
+class TestNonStreamingMessages:
+    """Test non-streaming message sending with the unified-surface sync agent."""
+
+    def test_send_simple_message(self, client: Agentex, agent_name: str):
+        """Test sending a simple message and receiving a response."""
+        response = client.agents.send_message(
+            agent_name=agent_name,
+            params=ParamsSendMessageRequest(
+                content=TextContentParam(
+                    author="user",
+                    content="Hello! What can you help me with?",
+                    type="text",
+                )
+            ),
+        )
+        result = response.result
+        assert result is not None
+        assert len(result) >= 1
+
+    def test_tool_calling(self, client: Agentex, agent_name: str):
+        """Test that the agent can use tools (e.g., weather tool)."""
+        response = client.agents.send_message(
+            agent_name=agent_name,
+            params=ParamsSendMessageRequest(
+                content=TextContentParam(
+                    author="user",
+                    content="What's the weather in San Francisco?",
+                    type="text",
+                )
+            ),
+        )
+        result = response.result
+        assert result is not None
+        assert len(result) >= 1
+
+
+class TestStreamingMessages:
+    """Test streaming message sending through the unified yield_turn path."""
+
+    def test_stream_simple_message(self, client: Agentex, agent_name: str):
+        """Test streaming a simple message response."""
+        stream = client.agents.send_message_stream(
+            agent_name=agent_name,
+            params=ParamsSendMessageRequest(
+                content=TextContentParam(
+                    author="user",
+                    content="Tell me a short joke.",
+                    type="text",
+                )
+            ),
+        )
+
+        aggregated_content, chunks = collect_streaming_response(stream)
+
+        assert aggregated_content is not None
+        assert len(chunks) > 1, "No chunks received in streaming response."
+
+    def test_stream_tool_calling(self, client: Agentex, agent_name: str):
+        """Test streaming with tool calls through the unified surface.
+
+        Exercises token-by-token tool-call argument streaming (coalesce off),
+        which the unified yield_turn path preserves on the sync channel.
+        """
+        stream = client.agents.send_message_stream(
+            agent_name=agent_name,
+            params=ParamsSendMessageRequest(
+                content=TextContentParam(
+                    author="user",
+                    content="What's the weather in New York? Respond with the temperature.",
+                    type="text",
+                )
+            ),
+        )
+
+        aggregated_content, chunks = collect_streaming_response(stream)
+
+        assert aggregated_content is not None
+        assert len(chunks) > 0, "No chunks received in streaming response."
+        # The weather tool always returns "72°F", so the agent's reply should mention it.
+        validate_text_in_string("72", aggregated_content)
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/.dockerignore b/examples/tutorials/10_async/00_base/harness_pydantic_ai/.dockerignore
new file mode 100644
index 000000000..c49489471
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_pydantic_ai/.dockerignore
@@ -0,0 +1,43 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Environments
+.env**
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Git
+.git
+.gitignore
+
+# Misc
+.DS_Store
diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/Dockerfile b/examples/tutorials/10_async/00_base/harness_pydantic_ai/Dockerfile
new file mode 100644
index 000000000..3c1b9dfea
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_pydantic_ai/Dockerfile
@@ -0,0 +1,50 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+ENV UV_HTTP_TIMEOUT=1000
+
+# Copy pyproject.toml and README.md to install dependencies
+COPY 10_async/00_base/harness_pydantic_ai/pyproject.toml /app/harness_pydantic_ai/pyproject.toml
+COPY 10_async/00_base/harness_pydantic_ai/README.md /app/harness_pydantic_ai/README.md
+
+WORKDIR /app/harness_pydantic_ai
+
+# Copy the project code
+COPY 10_async/00_base/harness_pydantic_ai/project /app/harness_pydantic_ai/project
+
+# Copy the test files
+COPY 10_async/00_base/harness_pydantic_ai/tests /app/harness_pydantic_ai/tests
+
+# Copy shared test utilities
+COPY test_utils /app/test_utils
+
+# Install the required Python packages with dev dependencies
+RUN uv pip install --system .[dev] pytest-asyncio httpx
+
+# Set environment variables
+ENV PYTHONPATH=/app
+
+# Set test environment variables
+ENV AGENT_NAME=ab-harness-pydantic-ai
+
+# Run the agent using uvicorn
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/README.md b/examples/tutorials/10_async/00_base/harness_pydantic_ai/README.md
new file mode 100644
index 000000000..51acb62bd
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_pydantic_ai/README.md
@@ -0,0 +1,54 @@
+# Async Pydantic AI Harness Test Agent
+
+A minimal **async** (Redis-streaming) Pydantic AI agent that drives the
+**unified harness surface** (`UnifiedEmitter.auto_send_turn` + `PydanticAITurn`)
+directly.
+
+## Why this agent exists
+
+The `10_async/00_base/110_pydantic_ai` tutorial streams via the
+`stream_pydantic_ai_events` helper (which uses the unified surface internally).
+This harness test agent calls `emitter.auto_send_turn(...)` **explicitly** at the
+agent-author level, making the unified-surface wiring visible and giving the
+async channel direct coverage.
+
+## How it wires the unified surface
+
+In `project/acp.py`:
+
+```python
+emitter = UnifiedEmitter(
+    task_id=task_id,
+    trace_id=task_id,
+    parent_span_id=turn_span.id if turn_span else None,
+)
+async with agent.run_stream_events(user_message, message_history=previous_messages) as stream:
+    turn = PydanticAITurn(tee_messages(stream), model=MODEL_NAME, coalesce_tool_requests=True)
+    result = await emitter.auto_send_turn(turn)
+```
+
+- `coalesce_tool_requests=True` is required on the async/auto_send path until
+  AGX1-377 lands: tool requests are delivered as a single `Full(tool_request)`
+  rather than streamed `Start + Delta + Done`.
+- The `UnifiedEmitter` is constructed from the ACP context (`task_id` +
+  `trace_id` + `parent_span_id`) so messages auto-send to the task stream
+  (Redis) and tracing is automatic.
+- Multi-turn memory is persisted via `adk.state` (pydantic-ai message history
+  round-tripped through `ModelMessagesTypeAdapter`).
+
+## Files
+
+- `project/acp.py` — async ACP handler using `emitter.auto_send_turn(...)`.
+- `project/agent.py` — builds the `pydantic_ai.Agent` with one tool.
+- `project/tools.py` — `get_weather(city)` returning a constant.
+- `tests/test_agent.py` — live integration test (requires a running agent).
+
+## Tools
+
+- `get_weather(city: str) -> str`: returns a fixed "sunny and 72°F" string.
+
+## Offline coverage
+
+Offline integration tests for the same wiring (pydantic-ai `TestModel` + fake
+streaming/tracing, no network) live in the SDK repo at
+`tests/lib/core/harness/test_harness_pydantic_ai_async.py`.
diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/manifest.yaml b/examples/tutorials/10_async/00_base/harness_pydantic_ai/manifest.yaml
new file mode 100644
index 000000000..f9e50f329
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_pydantic_ai/manifest.yaml
@@ -0,0 +1,58 @@
+build:
+  context:
+    root: ../../../
+    include_paths:
+      - 10_async/00_base/harness_pydantic_ai
+      - test_utils
+    dockerfile: 10_async/00_base/harness_pydantic_ai/Dockerfile
+    dockerignore: 10_async/00_base/harness_pydantic_ai/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+
+agent:
+  acp_type: async
+  name: ab-harness-pydantic-ai
+  description: An async Pydantic AI harness test agent using the unified emitter surface
+
+  temporal:
+    enabled: false
+
+  credentials:
+    - env_var_name: OPENAI_API_KEY
+      secret_name: openai-api-key
+      secret_key: api-key
+    - env_var_name: REDIS_URL
+      secret_name: redis-url-secret
+      secret_key: url
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "ab-harness-pydantic-ai"
+      description: "An async Pydantic AI harness test agent using the unified emitter surface"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/__init__.py b/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/acp.py b/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/acp.py
new file mode 100644
index 000000000..95b638f8b
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/acp.py
@@ -0,0 +1,159 @@
+"""ACP handler for the async harness Pydantic AI test agent.
+
+This agent exercises the UNIFIED HARNESS SURFACE on the async (Redis-streaming)
+channel — ``UnifiedEmitter.auto_send_turn(PydanticAITurn(...))``
+— calling it directly rather than via the ``stream_pydantic_ai_events`` helper
+(which the ``110_pydantic_ai`` tutorial uses). This makes the unified-surface
+wiring explicit at the agent-author level.
+
+Multi-turn memory is persisted via ``adk.state``: on each turn we load the
+previous pydantic-ai ``message_history`` from state, run the agent with it,
+then save the updated history back.
+"""
+
+from __future__ import annotations
+
+import os
+from typing import Any, AsyncIterator
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from pydantic_ai.run import AgentRunResultEvent
+from pydantic_ai.messages import ModelMessagesTypeAdapter
+
+import agentex.lib.adk as adk
+from project.agent import MODEL_NAME, create_agent
+from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.lib.types.fastacp import AsyncACPConfig
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.utils.model_utils import BaseModel
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+logger = make_logger(__name__)
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+acp = FastACP.create(
+    acp_type="async",
+    config=AsyncACPConfig(type="base"),
+)
+
+_agent = None
+
+
+def get_agent():
+    global _agent
+    if _agent is None:
+        _agent = create_agent()
+    return _agent
+
+
+class ConversationState(BaseModel):
+    """Per-task conversation state persisted via ``adk.state``.
+
+    ``history_json`` holds the pydantic-ai message history serialized by
+    ``ModelMessagesTypeAdapter`` — pydantic-ai's official way to round-trip
+    ``ModelMessage`` objects through JSON.
+    """
+
+    history_json: str = "[]"
+    turn_number: int = 0
+
+
+@acp.on_task_create
+async def handle_task_create(params: CreateTaskParams):
+    """Initialize per-task state on task creation."""
+    logger.info(f"Task created: {params.task.id}")
+    await adk.state.create(
+        task_id=params.task.id,
+        agent_id=params.agent.id,
+        state=ConversationState(),
+    )
+
+
+@acp.on_task_event_send
+async def handle_task_event_send(params: SendEventParams):
+    """Handle each user message through the unified auto_send_turn path."""
+    agent = get_agent()
+    task_id = params.task.id
+    agent_id = params.agent.id
+    user_message = params.event.content.content
+
+    logger.info(f"Processing message for thread {task_id}")
+
+    # Echo the user's message into the task history.
+    await adk.messages.create(task_id=task_id, content=params.event.content)
+
+    # Load the previous conversation history from state (fall back to fresh).
+    task_state = await adk.state.get_by_task_and_agent(task_id=task_id, agent_id=agent_id)
+    if task_state is None:
+        state = ConversationState()
+        task_state = await adk.state.create(task_id=task_id, agent_id=agent_id, state=state)
+    else:
+        state = ConversationState.model_validate(task_state.state)
+
+    state.turn_number += 1
+    previous_messages = ModelMessagesTypeAdapter.validate_json(state.history_json)
+
+    async with adk.tracing.span(
+        trace_id=task_id,
+        task_id=task_id,
+        name=f"Turn {state.turn_number}",
+        input={"message": user_message},
+        data={"__span_type__": "AGENT_WORKFLOW"},
+    ) as turn_span:
+        # Construct the UnifiedEmitter from the ACP context so tracing is
+        # automatic and messages are auto-sent to the task stream (Redis).
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+
+        # Capture the terminal AgentRunResultEvent to persist message history.
+        captured_messages: list[Any] = []
+
+        async def tee_messages(upstream) -> AsyncIterator[Any]:
+            async for event in upstream:
+                if isinstance(event, AgentRunResultEvent):
+                    captured_messages[:] = list(event.result.all_messages())
+                yield event
+
+        async with agent.run_stream_events(user_message, message_history=previous_messages) as stream:
+            # The unified auto_send path delivers streamed tool requests natively
+            # (Start+Delta+Done), so no coalescing workaround is needed.
+            turn = PydanticAITurn(
+                tee_messages(stream),
+                model=MODEL_NAME,
+            )
+            result = await emitter.auto_send_turn(turn)
+
+        # Save the updated message history so the next turn picks up here.
+        if captured_messages:
+            state.history_json = ModelMessagesTypeAdapter.dump_json(captured_messages).decode()
+            await adk.state.update(
+                state_id=task_state.id,
+                task_id=task_id,
+                agent_id=agent_id,
+                state=state,
+            )
+
+        if turn_span:
+            turn_span.output = {"final_output": result.final_text}
+
+
+@acp.on_task_cancel
+async def handle_task_canceled(params: CancelTaskParams):
+    logger.info(f"Task canceled: {params.task.id}")
diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/agent.py b/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/agent.py
new file mode 100644
index 000000000..e7b764d82
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/agent.py
@@ -0,0 +1,39 @@
+"""Pydantic AI agent definition for the async harness test agent.
+
+The Agent is the boundary between this module and the API layer (acp.py).
+Pydantic AI handles its own tool-call loop internally — no graph required.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+
+from pydantic_ai import Agent
+
+from project.tools import get_weather
+
+__all__ = ["create_agent", "MODEL_NAME"]
+
+MODEL_NAME = "openai:gpt-4o-mini"
+SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools.
+
+Current date and time: {timestamp}
+
+Guidelines:
+- Be concise and helpful
+- Use tools when they would help answer the user's question
+- If you're unsure, ask clarifying questions
+- Always provide accurate information
+"""
+
+
+def create_agent() -> Agent:
+    """Build and return the Pydantic AI agent with tools registered."""
+    agent = Agent(
+        MODEL_NAME,
+        system_prompt=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
+    )
+
+    agent.tool_plain(get_weather)
+
+    return agent
diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/tools.py b/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/tools.py
new file mode 100644
index 000000000..0f16a7cb0
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/tools.py
@@ -0,0 +1,20 @@
+"""Tool definitions for the async harness Pydantic AI agent.
+
+Pydantic AI tools are registered directly on the Agent via decorators
+(see project.agent). This module hosts the bare function so it is easy to
+unit-test in isolation.
+"""
+
+from __future__ import annotations
+
+
+def get_weather(city: str) -> str:
+    """Get the current weather for a city.
+
+    Args:
+        city: The name of the city to get weather for.
+
+    Returns:
+        A string describing the weather conditions.
+    """
+    return f"The weather in {city} is sunny and 72°F"
diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/pyproject.toml b/examples/tutorials/10_async/00_base/harness_pydantic_ai/pyproject.toml
new file mode 100644
index 000000000..3dc1e0e41
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_pydantic_ai/pyproject.toml
@@ -0,0 +1,36 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "ab-harness-pydantic-ai"
+version = "0.1.0"
+description = "An async Pydantic AI harness test agent using the unified emitter surface"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+    "pydantic-ai-slim[openai]>=1.0,<2",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+    "black",
+    "isort",
+    "flake8",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
+
+[tool.black]
+line-length = 88
+target-version = ['py312']
+
+[tool.isort]
+profile = "black"
+line_length = 88
diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/tests/test_agent.py b/examples/tutorials/10_async/00_base/harness_pydantic_ai/tests/test_agent.py
new file mode 100644
index 000000000..11098c7d5
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_pydantic_ai/tests/test_agent.py
@@ -0,0 +1,118 @@
+"""Live tests for the async harness Pydantic AI agent.
+
+These tests require a running agent (server + deployed agent) and exercise the
+unified-surface async handler end-to-end over the wire. They mirror the
+``110_pydantic_ai`` async tutorial tests but target this harness agent.
+
+Offline coverage of the same wiring (TestModel + fake streaming/tracing) lives
+in ``tests/lib/core/harness/test_harness_pydantic_ai_async.py`` in the SDK repo.
+
+To run these tests:
+1. Make sure the agent is running (via docker-compose or `agentex agents run`)
+2. Set the AGENTEX_API_BASE_URL environment variable if not using default
+3. Run: pytest test_agent.py -v
+
+Configuration:
+- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003)
+- AGENT_NAME: Name of the agent to test (default: ab-harness-pydantic-ai)
+"""
+
+import os
+
+import pytest
+import pytest_asyncio
+
+from agentex import AsyncAgentex
+from agentex.types import TextContentParam
+from agentex.types.agent_rpc_params import ParamsCreateTaskRequest
+from agentex.lib.sdk.fastacp.base.base_acp_server import uuid
+
+AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
+AGENT_NAME = os.environ.get("AGENT_NAME", "ab-harness-pydantic-ai")
+
+
+@pytest_asyncio.fixture
+async def client():
+    """Create an AsyncAgentex client instance for testing."""
+    client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL)
+    yield client
+    await client.close()
+
+
+@pytest.fixture
+def agent_name():
+    """Return the agent name for testing."""
+    return AGENT_NAME
+
+
+@pytest_asyncio.fixture
+async def agent_id(client, agent_name):
+    """Retrieve the agent ID based on the agent name."""
+    agents = await client.agents.list()
+    for agent in agents:
+        if agent.name == agent_name:
+            return agent.id
+    raise ValueError(f"Agent with name {agent_name} not found.")
+
+
+class TestNonStreamingEvents:
+    """Test non-streaming event sending through the unified auto_send_turn path."""
+
+    @pytest.mark.asyncio
+    async def test_send_event(self, client: AsyncAgentex, agent_id: str):
+        """Test sending an event to the async harness Pydantic AI agent."""
+        task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
+        task = task_response.result
+        assert task is not None
+
+        event_content = TextContentParam(
+            type="text",
+            author="user",
+            content="Hello! What can you help me with?",
+        )
+        await client.agents.send_event(
+            agent_id=agent_id,
+            params={"task_id": task.id, "content": event_content},
+        )
+
+    @pytest.mark.asyncio
+    async def test_tool_calling(self, client: AsyncAgentex, agent_id: str):
+        """Test that the agent can use tools (e.g., weather tool)."""
+        task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
+        task = task_response.result
+        assert task is not None
+
+        event_content = TextContentParam(
+            type="text",
+            author="user",
+            content="What's the weather in San Francisco?",
+        )
+        await client.agents.send_event(
+            agent_id=agent_id,
+            params={"task_id": task.id, "content": event_content},
+        )
+
+
+class TestStreamingEvents:
+    """Test streaming event sending."""
+
+    @pytest.mark.asyncio
+    async def test_send_event_and_stream(self, client: AsyncAgentex, agent_id: str):
+        """Test sending an event and streaming the response."""
+        task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
+        task = task_response.result
+        assert task is not None
+
+        event_content = TextContentParam(
+            type="text",
+            author="user",
+            content="Tell me a short joke.",
+        )
+        await client.agents.send_event(
+            agent_id=agent_id,
+            params={"task_id": task.id, "content": event_content},
+        )
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/.dockerignore b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/.dockerignore
new file mode 100644
index 000000000..c49489471
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/.dockerignore
@@ -0,0 +1,43 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Environments
+.env**
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Git
+.git
+.gitignore
+
+# Misc
+.DS_Store
diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/Dockerfile b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/Dockerfile
new file mode 100644
index 000000000..98c74c6e8
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/Dockerfile
@@ -0,0 +1,43 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+ENV UV_HTTP_TIMEOUT=1000
+
+COPY 10_async/10_temporal/harness_pydantic_ai/pyproject.toml /app/harness_pydantic_ai/pyproject.toml
+COPY 10_async/10_temporal/harness_pydantic_ai/README.md /app/harness_pydantic_ai/README.md
+
+WORKDIR /app/harness_pydantic_ai
+
+COPY 10_async/10_temporal/harness_pydantic_ai/project /app/harness_pydantic_ai/project
+COPY 10_async/10_temporal/harness_pydantic_ai/tests /app/harness_pydantic_ai/tests
+COPY test_utils /app/test_utils
+
+RUN uv pip install --system .[dev]
+
+ENV PYTHONPATH=/app
+
+ENV AGENT_NAME=at-harness-pydantic-ai
+
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
+
+# When we deploy the worker, we will replace the CMD with the following
+# CMD ["python", "-m", "run_worker"]
diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/README.md b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/README.md
new file mode 100644
index 000000000..3e5fef4c6
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/README.md
@@ -0,0 +1,61 @@
+# Temporal Pydantic AI Harness Test Agent
+
+A minimal **Temporal-backed** Pydantic AI agent that drives the **unified
+harness surface** (`UnifiedEmitter.auto_send_turn` + `PydanticAITurn`) from
+inside the model activity's `event_stream_handler`.
+
+## Why this agent exists
+
+The `10_async/10_temporal/110_pydantic_ai` tutorial streams via the
+`stream_pydantic_ai_events` helper (which uses the unified surface internally).
+This harness test agent calls `emitter.auto_send_turn(...)` **explicitly** inside
+the `event_stream_handler`, making the unified-surface wiring visible and giving
+the temporal channel direct coverage.
+
+## How it wires the unified surface
+
+In `project/agent.py`, the `event_stream_handler` runs inside the model activity
+and constructs a `UnifiedEmitter` from `RunContext.deps`:
+
+```python
+async def event_handler(run_context, events):
+    emitter = UnifiedEmitter(
+        task_id=run_context.deps.task_id,
+        trace_id=run_context.deps.task_id,
+        parent_span_id=run_context.deps.parent_span_id,
+    )
+    turn = PydanticAITurn(events, model=MODEL_NAME, coalesce_tool_requests=True)
+    await emitter.auto_send_turn(turn)
+```
+
+- The handler runs inside a Temporal activity, so it can freely make
+  non-deterministic Redis + tracing writes.
+- `coalesce_tool_requests=True` is required on the auto_send path until
+  AGX1-377 lands.
+- `deps` (set by `project/workflow.py`) threads the `task_id` and the per-turn
+  `parent_span_id` into the handler so tool spans nest under the workflow's turn
+  span.
+
+## Structure
+
+- `project/acp.py` — thin ACP server; FastACP auto-wires HTTP routes to the
+  workflow when `TemporalACPConfig` is used.
+- `project/agent.py` — base `Agent` + `TemporalAgent` + the unified-surface
+  `event_stream_handler`.
+- `project/workflow.py` — durable workflow; each turn delegates to
+  `temporal_agent.run(...)`.
+- `project/run_worker.py` — Temporal worker entry point.
+- `project/tools.py` — async `get_weather(city)` returning a constant.
+- `tests/test_agent.py` — live integration test (requires Temporal + Redis +
+  ACP server + worker).
+
+## Tools
+
+- `get_weather(city: str) -> str` (async): returns a fixed "sunny and 72°F"
+  string. Each tool call becomes its own Temporal activity.
+
+## Offline coverage
+
+Offline integration tests for the same wiring (pydantic-ai `TestModel` + fake
+streaming/tracing, no Temporal server) live in the SDK repo at
+`tests/lib/core/harness/test_harness_pydantic_ai_temporal.py`.
diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/manifest.yaml b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/manifest.yaml
new file mode 100644
index 000000000..9efbff918
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/manifest.yaml
@@ -0,0 +1,62 @@
+build:
+  context:
+    root: ../../../
+    include_paths:
+      - 10_async/10_temporal/harness_pydantic_ai
+      - test_utils
+    dockerfile: 10_async/10_temporal/harness_pydantic_ai/Dockerfile
+    dockerignore: 10_async/10_temporal/harness_pydantic_ai/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+    worker: project/run_worker.py
+
+agent:
+  acp_type: async
+  name: at-harness-pydantic-ai
+  description: A Temporal-backed Pydantic AI harness test agent using the unified emitter surface
+
+  temporal:
+    enabled: true
+    workflows:
+      - name: at-harness-pydantic-ai
+        queue_name: at_harness_pydantic_ai_queue
+
+  credentials:
+    - env_var_name: REDIS_URL
+      secret_name: redis-url-secret
+      secret_key: url
+    - env_var_name: OPENAI_API_KEY
+      secret_name: openai-api-key
+      secret_key: api-key
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "at-harness-pydantic-ai"
+      description: "A Temporal-backed Pydantic AI harness test agent using the unified emitter surface"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/__init__.py b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/acp.py b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/acp.py
new file mode 100644
index 000000000..c142dcf70
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/acp.py
@@ -0,0 +1,35 @@
+"""ACP server for the Temporal harness Pydantic AI test agent.
+
+This file is intentionally thin. When ``acp_type="async"`` is combined with
+``TemporalACPConfig(type="temporal", ...)``, FastACP auto-wires:
+
+    HTTP task/create       → @workflow.run on the workflow class
+    HTTP task/event/send   → @workflow.signal(SignalName.RECEIVE_EVENT)
+    HTTP task/cancel       → workflow cancellation via the Temporal client
+
+so we don't define any handlers here. The actual agent code lives in
+``project/workflow.py`` and is executed by the Temporal worker
+(``project/run_worker.py``), not by this HTTP process.
+"""
+
+from __future__ import annotations
+
+import os
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from pydantic_ai.durable_exec.temporal import PydanticAIPlugin
+
+from agentex.lib.types.fastacp import TemporalACPConfig
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+
+acp = FastACP.create(
+    acp_type="async",
+    config=TemporalACPConfig(
+        type="temporal",
+        temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"),
+        plugins=[PydanticAIPlugin()],
+    ),
+)
diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/agent.py b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/agent.py
new file mode 100644
index 000000000..5e8697264
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/agent.py
@@ -0,0 +1,111 @@
+"""Pydantic AI agent definition for the Temporal harness test agent.
+
+This module constructs the base ``pydantic_ai.Agent`` once at import time,
+registers tools on it, and wraps it in ``TemporalAgent`` from
+``pydantic_ai.durable_exec.temporal``.
+
+The ``TemporalAgent`` wrapper makes every model call and every tool call run as
+a Temporal activity automatically. The workflow stays deterministic; the
+non-deterministic work (LLM HTTP calls, tool execution) moves into recorded
+activities.
+
+Streaming back to Agentex happens via ``event_stream_handler``, which receives
+Pydantic AI ``AgentStreamEvent``s from inside the model activity and forwards
+them through the UNIFIED HARNESS SURFACE (``UnifiedEmitter.auto_send_turn`` +
+``PydanticAITurn``) — called directly rather than via ``stream_pydantic_ai_events``.
+The ``task_id`` and per-turn ``parent_span_id`` are threaded into the handler
+via ``deps``.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+from collections.abc import AsyncIterable
+
+from pydantic import BaseModel
+from pydantic_ai import Agent, RunContext
+from pydantic_ai.messages import AgentStreamEvent
+from pydantic_ai.durable_exec.temporal import TemporalAgent
+
+from project.tools import get_weather
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
+
+__all__ = ["TaskDeps", "temporal_agent", "base_agent", "MODEL_NAME"]
+
+MODEL_NAME = "openai:gpt-4o-mini"
+SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools.
+
+Current date and time: {timestamp}
+
+Guidelines:
+- Be concise and helpful
+- Use tools when they would help answer the user's question
+- If you're unsure, ask clarifying questions
+- Always provide accurate information
+"""
+
+
+class TaskDeps(BaseModel):
+    """Per-run dependencies passed into the agent via ``deps=``.
+
+    Pydantic AI's ``RunContext.deps`` is the canonical place to thread
+    request-scoped data (like the Agentex task_id) into tools and event
+    handlers — including code that runs inside Temporal activities.
+    """
+
+    task_id: str
+    # When set, the event handler nests per-tool-call spans under this span.
+    # Typically the ID of the per-turn span opened by the workflow.
+    parent_span_id: str | None = None
+
+
+def _build_base_agent() -> Agent[TaskDeps, str]:
+    """Build the underlying Pydantic AI agent with tools registered.
+
+    Tools must be registered BEFORE the agent is wrapped in TemporalAgent;
+    changes to tool registration after wrapping are not reflected.
+    """
+    agent: Agent[TaskDeps, str] = Agent(
+        MODEL_NAME,
+        deps_type=TaskDeps,
+        system_prompt=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
+    )
+    agent.tool_plain(get_weather)
+    return agent
+
+
+async def event_handler(
+    run_context: RunContext[TaskDeps],
+    events: AsyncIterable[AgentStreamEvent],
+) -> None:
+    """Stream Pydantic AI events to Agentex via the unified surface.
+
+    Pydantic AI calls this with the live event stream as soon as the model
+    activity begins emitting parts. Because the handler runs inside the activity
+    (not the workflow), it can freely make non-deterministic Redis + tracing
+    writes.
+
+    The UnifiedEmitter is constructed from ``deps`` (task_id + parent_span_id),
+    so tool spans nest under the workflow's per-turn span and messages auto-send
+    to the task stream. The auto_send path delivers streamed tool requests
+    natively, so no coalescing workaround is needed.
+    """
+    emitter = UnifiedEmitter(
+        task_id=run_context.deps.task_id,
+        trace_id=run_context.deps.task_id,
+        parent_span_id=run_context.deps.parent_span_id,
+    )
+    turn = PydanticAITurn(events, model=MODEL_NAME)
+    await emitter.auto_send_turn(turn)
+
+
+# Construct the durable agent at module load time so that the PydanticAIPlugin
+# can auto-discover its activities via the workflow's ``__pydantic_ai_agents__``
+# attribute.
+base_agent = _build_base_agent()
+temporal_agent: TemporalAgent[TaskDeps, str] = TemporalAgent(
+    base_agent,
+    name="harness_pydantic_ai_agent",
+    event_stream_handler=event_handler,
+)
diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/run_worker.py b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/run_worker.py
new file mode 100644
index 000000000..4b4d43d19
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/run_worker.py
@@ -0,0 +1,48 @@
+"""Temporal worker for the harness Pydantic AI test agent.
+
+Run as a separate long-lived process alongside the ACP HTTP server. The worker
+polls Temporal for workflow + activity tasks and executes them.
+
+The ``PydanticAIPlugin`` reads ``__pydantic_ai_agents__`` off the workflow class
+and registers every model/tool activity the TemporalAgent needs — so we don't
+have to enumerate activities by hand here.
+"""
+
+import asyncio
+
+from pydantic_ai.durable_exec.temporal import PydanticAIPlugin
+
+from project.workflow import HarnessPydanticAiWorkflow
+from agentex.lib.utils.debug import setup_debug_if_enabled
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.environment_variables import EnvironmentVariables
+from agentex.lib.core.temporal.activities import get_all_activities
+from agentex.lib.core.temporal.workers.worker import AgentexWorker
+
+environment_variables = EnvironmentVariables.refresh()
+logger = make_logger(__name__)
+
+
+async def main():
+    setup_debug_if_enabled()
+
+    task_queue_name = environment_variables.WORKFLOW_TASK_QUEUE
+    if task_queue_name is None:
+        raise ValueError("WORKFLOW_TASK_QUEUE is not set")
+
+    # get_all_activities() returns the built-in Agentex activities (state,
+    # messages, streaming, tracing). Pydantic AI's TemporalAgent activities are
+    # auto-registered by PydanticAIPlugin via __pydantic_ai_agents__.
+    worker = AgentexWorker(
+        task_queue=task_queue_name,
+        plugins=[PydanticAIPlugin()],
+    )
+
+    await worker.run(
+        activities=get_all_activities(),
+        workflow=HarnessPydanticAiWorkflow,
+    )
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/tools.py b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/tools.py
new file mode 100644
index 000000000..bbd6c5200
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/tools.py
@@ -0,0 +1,24 @@
+"""Tool definitions for the Temporal harness Pydantic AI agent.
+
+These functions are registered on the base Pydantic AI agent. When the agent
+is wrapped in ``TemporalAgent``, each tool call becomes its own Temporal
+activity automatically — independently retryable and observable.
+
+Tools must be ``async`` because Pydantic AI's Temporal integration requires
+it: non-async tools would run in threads, which is non-deterministic and
+unsafe for Temporal replay.
+"""
+
+from __future__ import annotations
+
+
+async def get_weather(city: str) -> str:
+    """Get the current weather for a city.
+
+    Args:
+        city: The name of the city to get weather for.
+
+    Returns:
+        A string describing the weather conditions.
+    """
+    return f"The weather in {city} is sunny and 72°F"
diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/workflow.py b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/workflow.py
new file mode 100644
index 000000000..9a01be7de
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/workflow.py
@@ -0,0 +1,137 @@
+"""Temporal workflow for the harness Pydantic AI test agent.
+
+The workflow holds task state durably across crashes. Its signal handler
+delegates the actual agent run to ``temporal_agent.run(...)`` — which internally
+schedules model and tool activities, each independently durable. The
+``event_stream_handler`` registered on ``temporal_agent`` (see project.agent)
+pushes streaming deltas through the unified harness surface while the model
+activity runs.
+
+Multi-turn memory is kept on the workflow instance itself
+(``self._message_history``). Temporal's workflow state is already durable and
+replay-safe, so unlike the async-base agent we don't need an external
+``adk.state`` round-trip.
+"""
+
+from __future__ import annotations
+
+import os
+import json
+from typing import TYPE_CHECKING
+
+from temporalio import workflow
+
+from agentex.lib import adk
+from project.agent import TaskDeps, temporal_agent
+from agentex.lib.types.acp import SendEventParams, CreateTaskParams
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.types.text_content import TextContent
+from agentex.lib.environment_variables import EnvironmentVariables
+from agentex.lib.core.temporal.types.workflow import SignalName
+from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow
+from agentex.lib.core.tracing.tracing_processor_manager import (
+    add_tracing_processor_config,
+)
+
+if TYPE_CHECKING:
+    from pydantic_ai.messages import ModelMessage
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+environment_variables = EnvironmentVariables.refresh()
+
+if environment_variables.WORKFLOW_NAME is None:
+    raise ValueError("Environment variable WORKFLOW_NAME is not set")
+if environment_variables.AGENT_NAME is None:
+    raise ValueError("Environment variable AGENT_NAME is not set")
+
+logger = make_logger(__name__)
+
+
+@workflow.defn(name=environment_variables.WORKFLOW_NAME)
+class HarnessPydanticAiWorkflow(BaseWorkflow):
+    """Long-running Temporal workflow that delegates each turn to a Pydantic AI TemporalAgent.
+
+    The ``__pydantic_ai_agents__`` attribute is the marker the
+    ``PydanticAIPlugin`` looks for at worker startup: it pulls
+    ``temporal_agent.temporal_activities`` off this list and registers them on
+    the worker automatically — so we don't have to list activities by hand in
+    ``run_worker.py``.
+    """
+
+    __pydantic_ai_agents__ = [temporal_agent]
+
+    def __init__(self):
+        super().__init__(display_name=environment_variables.AGENT_NAME)
+        self._complete_task = False
+        self._turn_number = 0
+        # Conversation history accumulated across turns. Each entry is a
+        # pydantic-ai ``ModelMessage``. Temporal replays the activity that
+        # produced these messages, so the list is rebuilt deterministically if
+        # the workflow ever recovers from a crash.
+        self._message_history: list["ModelMessage"] = []
+
+    @workflow.signal(name=SignalName.RECEIVE_EVENT)
+    async def on_task_event_send(self, params: SendEventParams) -> None:
+        """Handle a new user message: echo it, then run the agent durably."""
+        logger.info(f"Received task event: {params.task.id}")
+        self._turn_number += 1
+
+        # Echo the user's message so it shows up in the UI as a chat bubble.
+        await adk.messages.create(task_id=params.task.id, content=params.event.content)
+
+        async with adk.tracing.span(
+            trace_id=params.task.id,
+            task_id=params.task.id,
+            name=f"Turn {self._turn_number}",
+            input={"message": params.event.content.content},
+        ) as span:
+            # temporal_agent.run() schedules a model activity, per-tool
+            # activities, and the event_stream_handler activity (which pushes
+            # deltas through the unified surface). Passing ``message_history``
+            # makes the run remember prior turns.
+            result = await temporal_agent.run(
+                params.event.content.content,
+                message_history=self._message_history,
+                deps=TaskDeps(
+                    task_id=params.task.id,
+                    parent_span_id=span.id if span else None,
+                ),
+            )
+            # Persist the new full history (user + assistant + any tool rounds)
+            # so the next turn picks up from here.
+            self._message_history = list(result.all_messages())
+            if span:
+                span.output = {"final_output": result.output}
+
+    @workflow.run
+    async def on_task_create(self, params: CreateTaskParams) -> str:
+        """Workflow entry point — keep the conversation alive for incoming signals."""
+        logger.info(f"Task created: {params.task.id}")
+
+        await adk.messages.create(
+            task_id=params.task.id,
+            content=TextContent(
+                author="agent",
+                content=(
+                    f"Task initialized with params:\n{json.dumps(params.params, indent=2)}\n"
+                    f"Send me a message and I'll respond using a Pydantic AI agent backed by Temporal."
+                ),
+            ),
+        )
+
+        await workflow.wait_condition(lambda: self._complete_task, timeout=None)
+        return "Task completed"
+
+    @workflow.signal
+    async def complete_task_signal(self) -> None:
+        """Graceful workflow shutdown signal."""
+        logger.info("Received complete_task signal")
+        self._complete_task = True
diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/pyproject.toml b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/pyproject.toml
new file mode 100644
index 000000000..4d9039640
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/pyproject.toml
@@ -0,0 +1,38 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "at-harness-pydantic-ai"
+version = "0.1.0"
+description = "A Temporal-backed Pydantic AI harness test agent using the unified emitter surface"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+    "temporalio>=1.18.2",
+    "pydantic-ai-slim[openai]>=1.0,<2",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+    "black",
+    "isort",
+    "flake8",
+    "debugpy>=1.8.15",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
+
+[tool.black]
+line-length = 88
+target-version = ['py312']
+
+[tool.isort]
+profile = "black"
+line_length = 88
diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/tests/test_agent.py
new file mode 100644
index 000000000..a5b90ca34
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/tests/test_agent.py
@@ -0,0 +1,114 @@
+"""Live tests for the Temporal harness Pydantic AI agent.
+
+These tests require a running agent (Temporal + Redis + ACP server + worker) and
+exercise the unified-surface event_stream_handler end-to-end over the wire. They
+mirror the ``at110`` temporal tutorial tests but target this harness agent.
+
+Offline coverage of the same wiring (TestModel + fake streaming/tracing) lives
+in ``tests/lib/core/harness/test_harness_pydantic_ai_temporal.py`` in the SDK repo.
+
+To run these tests:
+1. Make sure the agent is running (worker + ACP server)
+2. Set AGENTEX_API_BASE_URL if not using the default
+3. Run: pytest tests/test_agent.py -v
+"""
+
+import os
+import uuid
+
+import pytest
+import pytest_asyncio
+from test_utils.async_utils import poll_messages, send_event_and_poll_yielding
+
+from agentex import AsyncAgentex
+from agentex.types.task_message import TaskMessage
+from agentex.types.agent_rpc_params import ParamsCreateTaskRequest
+
+AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
+AGENT_NAME = os.environ.get("AGENT_NAME", "at-harness-pydantic-ai")
+
+
+@pytest_asyncio.fixture
+async def client():
+    client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL)
+    yield client
+    await client.close()
+
+
+@pytest.fixture
+def agent_name():
+    return AGENT_NAME
+
+
+@pytest_asyncio.fixture
+async def agent_id(client, agent_name):
+    agents = await client.agents.list()
+    for agent in agents:
+        if agent.name == agent_name:
+            return agent.id
+    raise ValueError(f"Agent with name {agent_name} not found.")
+
+
+class TestNonStreamingEvents:
+    """Test that the Temporal-backed harness agent responds and uses tools."""
+
+    @pytest.mark.asyncio
+    async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str):
+        """Drive a full turn: create task, send a weather question, verify tool round-trip."""
+        task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
+        task = task_response.result
+        assert task is not None
+
+        # Wait for the welcome message from on_task_create
+        task_creation_found = False
+        async for message in poll_messages(
+            client=client,
+            task_id=task.id,
+            timeout=30,
+            sleep_interval=1.0,
+        ):
+            assert isinstance(message, TaskMessage)
+            if message.content and message.content.type == "text" and message.content.author == "agent":
+                task_creation_found = True
+                break
+        assert task_creation_found, "Task creation welcome message not found"
+
+        # Ask about weather — the agent should call get_weather
+        seen_tool_request = False
+        seen_tool_response = False
+        final_message = None
+        async for message in send_event_and_poll_yielding(
+            client=client,
+            agent_id=agent_id,
+            task_id=task.id,
+            user_message="What is the weather in San Francisco?",
+            timeout=60,
+            sleep_interval=1.0,
+        ):
+            assert isinstance(message, TaskMessage)
+
+            if message.content and message.content.type == "tool_request":
+                seen_tool_request = True
+            if message.content and message.content.type == "tool_response":
+                seen_tool_response = True
+                if final_message and getattr(final_message, "streaming_status", None) == "DONE":
+                    break
+
+            if message.content and message.content.type == "text" and message.content.author == "agent":
+                final_message = message
+                content_length = len(getattr(message.content, "content", "") or "")
+                if message.streaming_status == "DONE" and content_length > 0:
+                    if not seen_tool_request or seen_tool_response:
+                        break
+
+        assert seen_tool_request, "Expected a tool_request (agent calling get_weather)"
+        assert seen_tool_response, "Expected a tool_response (get_weather result)"
+        assert final_message is not None, "Expected a final agent text message"
+        final_text = getattr(final_message.content, "content", None) if final_message.content else None
+        assert isinstance(final_text, str) and len(final_text) > 0
+        # The get_weather tool always returns "72°F" — the response should mention it.
+        assert "72" in final_text, "Expected weather response to mention 72°F"
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/src/agentex/lib/adk/_modules/_pydantic_ai_async.py b/src/agentex/lib/adk/_modules/_pydantic_ai_async.py
index 0bbb5b19d..85abfb845 100644
--- a/src/agentex/lib/adk/_modules/_pydantic_ai_async.py
+++ b/src/agentex/lib/adk/_modules/_pydantic_ai_async.py
@@ -6,11 +6,10 @@
 HTTP yields.
 
 Text and thinking tokens stream as deltas inside coalesced streaming
-contexts. Tool requests and tool results are emitted as full
-``adk.messages.create(...)`` calls (Option A — matches the async LangGraph
-helper's convention). To stream tool-call argument tokens, see the sync
-converter at ``agentex.lib.adk._modules._pydantic_ai_sync`` which yields
-``ToolRequestDelta`` events.
+contexts. Tool requests and tool results are posted as open+close pairs
+on a streaming context (the unified surface persists ``initial_content``
+when a context is closed without deltas). This matches the ``auto_send``
+convention used by all other async/Temporal harnesses.
 
 Tracing is opt-in via a ``tracing_handler`` parameter — see
 ``create_pydantic_ai_tracing_handler`` in
@@ -19,7 +18,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
     from agentex.lib.adk._modules._pydantic_ai_tracing import (
@@ -49,230 +48,18 @@ async def stream_pydantic_ai_events(
         more text) return only the final text segment, matching the
         ``stream_langgraph_events`` convention.
     """
-    # Lazy imports so pydantic-ai isn't required at module load time.
-    import json
+    from agentex.lib.core.harness.emitter import UnifiedEmitter
+    from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
 
-    from pydantic_ai.messages import (
-        TextPart,
-        PartEndEvent,
-        ThinkingPart,
-        ToolCallPart,
-        TextPartDelta,
-        PartDeltaEvent,
-        PartStartEvent,
-        ThinkingPartDelta,
-        FunctionToolResultEvent,
+    turn = PydanticAITurn(
+        stream,
+        model=None,
+        tracing_handler=tracing_handler,
     )
-
-    from agentex.lib import adk
-    from agentex.types.text_content import TextContent
-    from agentex.types.reasoning_content import ReasoningContent
-    from agentex.types.task_message_delta import TextDelta
-    from agentex.types.task_message_update import StreamTaskMessageDelta
-    from agentex.types.tool_request_content import ToolRequestContent
-    from agentex.types.tool_response_content import ToolResponseContent
-    from agentex.types.reasoning_content_delta import ReasoningContentDelta
-
-    text_context = None
-    reasoning_context = None
-    final_text = ""
-
-    # Per Pydantic-AI part-index bookkeeping. Part indices restart at 0 on
-    # each new model response, so we overwrite on PartStartEvent.
-    part_kind: dict[int, str] = {}
-    tool_call_info: dict[int, tuple[str, str]] = {}
-
-    async def _close_text():
-        nonlocal text_context
-        if text_context:
-            await text_context.close()
-            text_context = None
-
-    async def _close_reasoning():
-        nonlocal reasoning_context
-        if reasoning_context:
-            await reasoning_context.close()
-            reasoning_context = None
-
-    try:
-        async for event in stream:
-            if isinstance(event, PartStartEvent):
-                if isinstance(event.part, TextPart):
-                    await _close_reasoning()
-                    await _close_text()
-
-                    final_text = ""
-                    text_context = await adk.streaming.streaming_task_message_context(
-                        task_id=task_id,
-                        initial_content=TextContent(
-                            author="agent",
-                            content="",
-                            format="markdown",
-                        ),
-                    ).__aenter__()
-                    part_kind[event.index] = "text"
-
-                    # Pydantic AI puts the first streaming chunk in
-                    # PartStartEvent.part.content; surface it as a Delta so it
-                    # actually renders (Start.content is initialization, not body).
-                    if event.part.content:
-                        final_text += event.part.content
-                        await text_context.stream_update(
-                            StreamTaskMessageDelta(
-                                parent_task_message=text_context.task_message,
-                                delta=TextDelta(type="text", text_delta=event.part.content),
-                                type="delta",
-                            )
-                        )
-
-                elif isinstance(event.part, ThinkingPart):
-                    await _close_text()
-                    await _close_reasoning()
-
-                    reasoning_context = await adk.streaming.streaming_task_message_context(
-                        task_id=task_id,
-                        initial_content=ReasoningContent(
-                            author="agent",
-                            summary=[],
-                            content=[],
-                            type="reasoning",
-                            style="active",
-                        ),
-                    ).__aenter__()
-                    part_kind[event.index] = "reasoning"
-
-                    if event.part.content:
-                        await reasoning_context.stream_update(
-                            StreamTaskMessageDelta(
-                                parent_task_message=reasoning_context.task_message,
-                                delta=ReasoningContentDelta(
-                                    type="reasoning_content",
-                                    content_index=0,
-                                    content_delta=event.part.content,
-                                ),
-                                type="delta",
-                            )
-                        )
-
-                elif isinstance(event.part, ToolCallPart):
-                    await _close_text()
-                    await _close_reasoning()
-                    tool_call_info[event.index] = (
-                        event.part.tool_call_id,
-                        event.part.tool_name,
-                    )
-                    part_kind[event.index] = "tool_call"
-
-            elif isinstance(event, PartDeltaEvent):
-                kind = part_kind.get(event.index)
-                if kind == "text" and isinstance(event.delta, TextPartDelta) and text_context:
-                    final_text += event.delta.content_delta
-                    await text_context.stream_update(
-                        StreamTaskMessageDelta(
-                            parent_task_message=text_context.task_message,
-                            delta=TextDelta(type="text", text_delta=event.delta.content_delta),
-                            type="delta",
-                        )
-                    )
-                elif (
-                    kind == "reasoning"
-                    and isinstance(event.delta, ThinkingPartDelta)
-                    and reasoning_context
-                    and event.delta.content_delta
-                ):
-                    await reasoning_context.stream_update(
-                        StreamTaskMessageDelta(
-                            parent_task_message=reasoning_context.task_message,
-                            delta=ReasoningContentDelta(
-                                type="reasoning_content",
-                                content_index=0,
-                                content_delta=event.delta.content_delta,
-                            ),
-                            type="delta",
-                        )
-                    )
-                # Tool-call arg deltas: Pydantic AI accumulates them; we
-                # surface the final args on PartEndEvent below (Option A).
-
-            elif isinstance(event, PartEndEvent):
-                kind = part_kind.get(event.index)
-                if kind == "text":
-                    await _close_text()
-                elif kind == "reasoning":
-                    await _close_reasoning()
-                elif kind == "tool_call" and isinstance(event.part, ToolCallPart):
-                    tool_call_id, tool_name = tool_call_info.get(event.index, ("", ""))
-                    args = event.part.args
-                    if isinstance(args, str):
-                        try:
-                            args = json.loads(args) if args else {}
-                        except json.JSONDecodeError:
-                            args = {"_raw": args}
-                    elif args is None:
-                        args = {}
-                    await adk.messages.create(
-                        task_id=task_id,
-                        content=ToolRequestContent(
-                            tool_call_id=tool_call_id,
-                            name=tool_name,
-                            arguments=args,
-                            author="agent",
-                        ),
-                    )
-                    if tracing_handler is not None and tool_call_id:
-                        await tracing_handler.on_tool_start(
-                            tool_call_id=tool_call_id,
-                            tool_name=tool_name,
-                            arguments=args,
-                        )
-
-            elif isinstance(event, FunctionToolResultEvent):
-                await _close_text()
-                await _close_reasoning()
-
-                result = event.part
-                tool_call_id = result.tool_call_id
-                tool_name = getattr(result, "tool_name", "") or ""
-                # Preserve structure for dicts / lists / Pydantic models so the
-                # UI can render them as JSON, not as Python repr. Matches the
-                # sync converter's ``_tool_return_content`` helper exactly —
-                # ``str(content)`` on a dict produces ``"{'k': 'v'}"`` which is
-                # invalid JSON and unreadable in the UI.
-                content = getattr(result, "content", None)
-                content_payload: Any
-                if content is None:
-                    content_payload = str(result)
-                elif isinstance(content, (str, int, float, bool, list, dict)):
-                    content_payload = content
-                elif hasattr(content, "model_dump"):
-                    try:
-                        content_payload = content.model_dump()
-                    except Exception:
-                        content_payload = str(content)
-                else:
-                    content_payload = str(content)
-                await adk.messages.create(
-                    task_id=task_id,
-                    content=ToolResponseContent(
-                        tool_call_id=tool_call_id,
-                        name=tool_name,
-                        content=content_payload,
-                        author="agent",
-                    ),
-                )
-                if tracing_handler is not None and tool_call_id:
-                    await tracing_handler.on_tool_end(
-                        tool_call_id=tool_call_id,
-                        result=content_payload,
-                    )
-
-            # FunctionToolCallEvent / FinalResultEvent / AgentRunResultEvent
-            # are intentionally ignored — same as the sync converter.
-
-    finally:
-        if text_context:
-            await text_context.close()
-        if reasoning_context:
-            await reasoning_context.close()
-
-    return final_text
+    emitter = UnifiedEmitter(
+        task_id=task_id,
+        trace_id=None,
+        parent_span_id=None,
+    )
+    result = await emitter.auto_send_turn(turn)
+    return result.final_text
diff --git a/src/agentex/lib/adk/_modules/_pydantic_ai_sync.py b/src/agentex/lib/adk/_modules/_pydantic_ai_sync.py
index d94c0ae12..e4ac31e7e 100644
--- a/src/agentex/lib/adk/_modules/_pydantic_ai_sync.py
+++ b/src/agentex/lib/adk/_modules/_pydantic_ai_sync.py
@@ -16,12 +16,32 @@ async def handle_message_send(params):
         async with agent.run_stream_events(params.content.content) as stream:
             async for event in convert_pydantic_ai_to_agentex_events(stream):
                 yield event
+
+Recommended: unified surface
+-----------------------------
+For new handlers, prefer ``UnifiedEmitter`` + ``PydanticAITurn`` over the
+bare converter. The unified surface wires tracing automatically when a
+``trace_id`` is provided, so tool and reasoning spans are derived from the
+same event stream with no extra setup:
+
+    from agentex.lib.core.harness import UnifiedEmitter
+    from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
+
+    emitter = UnifiedEmitter(task_id=task_id, trace_id=trace_id, parent_span_id=parent_span_id)
+    turn = PydanticAITurn(agent.run_stream_events(prompt), model="openai:gpt-4o")
+    async for event in emitter.yield_turn(turn):
+        yield event   # forwarded over the ACP streaming response; spans derived automatically
+
+``convert_pydantic_ai_to_agentex_events`` remains the low-level tap for
+callers that manage their own tracing or need direct access to the raw
+converted stream.
 """
 
 from __future__ import annotations
 
 import json
-from typing import TYPE_CHECKING, Any, AsyncIterator
+import inspect
+from typing import TYPE_CHECKING, Any, Callable, AsyncIterator
 
 from pydantic_ai.run import AgentRunResultEvent
 
@@ -105,6 +125,7 @@ def _tool_return_content(result: ToolReturnPart | Any) -> Any:
 async def convert_pydantic_ai_to_agentex_events(
     stream_response: AsyncIterator[Any],
     tracing_handler: "AgentexPydanticAITracingHandler | None" = None,
+    on_result: Callable[[AgentRunResultEvent], Any] | None = None,
 ) -> AsyncIterator[StreamTaskMessageStart | StreamTaskMessageDelta | StreamTaskMessageFull | StreamTaskMessageDone]:
     """Convert a Pydantic AI agent event stream into Agentex stream events.
 
@@ -132,6 +153,12 @@ async def convert_pydantic_ai_to_agentex_events(
             tool call in the run is also recorded as an Agentex child span
             beneath the handler's configured ``parent_span_id``. Streaming
             behavior is unchanged when omitted.
+        on_result: Optional callback invoked with the terminal
+            ``AgentRunResultEvent`` when the run completes. Both sync and
+            async callables are accepted. No ``StreamTaskMessage*`` events are
+            yielded for this terminal event; the callback is the only side
+            effect. Useful for capturing run-level usage without altering the
+            streaming output.
 
     Yields:
         Agentex ``StreamTaskMessage*`` events suitable for forwarding back over
@@ -328,6 +355,10 @@ async def convert_pydantic_ai_to_agentex_events(
             # Already covered by PartStart/PartDelta/PartEnd events above, or
             # informational only (FinalResultEvent / AgentRunResultEvent signal
             # run-level state, not new content to surface).
+            if isinstance(event, AgentRunResultEvent) and on_result is not None:
+                ret = on_result(event)
+                if inspect.iscoroutine(ret):
+                    await ret
             continue
 
         else:
diff --git a/src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py b/src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py
index aa9d906eb..e199d0a8c 100644
--- a/src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py
+++ b/src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py
@@ -1,5 +1,29 @@
 """Tracing handler that records Agentex spans for tool calls in a pydantic-ai agent run.
 
+.. deprecated::
+    ``AgentexPydanticAITracingHandler`` and ``create_pydantic_ai_tracing_handler``
+    are superseded by the unified harness surface (``UnifiedEmitter`` in
+    ``agentex.lib.core.harness``). The unified surface derives tool and
+    reasoning spans directly from the canonical ``StreamTaskMessage*`` stream,
+    so no separate handler is required. Both symbols remain fully importable
+    and functional; they will be removed in a future release. New code should
+    construct a ``UnifiedEmitter`` with a ``trace_id`` instead:
+
+        from agentex.lib.core.harness import UnifiedEmitter
+        from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
+
+        emitter = UnifiedEmitter(task_id=task_id, trace_id=trace_id, parent_span_id=parent_span_id)
+        turn = PydanticAITurn(agent.run_stream_events(prompt), model="openai:gpt-4o")
+        async for event in emitter.yield_turn(turn):
+            yield event
+
+# NOTE: A runtime ``warnings.warn(..., DeprecationWarning)`` is intentionally
+# omitted here. The repo's pyproject ``filterwarnings = ["error"]`` would turn
+# it into a test/caller failure, and the async helper (``stream_pydantic_ai_events``)
+# still threads this handler through for existing callers that lack a ``trace_id``
+# on the async path. The runtime warning and caller migration are deferred until
+# ``trace_id`` threading lands on the async helper in a future API-versioning change.
+
 Mirrors the LangGraph tracing handler pattern: the caller creates a handler
 bound to a ``trace_id`` and a ``parent_span_id``, then hands it to
 ``stream_pydantic_ai_events(..., tracing_handler=handler)``. The streamer
@@ -63,6 +87,14 @@ def _tool_span_id(trace_id: str, tool_call_id: str) -> str:
 class AgentexPydanticAITracingHandler:
     """Records Agentex tracing spans for tool calls observed in a pydantic-ai event stream.
 
+    .. deprecated::
+        Superseded by ``UnifiedEmitter`` (``agentex.lib.core.harness``), which
+        derives tool and reasoning spans from the canonical ``StreamTaskMessage*``
+        stream automatically when ``trace_id`` is provided. This class remains
+        fully functional but will be removed in a future release. New code should
+        use ``UnifiedEmitter`` with a trace context instead of constructing this
+        handler directly.
+
     Pass an instance to ``stream_pydantic_ai_events(..., tracing_handler=...)``
     or call ``on_tool_start`` / ``on_tool_end`` yourself if you're consuming
     the event stream by hand.
@@ -165,6 +197,13 @@ def create_pydantic_ai_tracing_handler(
 ) -> AgentexPydanticAITracingHandler:
     """Create a tracing handler that records Agentex spans for pydantic-ai tool calls.
 
+    .. deprecated::
+        Superseded by ``UnifiedEmitter`` (``agentex.lib.core.harness``), which
+        derives tool and reasoning spans from the canonical ``StreamTaskMessage*``
+        stream automatically when ``trace_id`` is provided. This function remains
+        fully functional but will be removed in a future release. New code should
+        construct a ``UnifiedEmitter`` with a trace context instead.
+
     Args:
         trace_id: The trace ID. Typically the Agentex task ID.
         parent_span_id: Optional parent span ID to nest tool spans under. If
diff --git a/src/agentex/lib/adk/_modules/_pydantic_ai_turn.py b/src/agentex/lib/adk/_modules/_pydantic_ai_turn.py
new file mode 100644
index 000000000..b06172e7f
--- /dev/null
+++ b/src/agentex/lib/adk/_modules/_pydantic_ai_turn.py
@@ -0,0 +1,134 @@
+"""PydanticAITurn: a HarnessTurn wrapping a pydantic-ai event stream.
+
+Adapts a pydantic-ai ``AgentStreamEvent`` stream into the canonical
+``StreamTaskMessage*`` stream while capturing run-level usage from the
+terminal ``AgentRunResultEvent``.
+
+Typical usage::
+
+    async with agent.run_stream_events(user_msg) as stream:
+        turn = PydanticAITurn(stream, model="openai:gpt-4o")
+        async for event in turn.events:
+            yield event
+        span.set_attributes(turn.usage().model_dump())
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, AsyncIterator
+
+from pydantic_ai.run import AgentRunResultEvent
+
+from agentex.lib.core.harness.types import TurnUsage
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.lib.adk._modules._pydantic_ai_sync import convert_pydantic_ai_to_agentex_events
+
+if TYPE_CHECKING:
+    from agentex.lib.adk._modules._pydantic_ai_tracing import AgentexPydanticAITracingHandler
+
+StreamTaskMessage = StreamTaskMessageStart | StreamTaskMessageDelta | StreamTaskMessageFull | StreamTaskMessageDone
+
+
+def pydantic_ai_usage_to_turn_usage(usage: Any, model: str | None) -> TurnUsage:
+    """Map a pydantic-ai ``RunUsage`` onto ``TurnUsage``.
+
+    Uses defensive ``getattr(..., None)`` so a future field rename in
+    pydantic-ai degrades to ``None`` rather than raising ``AttributeError``.
+
+    RunUsage fields (verified against pydantic-ai in this repo):
+        input_tokens, cache_write_tokens, cache_read_tokens, output_tokens,
+        input_audio_tokens, cache_audio_read_tokens, output_audio_tokens,
+        details, requests, tool_calls.
+    ``total_tokens`` is a computed property.
+
+    Mapping:
+        requests           -> num_llm_calls
+        input_tokens       -> input_tokens
+        output_tokens      -> output_tokens
+        cache_read_tokens  -> cached_input_tokens
+        total_tokens       -> total_tokens
+
+    getattr results pass straight through: a MISSING attribute degrades to
+    None (defensive), while a real 0 stays 0 (a cache-hit with 0 output
+    tokens is a genuine zero, not "unknown") and a real N stays N.
+    """
+    raw_input = getattr(usage, "input_tokens", None)
+    raw_output = getattr(usage, "output_tokens", None)
+    raw_cache_read = getattr(usage, "cache_read_tokens", None)
+    raw_total = getattr(usage, "total_tokens", None)
+    raw_requests = getattr(usage, "requests", None)
+
+    return TurnUsage(
+        model=model,
+        input_tokens=raw_input,
+        output_tokens=raw_output,
+        cached_input_tokens=raw_cache_read,
+        total_tokens=raw_total,
+        num_llm_calls=raw_requests if raw_requests is not None else 0,
+    )
+
+
+class PydanticAITurn:
+    """A single harness turn backed by a pydantic-ai event stream.
+
+    Satisfies the ``HarnessTurn`` protocol: ``events`` async-generates the
+    canonical ``StreamTaskMessage*`` stream; ``usage()`` returns a normalized
+    ``TurnUsage`` (valid only after ``events`` is exhausted).
+
+    ``events`` is identical to the bare ``convert_pydantic_ai_to_agentex_events``
+    output (tool calls stream as ``Start + ToolRequestDelta + Done``, preserving
+    argument-token streaming on the sync/yield channel). The foundation
+    ``auto_send`` delivers the streamed tool-request shape natively (AGX1-377),
+    so no coalescing is needed on either channel.
+    """
+
+    def __init__(
+        self,
+        stream: AsyncIterator[Any],
+        model: str | None = None,
+        tracing_handler: "AgentexPydanticAITracingHandler | None" = None,
+    ) -> None:
+        self._stream = stream
+        self._model = model
+        self._tracing_handler = tracing_handler
+        self._usage = TurnUsage(model=model)
+
+    @property
+    def events(self) -> AsyncIterator[StreamTaskMessage]:
+        return self._generate_events()
+
+    async def _generate_events(self) -> AsyncIterator[StreamTaskMessage]:
+        def _capture(result_event: AgentRunResultEvent) -> None:
+            run_result = getattr(result_event, "result", None)
+            if run_result is None:
+                return
+            usage_attr = getattr(run_result, "usage", None)
+            if usage_attr is None:
+                return
+            # In newer pydantic-ai, .usage is a DeprecatedCallableRunUsage —
+            # it's both a property value and callable (emitting a deprecation
+            # warning when called). Access it as a plain attribute to avoid the
+            # warning; it already IS the RunUsage instance.
+            usage_obj = usage_attr
+            self._usage = pydantic_ai_usage_to_turn_usage(usage_obj, self._model)
+
+        raw_stream = convert_pydantic_ai_to_agentex_events(
+            self._stream,
+            tracing_handler=self._tracing_handler,
+            on_result=_capture,
+        )
+        async for ev in raw_stream:
+            yield ev
+
+    def usage(self) -> TurnUsage:
+        """Return the normalized usage for this turn.
+
+        Valid only after ``events`` is exhausted (single-pass contract).
+        Before exhaustion the model field is set but token fields are None.
+        """
+        return self._usage
diff --git a/tests/lib/adk/test_pydantic_ai_async.py b/tests/lib/adk/test_pydantic_ai_async.py
index dadda5914..49cb6054c 100644
--- a/tests/lib/adk/test_pydantic_ai_async.py
+++ b/tests/lib/adk/test_pydantic_ai_async.py
@@ -82,7 +82,9 @@ class FakeStreamingModule:
     def __init__(self) -> None:
         self.contexts: list[FakeContext] = []
 
-    def streaming_task_message_context(self, *, task_id: str, initial_content: Any) -> FakeContext:
+    def streaming_task_message_context(
+        self, *, task_id: str, initial_content: Any, streaming_mode: str = "coalesced", created_at: Any = None
+    ) -> FakeContext:
         tm = TaskMessage(
             id=f"m{len(self.contexts) + 1}",
             task_id=task_id,
@@ -255,16 +257,36 @@ async def test_empty_thinking_delta_is_skipped(
 
 
 class TestToolCallEmission:
-    async def test_tool_call_emits_full_tool_request_message_on_part_end(
+    async def test_tool_call_opens_streaming_context_with_identity(
         self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
     ) -> None:
-        """Async helper uses Option A: tool requests are full messages, not delta streams."""
+        """Tool requests are delivered as a streaming context (Start+Delta+Done).
+
+        AGX1-377 fix: auto_send now delivers streamed tool-request messages
+        natively (Start+ToolRequestDelta+Done). The streaming context is opened
+        at the Start event with the initial ToolRequestContent (tool_call_id +
+        name + empty arguments), argument tokens are streamed as deltas, and the
+        context is closed on Done.
+
+        This test uses a realistic pydantic-ai event sequence: args arrive as a
+        PartDeltaEvent fragment (the way OpenAI/Anthropic actually stream JSON
+        tool-call arguments).
+        """
+        from pydantic_ai.messages import ToolCallPartDelta
+
+        from agentex.types.tool_request_delta import ToolRequestDelta
+
         streaming, messages = fake_adk
         events = [
             PartStartEvent(
                 index=1,
                 part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="c1"),
             ),
+            # Realistic: args arrive as delta tokens (JSON string fragments).
+            PartDeltaEvent(
+                index=1,
+                delta=ToolCallPartDelta(args_delta='{"city":"Paris"}'),
+            ),
             PartEndEvent(
                 index=1,
                 part=ToolCallPart(tool_name="get_weather", args='{"city":"Paris"}', tool_call_id="c1"),
@@ -272,21 +294,28 @@ async def test_tool_call_emits_full_tool_request_message_on_part_end(
         ]
         await stream_pydantic_ai_events(_aiter(events), TASK_ID)
 
-        assert streaming.contexts == [], "Tool calls do not open a streaming context"
-        assert len(messages.created) == 1
-        msg = messages.created[0]
-        assert msg["task_id"] == TASK_ID
-        content = msg["content"]
+        # AGX1-373: tool messages arrive via streaming_task_message_context.
+        assert messages.created == [], "adk.messages.create must not be called"
+        assert len(streaming.contexts) == 1, "tool_request opens a streaming context"
+        ctx = streaming.contexts[0]
+        assert ctx.closed is True
+        content = ctx.initial_content
         assert isinstance(content, ToolRequestContent)
         assert content.tool_call_id == "c1"
         assert content.name == "get_weather"
-        assert content.arguments == {"city": "Paris"}
         assert content.author == "agent"
+        # AGX1-377 streamed shape: initial_content has empty args (args come via delta)
+        assert content.arguments == {}
+        # The arg delta is delivered as a stream_update
+        assert len(ctx.updates) == 1
+        assert isinstance(ctx.updates[0].delta, ToolRequestDelta)
+        assert ctx.updates[0].delta.arguments_delta == '{"city":"Paris"}'
 
     async def test_tool_call_with_dict_args_passes_through(
         self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
     ) -> None:
-        _, messages = fake_adk
+        """When args arrive pre-populated as a dict in PartStart, they're in initial_content."""
+        streaming, messages = fake_adk
         events = [
             PartStartEvent(
                 index=0,
@@ -299,23 +328,40 @@ async def test_tool_call_with_dict_args_passes_through(
         ]
         await stream_pydantic_ai_events(_aiter(events), TASK_ID)
 
-        assert len(messages.created) == 1
-        assert messages.created[0]["content"].arguments == {"q": "weather"}
+        # AGX1-373: tool messages via streaming_task_message_context
+        assert messages.created == []
+        assert len(streaming.contexts) == 1
+        # Dict args present at PartStart land directly in initial_content.arguments
+        assert streaming.contexts[0].initial_content.arguments == {"q": "weather"}
+        assert streaming.contexts[0].updates == [], "no delta for pre-populated dict args"
 
     async def test_tool_call_with_invalid_json_args_surfaces_raw(
         self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
     ) -> None:
-        """Don't drop the tool call when the model emits malformed JSON args.
+        """Malformed JSON arg delta is surfaced as a ToolRequestDelta with the raw string.
+
+        The argument delta is delivered as-is by auto_send; the client-side
+        accumulator or the streaming backend handles malformed JSON gracefully.
 
-        The arguments field is preserved under ``_raw`` so the failure is
-        visible to the UI rather than silently truncated.
+        Parts-manager invariant: PartEnd.part is the accumulated snapshot; real
+        pydantic-ai conveys args via PartStart + PartDeltaEvent, so a
+        PartStart(None)+PartEnd(json) with no delta is not realizable.
         """
-        _, messages = fake_adk
+        from pydantic_ai.messages import ToolCallPartDelta
+
+        from agentex.types.tool_request_delta import ToolRequestDelta
+
+        streaming, messages = fake_adk
         events = [
             PartStartEvent(
                 index=0,
                 part=ToolCallPart(tool_name="t", args=None, tool_call_id="c"),
             ),
+            # Malformed JSON arrives as a delta token.
+            PartDeltaEvent(
+                index=0,
+                delta=ToolCallPartDelta(args_delta="not-json{"),
+            ),
             PartEndEvent(
                 index=0,
                 part=ToolCallPart(tool_name="t", args="not-json{", tool_call_id="c"),
@@ -323,13 +369,21 @@ async def test_tool_call_with_invalid_json_args_surfaces_raw(
         ]
         await stream_pydantic_ai_events(_aiter(events), TASK_ID)
 
-        assert len(messages.created) == 1
-        assert messages.created[0]["content"].arguments == {"_raw": "not-json{"}
+        # AGX1-373: tool messages via streaming_task_message_context
+        assert messages.created == []
+        assert len(streaming.contexts) == 1
+        ctx = streaming.contexts[0]
+        # Initial content has empty args (args come via delta)
+        assert ctx.initial_content.arguments == {}
+        # The malformed JSON is surfaced verbatim in the ToolRequestDelta
+        assert len(ctx.updates) == 1
+        assert isinstance(ctx.updates[0].delta, ToolRequestDelta)
+        assert ctx.updates[0].delta.arguments_delta == "not-json{"
 
     async def test_tool_call_with_none_args_defaults_to_empty_dict(
         self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
     ) -> None:
-        _, messages = fake_adk
+        streaming, messages = fake_adk
         events = [
             PartStartEvent(
                 index=0,
@@ -342,15 +396,20 @@ async def test_tool_call_with_none_args_defaults_to_empty_dict(
         ]
         await stream_pydantic_ai_events(_aiter(events), TASK_ID)
 
-        assert len(messages.created) == 1
-        assert messages.created[0]["content"].arguments == {}
+        # AGX1-373: tool messages via streaming_task_message_context
+        assert messages.created == []
+        assert len(streaming.contexts) == 1
+        assert streaming.contexts[0].initial_content.arguments == {}
+        assert streaming.contexts[0].updates == [], "no delta when args are absent"
 
 
 class TestToolResult:
     async def test_tool_return_emits_full_tool_response_message(
         self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
     ) -> None:
-        _, messages = fake_adk
+        # AGX1-373: tool responses arrive via streaming_task_message_context
+        # (open+close pair), NOT via adk.messages.create.
+        streaming, messages = fake_adk
         events = [
             FunctionToolResultEvent(
                 part=ToolReturnPart(tool_name="get_weather", content="Sunny, 72F", tool_call_id="c1"),
@@ -358,13 +417,17 @@ async def test_tool_return_emits_full_tool_response_message(
         ]
         await stream_pydantic_ai_events(_aiter(events), TASK_ID)
 
-        assert len(messages.created) == 1
-        content = messages.created[0]["content"]
+        assert messages.created == [], "adk.messages.create must not be called after reimplementation"
+        assert len(streaming.contexts) == 1
+        ctx = streaming.contexts[0]
+        assert ctx.closed is True
+        content = ctx.initial_content
         assert isinstance(content, ToolResponseContent)
         assert content.tool_call_id == "c1"
         assert content.name == "get_weather"
         assert content.content == "Sunny, 72F"
         assert content.author == "agent"
+        assert ctx.updates == [], "open+close only — no deltas for tool messages"
 
     async def test_tool_return_with_dict_content_preserves_structure(
         self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
@@ -377,7 +440,7 @@ async def test_tool_return_with_dict_content_preserves_structure(
         and divergent from the sync converter which uses ``_tool_return_content``
         to return dicts as-is.
         """
-        _, messages = fake_adk
+        streaming, messages = fake_adk
         events = [
             FunctionToolResultEvent(
                 part=ToolReturnPart(tool_name="t", content={"temp": 72, "sky": "clear"}, tool_call_id="c"),
@@ -385,7 +448,10 @@ async def test_tool_return_with_dict_content_preserves_structure(
         ]
         await stream_pydantic_ai_events(_aiter(events), TASK_ID)
 
-        out = messages.created[0]["content"].content
+        # AGX1-373: tool messages via streaming_task_message_context
+        assert messages.created == []
+        assert len(streaming.contexts) == 1
+        out = streaming.contexts[0].initial_content.content
         assert out == {"temp": 72, "sky": "clear"}, (
             f"Expected the dict to survive verbatim; got {out!r}. "
             "If this is a Python repr string, the helper regressed to str(content)."
@@ -402,7 +468,7 @@ class WeatherResult(BaseModel):
             temp: int
             sky: str
 
-        _, messages = fake_adk
+        streaming, messages = fake_adk
         events = [
             FunctionToolResultEvent(
                 part=ToolReturnPart(
@@ -414,13 +480,16 @@ class WeatherResult(BaseModel):
         ]
         await stream_pydantic_ai_events(_aiter(events), TASK_ID)
 
-        out = messages.created[0]["content"].content
+        # AGX1-373: tool messages via streaming_task_message_context
+        assert messages.created == []
+        assert len(streaming.contexts) == 1
+        out = streaming.contexts[0].initial_content.content
         assert out == {"temp": 72, "sky": "clear"}
 
     async def test_retry_prompt_part_surfaces_as_tool_response(
         self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
     ) -> None:
-        _, messages = fake_adk
+        streaming, messages = fake_adk
         events = [
             FunctionToolResultEvent(
                 part=RetryPromptPart(
@@ -432,8 +501,10 @@ async def test_retry_prompt_part_surfaces_as_tool_response(
         ]
         await stream_pydantic_ai_events(_aiter(events), TASK_ID)
 
-        assert len(messages.created) == 1
-        content = messages.created[0]["content"]
+        # AGX1-373: tool messages via streaming_task_message_context
+        assert messages.created == []
+        assert len(streaming.contexts) == 1
+        content = streaming.contexts[0].initial_content
         assert isinstance(content, ToolResponseContent)
         assert content.tool_call_id == "c1"
         # RetryPromptPart.content stringifies to the error description
@@ -446,9 +517,9 @@ async def test_text_then_tool_then_text_uses_separate_contexts_in_order(
     ) -> None:
         """End-to-end multi-step shape: text → tool call → tool result → more text.
 
-        Each text/reasoning segment must get its own streaming context that is
-        closed before the next one opens, and tool messages must interleave
-        correctly via ``adk.messages.create``.
+        AGX1-373 envelope change: tool messages now arrive via
+        streaming_task_message_context (open+close pairs) instead of
+        adk.messages.create. All four message types open streaming contexts.
         """
         streaming, messages = fake_adk
         events = [
@@ -474,18 +545,30 @@ async def test_text_then_tool_then_text_uses_separate_contexts_in_order(
         ]
         final = await stream_pydantic_ai_events(_aiter(events), TASK_ID)
 
-        assert len(streaming.contexts) == 2, "One context per text part — tool calls don't open streaming contexts"
+        # AGX1-373: all 4 messages (text, tool_request, tool_response, text)
+        # arrive via streaming_task_message_context.
+        assert messages.created == [], "adk.messages.create must not be called after reimplementation"
+        assert len(streaming.contexts) == 4
         assert all(ctx.closed for ctx in streaming.contexts)
-        assert _text_deltas(streaming.contexts[0]) == ["Looking up..."]
-        assert _text_deltas(streaming.contexts[1]) == ["It's sunny."]
 
-        # Two messages: tool request, then tool response — in that order.
-        assert [type(m["content"]).__name__ for m in messages.created] == [
-            "ToolRequestContent",
-            "ToolResponseContent",
-        ]
-        assert messages.created[0]["content"].tool_call_id == "c1"
-        assert messages.created[1]["content"].tool_call_id == "c1"
+        text_ctxs = [ctx for ctx in streaming.contexts if isinstance(ctx.initial_content, TextContent)]
+        tool_req_ctxs = [ctx for ctx in streaming.contexts if isinstance(ctx.initial_content, ToolRequestContent)]
+        tool_resp_ctxs = [ctx for ctx in streaming.contexts if isinstance(ctx.initial_content, ToolResponseContent)]
+        assert len(text_ctxs) == 2
+        assert len(tool_req_ctxs) == 1
+        assert len(tool_resp_ctxs) == 1
+
+        assert _text_deltas(text_ctxs[0]) == ["Looking up..."]
+        assert _text_deltas(text_ctxs[1]) == ["It's sunny."]
+
+        # Tool content is preserved verbatim.
+        assert tool_req_ctxs[0].initial_content.tool_call_id == "c1"
+        assert tool_resp_ctxs[0].initial_content.tool_call_id == "c1"
+
+        # Tool contexts carry no deltas (open+close only).
+        assert tool_req_ctxs[0].updates == []
+        assert tool_resp_ctxs[0].updates == []
+
         assert final == "It's sunny."
 
     async def test_new_text_part_after_text_closes_previous(
@@ -533,7 +616,11 @@ async def test_reasoning_then_text_closes_reasoning_context(
     async def test_tool_result_closes_any_open_streaming_context(
         self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
     ) -> None:
-        """A tool result arriving while a text context is open must close that context first."""
+        """A tool result arriving while a text context is open must close that context first.
+
+        AGX1-373: the tool response itself now also opens a streaming context
+        (open+close pair) rather than going through adk.messages.create.
+        """
         streaming, messages = fake_adk
         events = [
             PartStartEvent(index=0, part=TextPart(content="")),
@@ -548,7 +635,10 @@ async def test_tool_result_closes_any_open_streaming_context(
         assert streaming.contexts[0].closed is True, (
             "Helper must close any open streaming context before emitting a tool result message"
         )
-        assert len(messages.created) == 1
+        # AGX1-373: tool response arrives via streaming_task_message_context
+        assert messages.created == []
+        assert len(streaming.contexts) == 2
+        assert isinstance(streaming.contexts[1].initial_content, ToolResponseContent)
 
 
 class TestDeltaForOrphanIndexIgnored:
@@ -584,7 +674,7 @@ async def on_tool_end(self, tool_call_id: str, result: Any) -> None:
     async def test_handler_records_start_and_end_for_each_tool_call(
         self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
     ) -> None:
-        _, messages = fake_adk
+        streaming, messages = fake_adk
         handler = self._RecordingHandler()
         events = [
             PartStartEvent(
@@ -605,11 +695,12 @@ async def test_handler_records_start_and_end_for_each_tool_call(
             tracing_handler=handler,  # type: ignore[arg-type]
         )
 
-        # Streaming side-effects still happen — tracing is additive.
-        assert [type(m["content"]).__name__ for m in messages.created] == [
-            "ToolRequestContent",
-            "ToolResponseContent",
-        ]
+        # AGX1-373: tool messages arrive via streaming_task_message_context.
+        # Tracing is still additive — both messages are delivered AND hooks fire.
+        assert messages.created == []
+        assert len(streaming.contexts) == 2
+        assert isinstance(streaming.contexts[0].initial_content, ToolRequestContent)
+        assert isinstance(streaming.contexts[1].initial_content, ToolResponseContent)
         # And both lifecycle hooks fired exactly once with the right payload.
         assert handler.starts == [
             {
@@ -680,8 +771,12 @@ async def test_handler_records_each_tool_in_multi_tool_run(
     async def test_omitting_handler_is_a_no_op_for_existing_behavior(
         self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
     ) -> None:
-        """Regression: passing no tracing handler preserves the pre-tracing behavior."""
-        _, messages = fake_adk
+        """Regression: passing no tracing handler preserves streaming behavior.
+
+        AGX1-373: tool messages arrive via streaming_task_message_context
+        regardless of whether tracing_handler is passed.
+        """
+        streaming, messages = fake_adk
         events = [
             PartStartEvent(
                 index=0,
@@ -696,11 +791,11 @@ async def test_omitting_handler_is_a_no_op_for_existing_behavior(
             ),
         ]
         await stream_pydantic_ai_events(_aiter(events), TASK_ID)
-        # Exact same shape as before tracing existed.
-        assert [type(m["content"]).__name__ for m in messages.created] == [
-            "ToolRequestContent",
-            "ToolResponseContent",
-        ]
+        # AGX1-373: tool messages via streaming_task_message_context.
+        assert messages.created == []
+        assert len(streaming.contexts) == 2
+        content_types = [type(ctx.initial_content).__name__ for ctx in streaming.contexts]
+        assert content_types == ["ToolRequestContent", "ToolResponseContent"]
 
 
 class TestPydanticAITracingHandlerDeterministicIds:
@@ -867,3 +962,101 @@ async def boom() -> AsyncIterator[Any]:
             await stream_pydantic_ai_events(boom(), TASK_ID)
 
         assert streaming.contexts[0].closed is True
+
+
+# ---------------------------------------------------------------------------
+# Characterization test: lock the wire-level delivery shape for a representative
+# pydantic-ai run (text + tool call + tool response + more text).
+#
+# Step 1 (CURRENT behavior): written against the original implementation.
+# - Text/reasoning use adk.streaming.streaming_task_message_context.
+# - Tool messages use adk.messages.create (FakeMessagesModule.created list).
+# - Final text is the last text segment.
+#
+# Step 2 (POST-reimplementation on UnifiedEmitter / auto_send):
+# The assertions in TestCharacterizeWireShapeNew (below) lock the new shape.
+# Tool messages no longer go through adk.messages.create; they arrive via
+# streaming_task_message_context open+close pairs (Start+Done envelope).
+# This is the AGX1-373 accepted envelope change: logical content is identical.
+# ---------------------------------------------------------------------------
+
+
+class TestCharacterizeWireShape:
+    """Characterization tests: lock the wire-level delivery shape after reimplementation.
+
+    Uses FakeStreamingModule + FakeMessagesModule (the existing fake pair).
+
+    AGX1-373 shape (post-reimplementation on UnifiedEmitter / auto_send):
+    - Text/reasoning: streaming_task_message_context (open + deltas + close)
+    - Tool messages: streaming_task_message_context (open+close, no deltas)
+    - adk.messages.create is NOT called.
+    - Final text == last text segment only.
+
+    This class was first written to characterize the OLD shape (adk.messages.create
+    for tool messages) and was updated post-reimplementation to reflect the new
+    delivery channel. The logical content is identical; only the channel changed.
+    """
+
+    async def test_text_tool_text_new_wire_shape(
+        self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
+    ) -> None:
+        """Representative run: text -> tool call -> tool response -> more text.
+
+        Post-AGX1-373 delivery shape:
+        - Four streaming contexts: text, tool_request, tool_response, text.
+        - adk.messages.create NOT called.
+        - Final text == "It's sunny." (last segment only, matching the
+          multi-step convention).
+        """
+        from pydantic_ai.messages import ToolReturnPart
+
+        streaming, messages = fake_adk
+        events = [
+            PartStartEvent(index=0, part=TextPart(content="")),
+            PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="Looking up...")),
+            PartEndEvent(index=0, part=TextPart(content="Looking up...")),
+            PartStartEvent(
+                index=1,
+                part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="c1"),
+            ),
+            PartEndEvent(
+                index=1,
+                part=ToolCallPart(tool_name="get_weather", args="{}", tool_call_id="c1"),
+            ),
+            FunctionToolResultEvent(
+                part=ToolReturnPart(tool_name="get_weather", content="Sunny", tool_call_id="c1"),
+            ),
+            PartStartEvent(index=0, part=TextPart(content="")),
+            PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="It's sunny.")),
+            PartEndEvent(index=0, part=TextPart(content="It's sunny.")),
+        ]
+
+        final = await stream_pydantic_ai_events(_aiter(events), TASK_ID)
+
+        assert final == "It's sunny.", "multi-step: only the last text segment is returned"
+
+        # AGX1-373: all 4 messages arrive via streaming_task_message_context
+        assert messages.created == []
+        assert len(streaming.contexts) == 4
+        assert all(ctx.closed for ctx in streaming.contexts)
+
+        content_types = [type(ctx.initial_content).__name__ for ctx in streaming.contexts]
+        assert content_types == [
+            "TextContent",
+            "ToolRequestContent",
+            "ToolResponseContent",
+            "TextContent",
+        ]
+
+        text_ctxs = [ctx for ctx in streaming.contexts if isinstance(ctx.initial_content, TextContent)]
+        tool_req_ctxs = [ctx for ctx in streaming.contexts if isinstance(ctx.initial_content, ToolRequestContent)]
+        tool_resp_ctxs = [ctx for ctx in streaming.contexts if isinstance(ctx.initial_content, ToolResponseContent)]
+
+        assert _text_deltas(text_ctxs[0]) == ["Looking up..."]
+        assert _text_deltas(text_ctxs[1]) == ["It's sunny."]
+        assert tool_req_ctxs[0].initial_content.tool_call_id == "c1"
+        assert tool_req_ctxs[0].initial_content.name == "get_weather"
+        assert tool_req_ctxs[0].updates == []
+        assert tool_resp_ctxs[0].initial_content.tool_call_id == "c1"
+        assert tool_resp_ctxs[0].initial_content.content == "Sunny"
+        assert tool_resp_ctxs[0].updates == []
diff --git a/tests/lib/adk/test_pydantic_ai_sync.py b/tests/lib/adk/test_pydantic_ai_sync.py
index 36d06200e..080bc5be8 100644
--- a/tests/lib/adk/test_pydantic_ai_sync.py
+++ b/tests/lib/adk/test_pydantic_ai_sync.py
@@ -3,9 +3,11 @@
 from __future__ import annotations
 
 import json
+import asyncio
 from typing import Any, AsyncIterator
 
 import pytest
+from pydantic_ai.run import AgentRunResult, AgentRunResultEvent
 from pydantic_ai.messages import (
     TextPart,
     PartEndEvent,
@@ -481,3 +483,75 @@ async def test_author_is_agent(self, events: list[Any]):
             content = getattr(e, "content", None)
             if content is not None and hasattr(content, "author"):
                 assert content.author == "agent"
+
+
+class TestOnResultCallback:
+    """on_result callback: captures the terminal AgentRunResultEvent without
+    altering streaming output."""
+
+    def _make_result_event(self, output: Any = "hello") -> AgentRunResultEvent:
+        result = AgentRunResult(output=output, _output_tool_name=None)
+        return AgentRunResultEvent(result=result)
+
+    async def test_callback_invoked_once_with_result_event(self):
+        """on_result is called exactly once, with the AgentRunResultEvent."""
+        captured: list[AgentRunResultEvent] = []
+
+        def on_result(event: AgentRunResultEvent) -> None:
+            captured.append(event)
+
+        result_event = self._make_result_event("the answer")
+        events = [result_event]
+        await _collect(convert_pydantic_ai_to_agentex_events(_aiter(events), on_result=on_result))
+
+        assert len(captured) == 1
+        assert captured[0] is result_event
+        assert captured[0].result.output == "the answer"
+
+    async def test_streaming_output_unchanged_with_callback(self):
+        """Yielded StreamTaskMessage* sequence is identical whether on_result is set or not."""
+        result_event = self._make_result_event()
+        events = [
+            PartStartEvent(index=0, part=TextPart(content="")),
+            PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="hi")),
+            PartEndEvent(index=0, part=TextPart(content="hi")),
+            result_event,
+        ]
+
+        captured: list[AgentRunResultEvent] = []
+        out_with = await _collect(convert_pydantic_ai_to_agentex_events(_aiter(events), on_result=captured.append))
+        out_without = await _collect(convert_pydantic_ai_to_agentex_events(_aiter(events)))
+
+        assert len(out_with) == len(out_without)
+        for a, b in zip(out_with, out_without):
+            assert type(a) is type(b)
+            assert a.model_dump() == b.model_dump()
+        assert len(captured) == 1
+
+    async def test_no_callback_no_error(self):
+        """AgentRunResultEvent is silently ignored when on_result is None."""
+        result_event = self._make_result_event()
+        events = [result_event]
+        out = await _collect(convert_pydantic_ai_to_agentex_events(_aiter(events)))
+        assert out == []
+
+    async def test_async_callback_is_awaited(self):
+        """An async on_result callable is properly awaited.
+
+        The callback suspends (``await asyncio.sleep(0)``) before recording its
+        side effect, so ``awaited`` is only populated if the converter actually
+        awaits the returned coroutine — distinguishing "awaited" from
+        "called-but-not-awaited."
+        """
+        awaited: list[AgentRunResultEvent] = []
+
+        async def on_result_async(event: AgentRunResultEvent) -> None:
+            await asyncio.sleep(0)
+            awaited.append(event)
+
+        result_event = self._make_result_event("async_output")
+        events = [result_event]
+        await _collect(convert_pydantic_ai_to_agentex_events(_aiter(events), on_result=on_result_async))
+
+        assert len(awaited) == 1
+        assert awaited[0].result.output == "async_output"
diff --git a/tests/lib/adk/test_pydantic_ai_sync_unified.py b/tests/lib/adk/test_pydantic_ai_sync_unified.py
new file mode 100644
index 000000000..f920418de
--- /dev/null
+++ b/tests/lib/adk/test_pydantic_ai_sync_unified.py
@@ -0,0 +1,209 @@
+"""Tests for the unified sync (HTTP ACP) path: PydanticAITurn + UnifiedEmitter.
+
+Exercises the path documented in _pydantic_ai_sync.py under "Recommended: unified surface":
+- events forwarded by yield_turn equal PydanticAITurn(stream).events (passthrough)
+- with a trace context + fake tracing backend, tool spans are derived (start_span / end_span called)
+- with a trace context + fake tracing backend, reasoning spans are derived
+"""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+
+from pydantic_ai.run import AgentRunResult, AgentRunResultEvent
+from pydantic_ai.usage import RunUsage
+from pydantic_ai.messages import (
+    TextPart,
+    PartEndEvent,
+    ThinkingPart,
+    ToolCallPart,
+    TextPartDelta,
+    PartDeltaEvent,
+    PartStartEvent,
+    ThinkingPartDelta,
+    ToolCallPartDelta,
+)
+
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
+
+
+async def _aiter(events: list[Any]) -> AsyncIterator[Any]:
+    for e in events:
+        yield e
+
+
+async def _collect(stream: AsyncIterator[Any]) -> list[Any]:
+    return [e async for e in stream]
+
+
+class _FakeSpan:
+    def __init__(self, name: str):
+        self.name = name
+        self.output: Any = None
+
+
+class _FakeTracing:
+    def __init__(self) -> None:
+        self.started: list[tuple[str, str | None, Any]] = []
+        self.ended: list[tuple[str, Any]] = []
+
+    async def start_span(self, *, trace_id, name, input=None, parent_id=None, data=None, task_id=None):
+        self.started.append((name, parent_id, input))
+        return _FakeSpan(name)
+
+    async def end_span(self, *, trace_id, span):
+        self.ended.append((span.name, span.output))
+
+
+def _make_result_event(usage: RunUsage | None = None) -> AgentRunResultEvent:
+    result = AgentRunResult(output="done", _output_tool_name=None)
+    if usage is not None:
+        result._state.usage = usage
+    return AgentRunResultEvent(result=result)
+
+
+class TestUnifiedSyncPathPassthrough:
+    """The events forwarded by yield_turn are identical to PydanticAITurn.events."""
+
+    async def test_text_stream_passthrough(self):
+        raw_events = [
+            PartStartEvent(index=0, part=TextPart(content="")),
+            PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="hello")),
+            PartEndEvent(index=0, part=TextPart(content="hello")),
+        ]
+
+        turn_a = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o")
+        direct = await _collect(turn_a.events)
+
+        turn_b = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o")
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+        via_emitter = await _collect(emitter.yield_turn(turn_b))
+
+        assert len(via_emitter) == len(direct)
+        for a, b in zip(via_emitter, direct):
+            assert type(a) is type(b)
+            assert a.model_dump() == b.model_dump()
+
+    async def test_tool_call_stream_passthrough(self):
+        raw_events = [
+            PartStartEvent(index=0, part=ToolCallPart(tool_name="Bash", args=None, tool_call_id="c1")),
+            PartDeltaEvent(index=0, delta=ToolCallPartDelta(args_delta='{"cmd":"ls"}')),
+            PartEndEvent(
+                index=0,
+                part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c1"),
+            ),
+        ]
+
+        turn_a = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o")
+        direct = await _collect(turn_a.events)
+
+        turn_b = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o")
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+        via_emitter = await _collect(emitter.yield_turn(turn_b))
+
+        assert len(via_emitter) == len(direct)
+        for a, b in zip(via_emitter, direct):
+            assert type(a) is type(b)
+            assert a.model_dump() == b.model_dump()
+
+
+class TestUnifiedSyncPathSpanDerivation:
+    """With trace context + fake tracing, spans are derived from the stream."""
+
+    async def test_tool_span_opened_and_closed(self):
+        """A tool call produces start_span + end_span on the fake tracing backend."""
+        from pydantic_ai.messages import ToolReturnPart, FunctionToolResultEvent
+
+        tool_events = [
+            PartStartEvent(
+                index=0,
+                part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="call_1"),
+            ),
+            PartEndEvent(
+                index=0,
+                part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="call_1"),
+            ),
+            FunctionToolResultEvent(
+                part=ToolReturnPart(tool_name="Bash", content="files", tool_call_id="call_1"),
+            ),
+        ]
+
+        fake = _FakeTracing()
+        turn = PydanticAITurn(_aiter(tool_events), model="openai:gpt-4o")
+        emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracing=fake)
+
+        events = await _collect(emitter.yield_turn(turn))
+
+        assert len(events) >= 2, "at least Start(tool) + Done + Full(response)"
+        assert len(fake.started) == 1, "one tool span opened"
+        assert len(fake.ended) == 1, "one tool span closed"
+        span_name, parent_id, span_input = fake.started[0]
+        assert span_name == "Bash"
+        assert parent_id == "p"
+        closed_name, closed_output = fake.ended[0]
+        assert closed_name == "Bash"
+
+    async def test_reasoning_span_opened_and_closed(self):
+        """A thinking/reasoning block produces start_span + end_span."""
+        reasoning_events = [
+            PartStartEvent(index=0, part=ThinkingPart(content="")),
+            PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta="let me think")),
+            PartEndEvent(index=0, part=ThinkingPart(content="let me think")),
+        ]
+
+        fake = _FakeTracing()
+        turn = PydanticAITurn(_aiter(reasoning_events), model="openai:gpt-4o")
+        emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracing=fake)
+
+        await _collect(emitter.yield_turn(turn))
+
+        assert len(fake.started) == 1, "one reasoning span opened"
+        assert len(fake.ended) == 1, "one reasoning span closed"
+        span_name, parent_id, _ = fake.started[0]
+        assert span_name == "reasoning"
+        assert parent_id == "p"
+
+    async def test_no_trace_id_means_no_spans(self):
+        """When trace_id is None, no spans are derived even with a fake tracing backend."""
+        raw_events = [
+            PartStartEvent(
+                index=0,
+                part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="c2"),
+            ),
+            PartEndEvent(
+                index=0,
+                part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c2"),
+            ),
+        ]
+
+        fake = _FakeTracing()
+        turn = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o")
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None, tracing=fake)
+
+        await _collect(emitter.yield_turn(turn))
+
+        assert fake.started == [], "no spans when trace_id is absent"
+        assert fake.ended == []
+
+    async def test_tracer_false_suppresses_spans_even_with_trace_id(self):
+        """tracer=False disables span derivation regardless of trace_id."""
+        raw_events = [
+            PartStartEvent(
+                index=0,
+                part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="c3"),
+            ),
+            PartEndEvent(
+                index=0,
+                part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c3"),
+            ),
+        ]
+
+        fake = _FakeTracing()
+        turn = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o")
+        emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracer=False, tracing=fake)
+
+        await _collect(emitter.yield_turn(turn))
+
+        assert fake.started == []
+        assert fake.ended == []
diff --git a/tests/lib/adk/test_pydantic_ai_turn.py b/tests/lib/adk/test_pydantic_ai_turn.py
new file mode 100644
index 000000000..0659895d3
--- /dev/null
+++ b/tests/lib/adk/test_pydantic_ai_turn.py
@@ -0,0 +1,276 @@
+"""Tests for PydanticAITurn and pydantic_ai_usage_to_turn_usage."""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+
+from pydantic_ai.run import AgentRunResult, AgentRunResultEvent
+from pydantic_ai.usage import RunUsage
+from pydantic_ai.messages import (
+    TextPart,
+    PartEndEvent,
+    TextPartDelta,
+    PartDeltaEvent,
+    PartStartEvent,
+)
+
+from agentex.lib.core.harness import HarnessTurn
+from agentex.lib.adk._modules._pydantic_ai_turn import (
+    PydanticAITurn,
+    pydantic_ai_usage_to_turn_usage,
+)
+
+
+async def _aiter(events: list[Any]) -> AsyncIterator[Any]:
+    for e in events:
+        yield e
+
+
+async def _collect(stream: AsyncIterator[Any]) -> list[Any]:
+    return [e async for e in stream]
+
+
+def _make_result_event(output: Any = "done", usage: RunUsage | None = None) -> AgentRunResultEvent:
+    result = AgentRunResult(output=output, _output_tool_name=None)
+    if usage is not None:
+        result._state.usage = usage
+    return AgentRunResultEvent(result=result)
+
+
+class TestUsageNormalization:
+    def test_usage_normalization_maps_fields(self):
+        """Real RunUsage fields map correctly onto TurnUsage."""
+        usage = RunUsage(
+            requests=3,
+            input_tokens=200,
+            output_tokens=80,
+            cache_read_tokens=25,
+        )
+        turn_usage = pydantic_ai_usage_to_turn_usage(usage, model="openai:gpt-4o")
+
+        assert turn_usage.model == "openai:gpt-4o"
+        assert turn_usage.input_tokens == 200
+        assert turn_usage.output_tokens == 80
+        assert turn_usage.num_llm_calls == 3
+
+    def test_total_tokens_is_computed(self):
+        """RunUsage.total_tokens is a computed property; we surface it correctly."""
+        usage = RunUsage(input_tokens=100, output_tokens=50)
+        turn_usage = pydantic_ai_usage_to_turn_usage(usage, model="openai:gpt-4o")
+        assert turn_usage.total_tokens == 150
+
+    def test_cache_read_tokens_mapped_to_cached_input_tokens(self):
+        usage = RunUsage(input_tokens=100, output_tokens=50, cache_read_tokens=20)
+        turn_usage = pydantic_ai_usage_to_turn_usage(usage, model="openai:gpt-4o")
+        assert turn_usage.cached_input_tokens == 20
+
+    def test_none_model(self):
+        """model=None is preserved."""
+        usage = RunUsage()
+        turn_usage = pydantic_ai_usage_to_turn_usage(usage, model=None)
+        assert turn_usage.model is None
+
+    def test_all_zero_usage_preserves_real_zeros(self):
+        """An all-zero RunUsage maps real 0s through (not None).
+
+        RunUsage token fields are ints defaulting to 0. A 0 is a genuine
+        value (e.g. a cache-hit with 0 output tokens), not "unknown", so it
+        must survive normalization as 0 rather than being coerced to None.
+        """
+        usage = RunUsage()
+        turn_usage = pydantic_ai_usage_to_turn_usage(usage, model="openai:gpt-4o")
+        assert turn_usage.num_llm_calls == 0
+        assert turn_usage.input_tokens == 0
+        assert turn_usage.output_tokens == 0
+        assert turn_usage.cached_input_tokens == 0
+        assert turn_usage.total_tokens == 0
+
+    def test_missing_field_degrades_to_none(self):
+        """A usage object MISSING a field maps that field to None (defensive getattr).
+
+        Guards the version-rename guarantee: if pydantic-ai renames a field,
+        the absent attribute degrades to None rather than raising.
+        """
+
+        class StubUsage:
+            requests = 2
+            input_tokens = 100
+            # no output_tokens / cache_read_tokens / total_tokens attributes
+
+        turn_usage = pydantic_ai_usage_to_turn_usage(StubUsage(), model="openai:gpt-4o")
+        assert turn_usage.num_llm_calls == 2
+        assert turn_usage.input_tokens == 100
+        assert turn_usage.output_tokens is None
+        assert turn_usage.cached_input_tokens is None
+        assert turn_usage.total_tokens is None
+
+
+class TestPydanticAITurn:
+    async def test_turn_satisfies_harness_turn_protocol(self):
+        """PydanticAITurn is structurally compatible with HarnessTurn."""
+        turn = PydanticAITurn(_aiter([]), model="openai:gpt-4o")
+        assert isinstance(turn, HarnessTurn)
+
+    async def test_usage_before_exhaustion_returns_default(self):
+        """usage() before iterating events returns default TurnUsage (model set, tokens None)."""
+        result_event = _make_result_event(usage=RunUsage(requests=1, input_tokens=100, output_tokens=40))
+        events = [result_event]
+        turn = PydanticAITurn(_aiter(events), model="openai:gpt-4o")
+
+        # Do NOT exhaust events — check usage pre-run
+        pre_usage = turn.usage()
+        assert pre_usage.model == "openai:gpt-4o"
+        assert pre_usage.input_tokens is None
+        assert pre_usage.output_tokens is None
+        assert pre_usage.num_llm_calls == 0
+
+    async def test_turn_events_and_usage(self):
+        """Driving events to exhaustion populates usage from the terminal event."""
+        known_usage = RunUsage(
+            requests=2,
+            input_tokens=300,
+            output_tokens=120,
+            cache_read_tokens=30,
+        )
+        result_event = _make_result_event(usage=known_usage)
+        events = [
+            PartStartEvent(index=0, part=TextPart(content="")),
+            PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="hi")),
+            PartEndEvent(index=0, part=TextPart(content="hi")),
+            result_event,
+        ]
+        turn = PydanticAITurn(_aiter(events), model="openai:gpt-4o")
+
+        collected = await _collect(turn.events)
+
+        # Events match bare converter output (Start + Delta + Done = 3 events)
+        assert len(collected) == 3
+
+        # Usage is populated after exhaustion
+        usage = turn.usage()
+        assert usage.model == "openai:gpt-4o"
+        assert usage.input_tokens == 300
+        assert usage.output_tokens == 120
+        assert usage.cached_input_tokens == 30
+        assert usage.num_llm_calls == 2
+        assert usage.total_tokens == 420
+
+    async def test_events_match_bare_converter(self):
+        """Yielded events are identical to bare convert_pydantic_ai_to_agentex_events output."""
+        from agentex.lib.adk._modules._pydantic_ai_sync import convert_pydantic_ai_to_agentex_events
+
+        text_events = [
+            PartStartEvent(index=0, part=TextPart(content="")),
+            PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="Hello")),
+            PartEndEvent(index=0, part=TextPart(content="Hello")),
+        ]
+
+        turn = PydanticAITurn(_aiter(text_events), model="openai:gpt-4o")
+        turn_out = await _collect(turn.events)
+
+        bare_out = await _collect(convert_pydantic_ai_to_agentex_events(_aiter(text_events)))
+
+        assert len(turn_out) == len(bare_out)
+        for a, b in zip(turn_out, bare_out):
+            assert type(a) is type(b)
+            assert a.model_dump() == b.model_dump()
+
+    async def test_usage_captured_via_real_usage_accessor(self):
+        """Drive the turn through the REAL ``result.usage`` property accessor.
+
+        The production code reads ``getattr(run_result, "usage", None)``, which
+        on this pydantic-ai version resolves the ``_DeprecatedCallableRunUsage``
+        property (NOT ``_state.usage`` directly). This asserts that the real
+        accessor path the converter uses captures the run usage. Constructing
+        the event without our test's ``_state`` shortcut: we set ``_state.usage``
+        only because that is the sole supported way to seed an
+        ``AgentRunResult``, but we then assert capture happens through the
+        public ``.usage`` attribute access (verified below).
+        """
+        known_usage = RunUsage(requests=4, input_tokens=512, output_tokens=64)
+        result = AgentRunResult(output="done", _output_tool_name=None)
+        result._state.usage = known_usage
+        result_event = AgentRunResultEvent(result=result)
+
+        # Sanity: the value is reachable via the real public accessor the
+        # production code uses (not just via the private _state). The
+        # _DeprecatedCallableRunUsage property wraps the value, so compare by
+        # equality rather than identity.
+        accessed = getattr(result_event.result, "usage", None)
+        assert accessed is not None
+        assert accessed.input_tokens == 512
+        assert accessed.requests == 4
+
+        events = [
+            PartStartEvent(index=0, part=TextPart(content="")),
+            PartEndEvent(index=0, part=TextPart(content="")),
+            result_event,
+        ]
+        turn = PydanticAITurn(_aiter(events), model="anthropic:claude-3-5-sonnet")
+        await _collect(turn.events)
+
+        usage = turn.usage()
+        assert usage.model == "anthropic:claude-3-5-sonnet"
+        assert usage.input_tokens == 512
+        assert usage.output_tokens == 64
+        assert usage.num_llm_calls == 4
+
+    async def test_no_usage_event_leaves_default_usage(self):
+        """If the stream has no AgentRunResultEvent, usage() returns the default (tokens None)."""
+        events = [
+            PartStartEvent(index=0, part=TextPart(content="")),
+            PartEndEvent(index=0, part=TextPart(content="")),
+        ]
+        turn = PydanticAITurn(_aiter(events), model="openai:gpt-4o")
+        await _collect(turn.events)
+
+        usage = turn.usage()
+        assert usage.model == "openai:gpt-4o"
+        assert usage.input_tokens is None
+        assert usage.num_llm_calls == 0
+
+
+class TestToolRequestStreaming:
+    """PydanticAITurn.events equals the bare converter output unconditionally.
+
+    The foundation auto_send delivers Start+ToolRequestDelta+Done natively
+    (AGX1-377), so no coalescing is needed on either channel.
+    """
+
+    async def test_events_match_bare_converter_for_streamed_tool_call(self):
+        """PydanticAITurn yields a ToolRequestDelta for a streamed-args tool call
+        — i.e. it is byte-for-byte the bare converter output, preserving
+        argument-token streaming on the sync/yield channel."""
+        from pydantic_ai.messages import ToolCallPart, ToolCallPartDelta
+
+        from agentex.types.tool_request_delta import ToolRequestDelta
+        from agentex.types.task_message_update import StreamTaskMessageDelta
+        from agentex.lib.adk._modules._pydantic_ai_sync import convert_pydantic_ai_to_agentex_events
+
+        tool_events = [
+            PartStartEvent(index=0, part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="c1")),
+            PartDeltaEvent(index=0, delta=ToolCallPartDelta(args_delta='{"city":"Paris"}')),
+            PartEndEvent(
+                index=0,
+                part=ToolCallPart(tool_name="get_weather", args='{"city":"Paris"}', tool_call_id="c1"),
+            ),
+        ]
+
+        turn = PydanticAITurn(_aiter(tool_events), model="openai:gpt-4o")
+        turn_out = await _collect(turn.events)
+
+        bare_out = await _collect(convert_pydantic_ai_to_agentex_events(_aiter(tool_events)))
+
+        # Turn is identical to the bare converter.
+        assert len(turn_out) == len(bare_out)
+        for a, b in zip(turn_out, bare_out):
+            assert type(a) is type(b)
+            assert a.model_dump() == b.model_dump()
+
+        # The arg-streaming delta is present.
+        deltas = [
+            e for e in turn_out if isinstance(e, StreamTaskMessageDelta) and isinstance(e.delta, ToolRequestDelta)
+        ]
+        assert len(deltas) == 1, "streamed tool-call args must surface as a ToolRequestDelta"
+        assert isinstance(deltas[0].delta, ToolRequestDelta)
+        assert deltas[0].delta.arguments_delta == '{"city":"Paris"}'
diff --git a/tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py b/tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py
new file mode 100644
index 000000000..ca8234fda
--- /dev/null
+++ b/tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py
@@ -0,0 +1,194 @@
+"""Cross-channel conformance fixtures derived from real pydantic-ai event sequences.
+
+Each fixture is built by running a pydantic_ai event stream through PydanticAITurn
+and collecting the canonical StreamTaskMessage* output. These canonical event lists are
+then registered with the conformance runner and exercised by the cross-channel test
+(yield_events vs auto_send).
+
+Streamed tool requests
+----------------------
+The pydantic-ai stream emits a tool REQUEST as Start + ToolRequestDelta + Done (not a
+Full event). AGX1-377 has landed: both the conformance runner and auto_send now deliver
+the Start+Delta+Done(tool_request) shape, so the cross-channel test asserts full
+delivery-equivalence for streamed tool requests. The fixtures below retain the
+ToolRequestDelta events as the streamed tool-request inputs.
+"""
+
+from __future__ import annotations
+
+import asyncio
+from typing import Any, AsyncIterator
+
+import pytest
+from pydantic_ai.messages import (
+    TextPart,
+    PartEndEvent,
+    ThinkingPart,
+    ToolCallPart,
+    TextPartDelta,
+    PartDeltaEvent,
+    PartStartEvent,
+    ToolReturnPart,
+    ThinkingPartDelta,
+    ToolCallPartDelta,
+    FunctionToolResultEvent,
+)
+
+from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
+
+from .runner import (
+    Fixture,
+    register,
+    derive_all,
+    run_cross_channel_conformance,
+)
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+async def _aiter(events: list[Any]) -> AsyncIterator[Any]:
+    for e in events:
+        yield e
+
+
+async def _canonical(pydantic_events: list[Any]) -> list[Any]:
+    """Run pydantic_ai events through PydanticAITurn and collect the output.
+
+    The output equals the bare convert_pydantic_ai_to_agentex_events output.
+    """
+    turn = PydanticAITurn(_aiter(pydantic_events), model=None)
+    return [e async for e in turn.events]
+
+
+def _build_fixtures() -> list[Fixture]:
+    """Build all pydantic-ai conformance fixtures synchronously via asyncio.run."""
+
+    # ------------------------------------------------------------------ #
+    # 1. Text-only run: simple streaming text response.
+    # ------------------------------------------------------------------ #
+    text_only_pydantic = [
+        PartStartEvent(index=0, part=TextPart(content="")),
+        PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="Hello, ")),
+        PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="world!")),
+        PartEndEvent(index=0, part=TextPart(content="Hello, world!")),
+    ]
+
+    # ------------------------------------------------------------------ #
+    # 2. Single tool call + tool response.
+    # The canonical stream emits Start+ToolRequestDelta+Done for the request
+    # and Full(ToolResponseContent) for the response. See AGX1-377 note above
+    # for why the request delivery is not yet asserted cross-channel.
+    # ------------------------------------------------------------------ #
+    tool_call_pydantic = [
+        PartStartEvent(
+            index=0,
+            part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="call_01"),
+        ),
+        PartDeltaEvent(
+            index=0,
+            delta=ToolCallPartDelta(args_delta='{"city":"Paris"}', tool_call_id="call_01"),
+        ),
+        PartEndEvent(
+            index=0,
+            part=ToolCallPart(tool_name="get_weather", args='{"city":"Paris"}', tool_call_id="call_01"),
+        ),
+        FunctionToolResultEvent(
+            part=ToolReturnPart(tool_name="get_weather", content="Sunny, 22C", tool_call_id="call_01"),
+        ),
+    ]
+
+    # ------------------------------------------------------------------ #
+    # 3. Reasoning/thinking block: produces ReasoningContent Start+Delta+Done.
+    # ------------------------------------------------------------------ #
+    reasoning_pydantic = [
+        PartStartEvent(index=0, part=ThinkingPart(content="")),
+        PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta="First, let me think...")),
+        PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta=" Then conclude.")),
+        PartEndEvent(index=0, part=ThinkingPart(content="First, let me think... Then conclude.")),
+    ]
+
+    # ------------------------------------------------------------------ #
+    # 4. Multi-step run: text -> tool call + response -> text.
+    # Pydantic AI restarts part indices at 0 for each model response; the
+    # converter assigns globally-monotonic indices to Agentex messages.
+    # ------------------------------------------------------------------ #
+    multi_step_pydantic = [
+        # First model turn: text then tool call
+        PartStartEvent(index=0, part=TextPart(content="")),
+        PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="Let me check the weather.")),
+        PartEndEvent(index=0, part=TextPart(content="Let me check the weather.")),
+        PartStartEvent(
+            index=1,
+            part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="call_ms1"),
+        ),
+        PartDeltaEvent(
+            index=1,
+            delta=ToolCallPartDelta(args_delta='{"city":"London"}', tool_call_id="call_ms1"),
+        ),
+        PartEndEvent(
+            index=1,
+            part=ToolCallPart(tool_name="get_weather", args='{"city":"London"}', tool_call_id="call_ms1"),
+        ),
+        FunctionToolResultEvent(
+            part=ToolReturnPart(tool_name="get_weather", content="Cloudy, 15C", tool_call_id="call_ms1"),
+        ),
+        # Second model turn: text response (pydantic restarts index at 0)
+        PartStartEvent(index=0, part=TextPart(content="")),
+        PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="It's cloudy and 15C in London.")),
+        PartEndEvent(index=0, part=TextPart(content="It's cloudy and 15C in London.")),
+    ]
+
+    text_only_events = asyncio.run(_canonical(text_only_pydantic))
+    tool_call_events = asyncio.run(_canonical(tool_call_pydantic))
+    reasoning_events = asyncio.run(_canonical(reasoning_pydantic))
+    multi_step_events = asyncio.run(_canonical(multi_step_pydantic))
+
+    return [
+        Fixture(name="pydantic-ai-text-only", events=text_only_events),
+        Fixture(name="pydantic-ai-single-tool-call", events=tool_call_events),
+        Fixture(name="pydantic-ai-reasoning-block", events=reasoning_events),
+        Fixture(name="pydantic-ai-multi-step", events=multi_step_events),
+    ]
+
+
+_FIXTURES: list[Fixture] = _build_fixtures()
+
+for _f in _FIXTURES:
+    register(_f)
+
+
+# ---------------------------------------------------------------------------
+# Cross-channel conformance: logical equivalence + span equivalence
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("fixture", _FIXTURES, ids=lambda f: f.name)
+@pytest.mark.asyncio
+async def test_cross_channel_equivalence(fixture: Fixture) -> None:
+    """Assert that yield_events and auto_send produce equivalent logical
+    deliveries and identical span signals for each pydantic-ai fixture.
+
+    See runner.py for the full contract. The AGX1-377 note at the top of this
+    module explains why streamed-tool-request delivery is not yet asserted.
+    """
+    yield_deliveries, auto_deliveries, yield_spans, auto_spans = await run_cross_channel_conformance(fixture)
+
+    assert yield_deliveries == auto_deliveries, (
+        f"[{fixture.name}] logical deliveries differ:\n  yield:     {yield_deliveries}\n  auto_send: {auto_deliveries}"
+    )
+    assert yield_spans == auto_spans, (
+        f"[{fixture.name}] span signals differ:\n  yield:     {yield_spans}\n  auto_send: {auto_spans}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Backward-compatible determinism guard
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("fixture", _FIXTURES, ids=lambda f: f.name)
+def test_span_derivation_is_deterministic(fixture: Fixture) -> None:
+    """Span derivation over the same event list is idempotent."""
+    assert derive_all(fixture.events) == derive_all(fixture.events)
diff --git a/tests/lib/core/harness/test_harness_pydantic_ai_async.py b/tests/lib/core/harness/test_harness_pydantic_ai_async.py
new file mode 100644
index 000000000..8bda7d020
--- /dev/null
+++ b/tests/lib/core/harness/test_harness_pydantic_ai_async.py
@@ -0,0 +1,361 @@
+"""Integration test: async (Redis-streaming) channel with a pydantic-ai agent.
+
+Exercises the unified harness surface (UnifiedEmitter.auto_send_turn + PydanticAITurn)
+with a minimal pydantic-ai agent backed by TestModel so the test runs fully
+offline (no API keys, no Redis, no Agentex server).
+
+Agent description
+-----------------
+Same single-tool agent as the sync test: ``get_weather(city: str) -> str``
+returning "sunny and 72F". TestModel is configured to call the tool once then
+produce a fixed text reply.
+
+The async path uses the bare PydanticAITurn (no coalescing): the foundation
+auto_send delivers streamed tool-request Start+ToolRequestDelta+Done messages
+natively (AGX1-377 fix), so no coalescing wrapper is needed.
+
+What is tested
+--------------
+- The async handler pushes the correct sequence of messages to the fake streaming
+  backend: tool_request + tool_response + text (in that order).
+- final_text equals the TestModel custom output.
+- With a SpanTracer, tool spans are derived and forwarded to the fake tracing
+  backend (streamed tool-request delivery now triggers span derivation on the
+  async path).
+
+What is NOT covered without live infrastructure
+-----------------------------------------------
+- Actual Redis streaming (requires a running Redis instance).
+- The ACP on_task_event_send / on_task_create / on_task_cancel lifecycle.
+- Multi-turn history persistence via adk.state.
+- Real LLM calls or production model behaviour.
+- The full FastACP async request lifecycle.
+
+See also: test_harness_pydantic_ai_sync.py (span derivation with sync path) and
+test_harness_pydantic_ai_temporal.py (temporal activity path).
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+from pydantic_ai import Agent
+from pydantic_ai.models.test import TestModel
+
+from agentex.types.task_message import TaskMessage
+from agentex.lib.core.harness.types import TurnResult
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
+
+# ---------------------------------------------------------------------------
+# Minimal agent under test
+# ---------------------------------------------------------------------------
+
+
+def _make_agent() -> Agent:
+    """Build a pydantic-ai agent with one weather tool and a TestModel."""
+    model = TestModel(
+        call_tools=["get_weather"],
+        custom_output_text="The weather in Paris is sunny and 72F.",
+    )
+    agent: Agent = Agent(model)
+
+    @agent.tool_plain
+    def get_weather(city: str) -> str:
+        """Get the current weather for a city."""
+        return f"The weather in {city} is sunny and 72F"
+
+    return agent
+
+
+# ---------------------------------------------------------------------------
+# Fake streaming backend (replaces adk.streaming; no Redis required)
+# ---------------------------------------------------------------------------
+
+
+class _FakeCtx:
+    """Minimal StreamingTaskMessageContext fake."""
+
+    def __init__(self, sink: list[Any], ctype: str, initial_content: Any) -> None:
+        self.sink = sink
+        self.ctype = ctype
+        self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content)
+
+    async def __aenter__(self) -> "_FakeCtx":
+        self.sink.append(("open", self.ctype, self.task_message.content))
+        return self
+
+    async def __aexit__(self, *args: Any) -> bool:
+        await self.close()
+        return False
+
+    async def close(self) -> None:
+        self.sink.append(("close", self.ctype))
+
+    async def stream_update(self, update: Any) -> Any:
+        self.sink.append(("delta", self.ctype, update))
+        return update
+
+
+class _FakeStreaming:
+    """Fake streaming backend; records every context lifecycle event."""
+
+    def __init__(self) -> None:
+        self.sink: list[Any] = []
+        self.messages_opened: list[Any] = []
+
+    def streaming_task_message_context(
+        self,
+        task_id: str,
+        initial_content: Any,
+        streaming_mode: str = "coalesced",
+        created_at: Any = None,
+    ) -> _FakeCtx:
+        ctype = getattr(initial_content, "type", None) or ""
+        self.messages_opened.append(initial_content)
+        return _FakeCtx(self.sink, ctype, initial_content)
+
+
+# ---------------------------------------------------------------------------
+# Fake tracing backend
+# ---------------------------------------------------------------------------
+
+
+class _FakeSpan:
+    def __init__(self, name: str) -> None:
+        self.name = name
+        self.output: Any = None
+
+
+class _FakeTracing:
+    def __init__(self) -> None:
+        self.started: list[tuple[str, str | None]] = []
+        self.ended: list[tuple[str, Any]] = []
+
+    async def start_span(
+        self,
+        *,
+        trace_id: str,
+        name: str,
+        input: Any = None,
+        parent_id: Any = None,
+        data: Any = None,
+        task_id: Any = None,
+    ) -> _FakeSpan:
+        self.started.append((name, parent_id))
+        return _FakeSpan(name)
+
+    async def end_span(self, *, trace_id: str, span: _FakeSpan) -> None:
+        self.ended.append((span.name, span.output))
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+async def _run_auto_send_turn(
+    agent: Agent,
+    user_msg: str = "What is the weather in Paris?",
+    trace_id: str | None = None,
+    parent_span_id: str | None = None,
+    fake_tracing: _FakeTracing | None = None,
+) -> tuple[TurnResult, _FakeStreaming]:
+    """Drive the async (auto_send) path and return the TurnResult + fake streaming state."""
+    fake_streaming = _FakeStreaming()
+
+    tracer: SpanTracer | bool | None = None
+    if trace_id and fake_tracing is not None:
+        tracer = SpanTracer(
+            trace_id=trace_id,
+            parent_span_id=parent_span_id,
+            task_id="task1",
+            tracing=fake_tracing,
+        )
+
+    async with agent.run_stream_events(user_msg) as stream:
+        turn = PydanticAITurn(
+            stream,
+            model="test",
+        )
+        emitter = UnifiedEmitter(
+            task_id="task1",
+            trace_id=trace_id,
+            parent_span_id=parent_span_id,
+            tracer=tracer if tracer is not None else False,
+            streaming=fake_streaming,
+        )
+        result = await emitter.auto_send_turn(turn)
+
+    return result, fake_streaming
+
+
+# ---------------------------------------------------------------------------
+# Tests: message order and content
+# ---------------------------------------------------------------------------
+
+
+class TestAsyncAutoSendMessageOrder:
+    """auto_send pushes messages to the streaming backend in canonical order."""
+
+    async def test_tool_request_pushed_first(self) -> None:
+        """tool_request is the first message type pushed to the streaming backend."""
+        agent = _make_agent()
+        _, fake_streaming = await _run_auto_send_turn(agent)
+
+        message_types = [getattr(m, "type", None) for m in fake_streaming.messages_opened]
+        assert "tool_request" in message_types
+        assert message_types.index("tool_request") < message_types.index("tool_response"), (
+            "tool_request must be pushed before tool_response"
+        )
+
+    async def test_tool_response_pushed_after_tool_request(self) -> None:
+        """tool_response appears after tool_request in the pushed messages."""
+        agent = _make_agent()
+        _, fake_streaming = await _run_auto_send_turn(agent)
+
+        message_types = [getattr(m, "type", None) for m in fake_streaming.messages_opened]
+        assert "tool_response" in message_types
+
+    async def test_text_pushed_last(self) -> None:
+        """Text content is the last type pushed (after tool round-trip)."""
+        agent = _make_agent()
+        _, fake_streaming = await _run_auto_send_turn(agent)
+
+        message_types = [getattr(m, "type", None) for m in fake_streaming.messages_opened]
+        assert message_types[-1] == "text", f"Expected last message type=text, got {message_types}"
+
+    async def test_exactly_three_messages(self) -> None:
+        """Exactly three message contexts are opened: tool_request, tool_response, text."""
+        agent = _make_agent()
+        _, fake_streaming = await _run_auto_send_turn(agent)
+
+        assert len(fake_streaming.messages_opened) == 3, (
+            f"Expected 3 messages (tool_request + tool_response + text), "
+            f"got {len(fake_streaming.messages_opened)}: "
+            f"{[getattr(m, 'type', None) for m in fake_streaming.messages_opened]}"
+        )
+
+
+class TestAsyncAutoSendContentVerification:
+    """The content pushed to the streaming backend is correct."""
+
+    async def test_tool_request_content(self) -> None:
+        """The pushed tool_request is a ToolRequestContent for get_weather."""
+        agent = _make_agent()
+        _, fake_streaming = await _run_auto_send_turn(agent)
+
+        tool_reqs = [m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)]
+        assert len(tool_reqs) == 1, "Expected exactly one ToolRequestContent"
+        assert tool_reqs[0].name == "get_weather"
+
+    async def test_tool_response_content(self) -> None:
+        """The pushed tool_response is a ToolResponseContent containing the weather result."""
+        agent = _make_agent()
+        _, fake_streaming = await _run_auto_send_turn(agent)
+
+        tool_resps = [m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)]
+        assert len(tool_resps) == 1, "Expected exactly one ToolResponseContent"
+        assert isinstance(tool_resps[0].content, str)
+        assert "72F" in tool_resps[0].content
+        assert tool_resps[0].name == "get_weather"
+
+    async def test_tool_call_ids_match(self) -> None:
+        """tool_request and tool_response have the same tool_call_id."""
+        agent = _make_agent()
+        _, fake_streaming = await _run_auto_send_turn(agent)
+
+        tool_req = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent))
+        tool_resp = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent))
+        assert tool_req.tool_call_id == tool_resp.tool_call_id, (
+            "tool_request and tool_response must share the same tool_call_id"
+        )
+
+
+class TestAsyncAutoSendFinalText:
+    """auto_send_turn returns the accumulated text from the last text part."""
+
+    async def test_final_text_matches_model_output(self) -> None:
+        """TurnResult.final_text equals the TestModel custom_output_text."""
+        agent = _make_agent()
+        result, _ = await _run_auto_send_turn(agent)
+        assert result.final_text == "The weather in Paris is sunny and 72F."
+
+    async def test_turn_result_has_usage(self) -> None:
+        """TurnResult carries a TurnUsage object (may have None tokens from TestModel)."""
+        agent = _make_agent()
+        result, _ = await _run_auto_send_turn(agent)
+        assert result.usage is not None
+
+    async def test_context_lifecycle_open_then_close(self) -> None:
+        """Every message context is opened then closed (no leak)."""
+        agent = _make_agent()
+        _, fake_streaming = await _run_auto_send_turn(agent)
+
+        opens = [e for e in fake_streaming.sink if e[0] == "open"]
+        closes = [e for e in fake_streaming.sink if e[0] == "close"]
+        assert len(opens) == len(closes) == 3, "Each of the 3 messages must have exactly one open and one close"
+
+
+class TestAsyncAutoSendSpanDerivation:
+    """Span derivation on the async path now works for streamed tool requests.
+
+    The foundation auto_send delivers Start+ToolRequestDelta+Done natively
+    (AGX1-377 fix). The SpanDeriver opens a tool span on Done(tool_request),
+    so the async path now derives spans just like the sync path.
+    """
+
+    async def test_tool_span_derived_on_async_path(self) -> None:
+        """With the bare PydanticAITurn (no coalescing), a tool span is derived
+        on the async/auto_send path when auto_send delivers the streamed
+        Start+ToolRequestDelta+Done sequence."""
+        agent = _make_agent()
+        fake_tracing = _FakeTracing()
+        tracer = SpanTracer(
+            trace_id="trace1",
+            parent_span_id="parent",
+            task_id="task1",
+            tracing=fake_tracing,
+        )
+        fake_streaming = _FakeStreaming()
+
+        async with agent.run_stream_events("What is the weather in Paris?") as stream:
+            turn = PydanticAITurn(stream, model="test")
+            emitter = UnifiedEmitter(
+                task_id="task1",
+                trace_id="trace1",
+                parent_span_id="parent",
+                tracer=tracer,
+                streaming=fake_streaming,
+            )
+            await emitter.auto_send_turn(turn)
+
+        assert len(fake_tracing.started) == 1, (
+            "Expected one tool span to be started for the get_weather call."
+        )
+        assert fake_tracing.started[0][0] == "get_weather"
+        assert len(fake_tracing.ended) == 1
+
+
+@pytest.mark.parametrize(
+    "user_msg",
+    [
+        "What is the weather in Paris?",
+        "Tell me the weather in London.",
+    ],
+)
+async def test_async_handler_pushes_messages_for_various_inputs(user_msg: str) -> None:
+    """auto_send pushes at least tool_request + tool_response + text for any input."""
+    agent = _make_agent()
+    result, fake_streaming = await _run_auto_send_turn(agent, user_msg=user_msg)
+
+    message_types = [getattr(m, "type", None) for m in fake_streaming.messages_opened]
+    assert "tool_request" in message_types
+    assert "tool_response" in message_types
+    assert "text" in message_types
+    assert isinstance(result.final_text, str)
+    assert len(result.final_text) > 0
diff --git a/tests/lib/core/harness/test_harness_pydantic_ai_sync.py b/tests/lib/core/harness/test_harness_pydantic_ai_sync.py
new file mode 100644
index 000000000..1557d0dd1
--- /dev/null
+++ b/tests/lib/core/harness/test_harness_pydantic_ai_sync.py
@@ -0,0 +1,388 @@
+"""Integration test: sync (HTTP-yield) channel with a pydantic-ai agent.
+
+Exercises the unified harness surface (UnifiedEmitter.yield_turn + PydanticAITurn)
+with a minimal pydantic-ai agent backed by TestModel so the test runs fully
+offline (no API keys, no live infrastructure).
+
+Agent description
+-----------------
+A single-tool agent with ``get_weather(city: str) -> str`` that always returns
+"sunny and 72F". TestModel is configured to call that tool once then produce
+a fixed text reply, giving a deterministic event sequence.
+
+What is tested
+--------------
+- The sync handler correctly yields StreamTaskMessage* events in order:
+  tool_request (Start+Done) then tool_response (Full) then text (Start+Delta+Done).
+- Final accumulated text equals the TestModel custom output.
+- With a trace_id + fake tracing, a tool span is opened (OpenSpan) and
+  closed (CloseSpan) — proving the SpanDeriver is wired on the yield path.
+
+What is NOT covered without live infrastructure
+-----------------------------------------------
+- Actual HTTP streaming over the ACP sync endpoint (requires a running
+  Agentex server + deployed agent).
+- Real LLM calls or production model behaviour.
+- The full FastACP request/response lifecycle.
+
+See also: tests/lib/core/harness/test_harness_pydantic_ai_async.py and
+test_harness_pydantic_ai_temporal.py for the other two channels.
+"""
+
+from __future__ import annotations
+
+from typing import Any, override
+
+import pytest
+from pydantic_ai import Agent
+from pydantic_ai.models.test import TestModel
+
+from agentex.types.text_delta import TextDelta
+from agentex.lib.core.harness.types import OpenSpan, CloseSpan
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
+
+# ---------------------------------------------------------------------------
+# Minimal agent under test
+# ---------------------------------------------------------------------------
+
+
+def _make_agent() -> Agent:
+    """Build a pydantic-ai agent with one weather tool and a TestModel.
+
+    TestModel is instantiated with call_tools=['get_weather'] so it always
+    invokes the tool once, then emits custom_output_text as the reply.
+    """
+    model = TestModel(
+        call_tools=["get_weather"],
+        custom_output_text="The weather in Paris is sunny and 72F.",
+    )
+    agent: Agent = Agent(model)
+
+    @agent.tool_plain
+    def get_weather(city: str) -> str:
+        """Get the current weather for a city."""
+        return f"The weather in {city} is sunny and 72F"
+
+    return agent
+
+
+# ---------------------------------------------------------------------------
+# Fake tracing backend (no network calls)
+# ---------------------------------------------------------------------------
+
+
+class _FakeSpan:
+    def __init__(self, name: str) -> None:
+        self.name = name
+        self.output: Any = None
+
+
+class _FakeTracing:
+    def __init__(self) -> None:
+        self.started: list[tuple[str, str | None]] = []
+        self.ended: list[tuple[str, Any]] = []
+
+    async def start_span(
+        self,
+        *,
+        trace_id: str,
+        name: str,
+        input: Any = None,
+        parent_id: Any = None,
+        data: Any = None,
+        task_id: Any = None,
+    ) -> _FakeSpan:
+        self.started.append((name, parent_id))
+        return _FakeSpan(name)
+
+    async def end_span(self, *, trace_id: str, span: _FakeSpan) -> None:
+        self.ended.append((span.name, span.output))
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+async def _run_yield_turn(
+    agent: Agent,
+    user_msg: str = "What is the weather in Paris?",
+    trace_id: str | None = None,
+    parent_span_id: str | None = None,
+    fake_tracing: _FakeTracing | None = None,
+) -> list[Any]:
+    """Drive the sync (yield) path and collect all yielded events."""
+    tracer: SpanTracer | bool | None = None
+    if trace_id and fake_tracing is not None:
+        tracer = SpanTracer(
+            trace_id=trace_id,
+            parent_span_id=parent_span_id,
+            task_id="task1",
+            tracing=fake_tracing,
+        )
+
+    events: list[Any] = []
+    async with agent.run_stream_events(user_msg) as stream:
+        turn = PydanticAITurn(stream, model="test")
+        emitter = UnifiedEmitter(
+            task_id="task1",
+            trace_id=trace_id,
+            parent_span_id=parent_span_id,
+            tracer=tracer if tracer is not None else False,
+        )
+        events = [ev async for ev in emitter.yield_turn(turn)]
+    return events
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestSyncYieldEventOrder:
+    """The yield channel forwards events in canonical order."""
+
+    async def test_tool_request_precedes_tool_response(self) -> None:
+        """tool_request events appear before the tool_response Full event."""
+        agent = _make_agent()
+        events = await _run_yield_turn(agent)
+
+        content_types = [
+            getattr(getattr(ev, "content", None), "type", None)
+            for ev in events
+            if isinstance(ev, (StreamTaskMessageStart, StreamTaskMessageFull))
+        ]
+        assert "tool_request" in content_types
+        assert "tool_response" in content_types
+        tool_req_idx = content_types.index("tool_request")
+        tool_resp_idx = content_types.index("tool_response")
+        assert tool_req_idx < tool_resp_idx, "tool_request must appear before tool_response in the event stream"
+
+    async def test_text_appears_after_tool_response(self) -> None:
+        """Text content (Start/Done) comes after the tool_response Full event."""
+        agent = _make_agent()
+        events = await _run_yield_turn(agent)
+
+        full_types = [
+            getattr(getattr(ev, "content", None), "type", None)
+            for ev in events
+            if isinstance(ev, StreamTaskMessageFull)
+        ]
+        start_types = [
+            getattr(getattr(ev, "content", None), "type", None)
+            for ev in events
+            if isinstance(ev, StreamTaskMessageStart)
+        ]
+
+        assert "tool_response" in full_types
+        assert "text" in start_types
+
+        tool_resp_pos = next(
+            i
+            for i, ev in enumerate(events)
+            if isinstance(ev, StreamTaskMessageFull)
+            and getattr(getattr(ev, "content", None), "type", None) == "tool_response"
+        )
+        text_start_pos = next(
+            i
+            for i, ev in enumerate(events)
+            if isinstance(ev, StreamTaskMessageStart) and getattr(getattr(ev, "content", None), "type", None) == "text"
+        )
+        assert tool_resp_pos < text_start_pos
+
+    async def test_tool_response_carries_weather_result(self) -> None:
+        """The ToolResponseContent contains the get_weather return value."""
+        agent = _make_agent()
+        events = await _run_yield_turn(agent)
+
+        full_events = [
+            ev
+            for ev in events
+            if isinstance(ev, StreamTaskMessageFull) and isinstance(getattr(ev, "content", None), ToolResponseContent)
+        ]
+        assert len(full_events) >= 1, "Expected at least one tool_response Full event"
+        tool_response = full_events[0].content
+        assert isinstance(tool_response, ToolResponseContent)
+        assert isinstance(tool_response.content, str)
+        assert "72F" in tool_response.content
+        assert tool_response.name == "get_weather"
+
+    async def test_accumulated_text_matches_model_output(self) -> None:
+        """Accumulated text deltas equal the TestModel custom_output_text."""
+        from agentex.types.task_message_update import StreamTaskMessageDelta
+
+        agent = _make_agent()
+        events = await _run_yield_turn(agent)
+
+        accumulated = "".join(
+            ev.delta.text_delta
+            for ev in events
+            if isinstance(ev, StreamTaskMessageDelta) and isinstance(ev.delta, TextDelta) and ev.delta.text_delta
+        )
+        assert accumulated == "The weather in Paris is sunny and 72F."
+
+    async def test_every_start_has_matching_done(self) -> None:
+        """Every StreamTaskMessageStart has a corresponding StreamTaskMessageDone."""
+        agent = _make_agent()
+        events = await _run_yield_turn(agent)
+
+        starts = {ev.index for ev in events if isinstance(ev, StreamTaskMessageStart)}
+        dones = {ev.index for ev in events if isinstance(ev, StreamTaskMessageDone)}
+        assert starts == dones, f"Unmatched Start/Done indices: starts={starts} dones={dones}"
+
+
+class TestSyncYieldSpanDerivation:
+    """SpanDeriver is wired on the yield path; tool spans are opened/closed."""
+
+    async def test_tool_span_opened_and_closed(self) -> None:
+        """One tool span is opened and closed per tool call."""
+        agent = _make_agent()
+        fake_tracing = _FakeTracing()
+        tracer = SpanTracer(
+            trace_id="trace1",
+            parent_span_id="parent-span",
+            task_id="task1",
+            tracing=fake_tracing,
+        )
+
+        async with agent.run_stream_events("What is the weather in Paris?") as stream:
+            turn = PydanticAITurn(stream, model="test")
+            emitter = UnifiedEmitter(
+                task_id="task1",
+                trace_id="trace1",
+                parent_span_id="parent-span",
+                tracer=tracer,
+            )
+            await emitter.yield_turn(turn).__anext__.__self__ if False else None
+            [_ async for _ in emitter.yield_turn(turn)]
+
+        assert len(fake_tracing.started) == 1, "Expected exactly one tool span opened"
+        assert len(fake_tracing.ended) == 1, "Expected exactly one tool span closed"
+        span_name, parent_id = fake_tracing.started[0]
+        assert span_name == "get_weather"
+        assert parent_id == "parent-span"
+
+    async def test_tool_span_output_is_tool_result(self) -> None:
+        """The closed tool span's output equals the tool's return value."""
+        agent = _make_agent()
+        fake_tracing = _FakeTracing()
+        tracer = SpanTracer(
+            trace_id="trace1",
+            parent_span_id="parent-span",
+            task_id="task1",
+            tracing=fake_tracing,
+        )
+
+        async with agent.run_stream_events("What is the weather in Paris?") as stream:
+            turn = PydanticAITurn(stream, model="test")
+            emitter = UnifiedEmitter(
+                task_id="task1",
+                trace_id="trace1",
+                parent_span_id="parent-span",
+                tracer=tracer,
+            )
+            [_ async for _ in emitter.yield_turn(turn)]
+
+        name, output = fake_tracing.ended[0]
+        assert name == "get_weather"
+        assert output is not None
+        assert "72F" in str(output)
+
+    async def test_no_trace_id_means_no_spans(self) -> None:
+        """With trace_id=None, no spans are derived (emitter disables tracing)."""
+        agent = _make_agent()
+        fake_tracing = _FakeTracing()
+
+        async with agent.run_stream_events("What is the weather in Paris?") as stream:
+            turn = PydanticAITurn(stream, model="test")
+            emitter = UnifiedEmitter(
+                task_id="task1",
+                trace_id=None,
+                parent_span_id=None,
+                tracing=fake_tracing,
+            )
+            [_ async for _ in emitter.yield_turn(turn)]
+
+        assert fake_tracing.started == []
+        assert fake_tracing.ended == []
+
+    async def test_tracer_false_suppresses_spans(self) -> None:
+        """tracer=False disables span derivation regardless of trace_id."""
+        agent = _make_agent()
+        fake_tracing = _FakeTracing()
+
+        async with agent.run_stream_events("What is the weather in Paris?") as stream:
+            turn = PydanticAITurn(stream, model="test")
+            emitter = UnifiedEmitter(
+                task_id="task1",
+                trace_id="trace1",
+                parent_span_id="parent-span",
+                tracer=False,
+                tracing=fake_tracing,
+            )
+            [_ async for _ in emitter.yield_turn(turn)]
+
+        assert fake_tracing.started == []
+        assert fake_tracing.ended == []
+
+    async def test_span_signal_types(self) -> None:
+        """The signals received by the tracer are OpenSpan then CloseSpan."""
+        from agentex.lib.core.harness.tracer import SpanTracer as RealTracer
+
+        received_signals: list[Any] = []
+
+        class _RecordingTracer(RealTracer):
+            @override
+            async def handle(self, signal: Any) -> None:
+                received_signals.append(signal)
+                await super().handle(signal)
+
+        fake_tracing = _FakeTracing()
+        tracer = _RecordingTracer(
+            trace_id="trace1",
+            parent_span_id="parent",
+            task_id="task1",
+            tracing=fake_tracing,
+        )
+
+        agent = _make_agent()
+        async with agent.run_stream_events("What is the weather in Paris?") as stream:
+            turn = PydanticAITurn(stream, model="test")
+            emitter = UnifiedEmitter(
+                task_id="task1",
+                trace_id="trace1",
+                parent_span_id="parent",
+                tracer=tracer,
+            )
+            [_ async for _ in emitter.yield_turn(turn)]
+
+        assert len(received_signals) == 2
+        assert isinstance(received_signals[0], OpenSpan)
+        assert isinstance(received_signals[1], CloseSpan)
+        assert received_signals[0].name == "get_weather"
+
+
+@pytest.mark.parametrize(
+    "user_msg",
+    [
+        "What is the weather in Paris?",
+        "Tell me the weather in London.",
+    ],
+)
+async def test_sync_handler_produces_events_for_various_inputs(user_msg: str) -> None:
+    """Yield path produces at least a tool_response Full for any user message."""
+    agent = _make_agent()
+    events = await _run_yield_turn(agent, user_msg=user_msg)
+
+    full_event_types = [
+        getattr(getattr(ev, "content", None), "type", None) for ev in events if isinstance(ev, StreamTaskMessageFull)
+    ]
+    assert "tool_response" in full_event_types
diff --git a/tests/lib/core/harness/test_harness_pydantic_ai_temporal.py b/tests/lib/core/harness/test_harness_pydantic_ai_temporal.py
new file mode 100644
index 000000000..0ead8e832
--- /dev/null
+++ b/tests/lib/core/harness/test_harness_pydantic_ai_temporal.py
@@ -0,0 +1,370 @@
+"""Integration test: Temporal-backed pydantic-ai agent, offline.
+
+Exercises the core of the Temporal pydantic-ai harness path — the
+event_stream_handler activity — with a TemporalAgent backed by TestModel so the
+test runs fully offline (no Temporal server, no Redis, no API keys).
+
+Architecture overview
+---------------------
+In a real Temporal deployment the pydantic-ai Temporal harness runs like this:
+
+    HTTP POST /task/event/send
+        -> @workflow.signal on At110PydanticAiWorkflow
+        -> temporal_agent.run(user_message, deps=TaskDeps(...))
+            internally schedules:
+            1. request_activity (LLM HTTP call — recorded by Temporal)
+            2. call_tool_activity (for each tool call — also recorded)
+            3. event_stream_handler_activity (streams events to Redis)
+
+The third activity is what we test here: it receives a
+``RunContext[TaskDeps]`` and an ``AsyncIterable[AgentStreamEvent]`` from
+pydantic-ai, calls ``stream_pydantic_ai_events`` (which internally constructs
+a ``UnifiedEmitter`` + ``PydanticAITurn`` and calls ``auto_send_turn``), and
+pushes the resulting messages to Redis.
+
+What we test
+-----------
+Since ``TemporalAgent.run_stream_events`` works offline with TestModel (it does
+not schedule Temporal activities — it runs in-process), we can:
+
+1. Build a TemporalAgent with TestModel.
+2. Call ``run_stream_events`` on it directly, just as the event_stream_handler
+   would see the event iterable.
+3. Feed that stream into ``stream_pydantic_ai_events`` backed by a fake streaming
+   backend, and assert the canonical message sequence.
+
+This covers the full inner harness chain that the Temporal workflow exercises,
+minus the Temporal scheduling/durability layer itself.
+
+What is NOT covered without live infrastructure
+-----------------------------------------------
+- Temporal scheduling (the workflow.signal -> activity dispatch chain).
+- Temporal durability guarantees and replay behaviour.
+- Redis streaming (requires a running Redis instance).
+- Multi-turn history (pydantic-ai message_history round-tripping via Temporal
+  workflow state).
+- Real LLM calls or production model behaviour.
+- The full temporal_agent.run(...) path, which schedules activities and cannot
+  run without a connected Temporal client.
+
+To test with live infrastructure: spin up Temporal + Redis + the ACP server +
+the Temporal worker, then use the AsyncAgentex client to create a task, send a
+message, and poll for messages — exactly as the existing examples/tutorials/
+10_async/10_temporal/110_pydantic_ai/tests/test_agent.py does.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+from pydantic import BaseModel
+from pydantic_ai import Agent
+from pydantic_ai.models.test import TestModel
+from pydantic_ai.durable_exec.temporal import TemporalAgent
+
+from agentex.types.task_message import TaskMessage
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn
+
+# ---------------------------------------------------------------------------
+# Agent under test (mirrors examples/tutorials/10_async/10_temporal/110_pydantic_ai)
+# ---------------------------------------------------------------------------
+
+
+class TaskDeps(BaseModel):
+    """Per-run dependencies injected via RunContext.deps."""
+
+    task_id: str
+    parent_span_id: str | None = None
+
+
+def _make_temporal_agent() -> TemporalAgent[TaskDeps, str]:
+    """Build a TemporalAgent with TestModel and one weather tool.
+
+    The underlying pydantic-ai Agent is constructed with TaskDeps as the
+    deps_type, mirroring the real temporal tutorial agent. TestModel makes
+    the run deterministic and offline.
+    """
+    model = TestModel(
+        call_tools=["get_weather"],
+        custom_output_text="The weather in Paris is sunny and 72F.",
+    )
+    base: Agent[TaskDeps, str] = Agent(model, deps_type=TaskDeps)
+
+    @base.tool_plain
+    def get_weather(city: str) -> str:
+        """Get the current weather for a city."""
+        return f"The weather in {city} is sunny and 72F"
+
+    return TemporalAgent(base, name="test_temporal_agent")
+
+
+# ---------------------------------------------------------------------------
+# Fake streaming backend
+# ---------------------------------------------------------------------------
+
+
+class _FakeCtx:
+    def __init__(self, sink: list[Any], ctype: str, initial_content: Any) -> None:
+        self.sink = sink
+        self.ctype = ctype
+        self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content)
+
+    async def __aenter__(self) -> "_FakeCtx":
+        self.sink.append(("open", self.ctype, self.task_message.content))
+        return self
+
+    async def __aexit__(self, *args: Any) -> bool:
+        await self.close()
+        return False
+
+    async def close(self) -> None:
+        self.sink.append(("close", self.ctype))
+
+    async def stream_update(self, update: Any) -> Any:
+        self.sink.append(("delta", self.ctype, update))
+        return update
+
+
+class _FakeStreaming:
+    def __init__(self) -> None:
+        self.sink: list[Any] = []
+        self.messages_opened: list[Any] = []
+
+    def streaming_task_message_context(
+        self,
+        task_id: str,
+        initial_content: Any,
+        streaming_mode: str = "coalesced",
+        created_at: Any = None,
+    ) -> _FakeCtx:
+        ctype = getattr(initial_content, "type", None) or ""
+        self.messages_opened.append(initial_content)
+        return _FakeCtx(self.sink, ctype, initial_content)
+
+
+# ---------------------------------------------------------------------------
+# Helpers: the event_stream_handler pattern tested offline
+# ---------------------------------------------------------------------------
+
+
+async def _run_event_stream_handler(
+    temporal_agent: TemporalAgent[TaskDeps, str],
+    user_msg: str = "What is the weather in Paris?",
+    task_id: str = "task1",
+) -> _FakeStreaming:
+    """Simulate the event_stream_handler activity offline.
+
+    In production the event_stream_handler receives the event stream from
+    pydantic-ai's model activity and calls stream_pydantic_ai_events.
+    Here we obtain the stream directly from run_stream_events (which works
+    offline with TestModel) and forward it to stream_pydantic_ai_events backed
+    by a fake streaming backend.
+
+    This is equivalent to:
+        async def event_handler(ctx: RunContext[TaskDeps], events: AsyncIterable[AgentStreamEvent]) -> None:
+            await stream_pydantic_ai_events(events, ctx.deps.task_id)
+    but without requiring a running Temporal server.
+    """
+    fake_streaming = _FakeStreaming()
+
+    async with temporal_agent.run_stream_events(user_msg) as stream:
+        await _fake_stream_pydantic_ai_events(stream, task_id, fake_streaming)
+
+    return fake_streaming
+
+
+async def _fake_stream_pydantic_ai_events(
+    stream: Any,
+    task_id: str,
+    fake_streaming: _FakeStreaming,
+) -> str:
+    """Like stream_pydantic_ai_events but uses an injected fake streaming backend.
+
+    Mirrors the exact chain that stream_pydantic_ai_events uses internally:
+      PydanticAITurn(stream)
+      + UnifiedEmitter.auto_send_turn(turn)
+    but with the fake backend injected so no Redis is needed.
+    """
+    turn = PydanticAITurn(stream, model=None)
+    emitter = UnifiedEmitter(
+        task_id=task_id,
+        trace_id=None,
+        parent_span_id=None,
+        tracer=False,
+        streaming=fake_streaming,
+    )
+    result = await emitter.auto_send_turn(turn)
+    return result.final_text
+
+
+# ---------------------------------------------------------------------------
+# Tests: TemporalAgent + event_stream_handler pattern
+# ---------------------------------------------------------------------------
+
+
+class TestTemporalEventStreamHandlerMessageOrder:
+    """The event_stream_handler pushes messages in canonical order."""
+
+    async def test_tool_request_before_tool_response(self) -> None:
+        """tool_request is pushed before tool_response."""
+        temporal_agent = _make_temporal_agent()
+        fake_streaming = await _run_event_stream_handler(temporal_agent)
+
+        types = [getattr(m, "type", None) for m in fake_streaming.messages_opened]
+        assert "tool_request" in types
+        assert "tool_response" in types
+        assert types.index("tool_request") < types.index("tool_response")
+
+    async def test_text_is_last(self) -> None:
+        """Text content is pushed last (after the tool round-trip)."""
+        temporal_agent = _make_temporal_agent()
+        fake_streaming = await _run_event_stream_handler(temporal_agent)
+
+        types = [getattr(m, "type", None) for m in fake_streaming.messages_opened]
+        assert types[-1] == "text"
+
+    async def test_exactly_three_messages(self) -> None:
+        """Exactly tool_request + tool_response + text are pushed."""
+        temporal_agent = _make_temporal_agent()
+        fake_streaming = await _run_event_stream_handler(temporal_agent)
+
+        assert len(fake_streaming.messages_opened) == 3, (
+            f"Expected 3 messages, got {len(fake_streaming.messages_opened)}: "
+            f"{[getattr(m, 'type', None) for m in fake_streaming.messages_opened]}"
+        )
+
+
+class TestTemporalEventStreamHandlerContent:
+    """Content verification for the messages pushed by the event_stream_handler."""
+
+    async def test_tool_request_is_get_weather(self) -> None:
+        """The pushed tool_request is for the get_weather function."""
+        temporal_agent = _make_temporal_agent()
+        fake_streaming = await _run_event_stream_handler(temporal_agent)
+
+        tool_reqs = [m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)]
+        assert len(tool_reqs) == 1
+        assert tool_reqs[0].name == "get_weather"
+
+    async def test_tool_response_contains_weather_result(self) -> None:
+        """The pushed tool_response contains the get_weather return value."""
+        temporal_agent = _make_temporal_agent()
+        fake_streaming = await _run_event_stream_handler(temporal_agent)
+
+        tool_resps = [m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)]
+        assert len(tool_resps) == 1
+        assert isinstance(tool_resps[0].content, str)
+        assert "72F" in tool_resps[0].content
+        assert tool_resps[0].name == "get_weather"
+
+    async def test_tool_call_ids_match(self) -> None:
+        """tool_request and tool_response share the same tool_call_id."""
+        temporal_agent = _make_temporal_agent()
+        fake_streaming = await _run_event_stream_handler(temporal_agent)
+
+        tool_req = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent))
+        tool_resp = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent))
+        assert tool_req.tool_call_id == tool_resp.tool_call_id
+
+
+class TestTemporalFinalText:
+    """stream_pydantic_ai_events returns the correct final text."""
+
+    async def test_final_text_matches_model_output(self) -> None:
+        """The returned final text equals the TestModel custom_output_text."""
+        temporal_agent = _make_temporal_agent()
+        fake_streaming = _FakeStreaming()
+
+        async with temporal_agent.run_stream_events("What is the weather in Paris?") as stream:
+            final = await _fake_stream_pydantic_ai_events(stream, "task1", fake_streaming)
+
+        assert final == "The weather in Paris is sunny and 72F."
+
+    async def test_context_lifecycle_complete(self) -> None:
+        """Every opened streaming context is also closed."""
+        temporal_agent = _make_temporal_agent()
+        fake_streaming = await _run_event_stream_handler(temporal_agent)
+
+        opens = [e for e in fake_streaming.sink if e[0] == "open"]
+        closes = [e for e in fake_streaming.sink if e[0] == "close"]
+        assert len(opens) == len(closes), "Every opened context must be closed"
+
+
+class TestTemporalAgentStreamEventsOffline:
+    """TemporalAgent.run_stream_events produces the expected raw pydantic-ai events.
+
+    This verifies that the TemporalAgent wrapper does not suppress event stream
+    delivery when used with TestModel, so the event_stream_handler pattern is
+    meaningful offline.
+    """
+
+    async def test_run_stream_events_yields_tool_call_and_text(self) -> None:
+        """TemporalAgent.run_stream_events with TestModel yields tool + text events."""
+
+        temporal_agent = _make_temporal_agent()
+        collected: list[Any] = []
+
+        async with temporal_agent.run_stream_events("What is the weather in Paris?") as stream:
+            async for ev in stream:
+                collected.append(ev)
+
+        event_types = {type(ev).__name__ for ev in collected}
+        assert "FunctionToolResultEvent" in event_types, "Expected FunctionToolResultEvent proving tool call ran"
+        assert "PartDeltaEvent" in event_types or "PartEndEvent" in event_types, (
+            "Expected text part events in the stream"
+        )
+
+    async def test_run_stream_events_contains_tool_result(self) -> None:
+        """The raw event stream contains a FunctionToolResultEvent with the tool output."""
+        from pydantic_ai.messages import FunctionToolResultEvent
+
+        temporal_agent = _make_temporal_agent()
+
+        async with temporal_agent.run_stream_events("What is the weather in Paris?") as stream:
+            events = [ev async for ev in stream]
+
+        tool_results = [ev for ev in events if isinstance(ev, FunctionToolResultEvent)]
+        assert len(tool_results) >= 1
+        assert isinstance(tool_results[0].part.content, str)
+        assert "72F" in tool_results[0].part.content
+
+
+class TestTemporalLiveInfraNote:
+    """Placeholder tests documenting what requires live Temporal infrastructure.
+
+    These tests are skipped by design. They document the gap between what the
+    offline tests cover and what a full integration test would exercise.
+    """
+
+    @pytest.mark.skip(
+        reason=(
+            "Requires live Temporal server + Redis + ACP server + worker. "
+            "See examples/tutorials/10_async/10_temporal/110_pydantic_ai/tests/test_agent.py "
+            "for the live integration test that exercises this path end-to-end."
+        )
+    )
+    async def test_temporal_workflow_full_round_trip(self) -> None:
+        """Full Temporal workflow: create_task -> send_event -> poll_messages."""
+        pass  # Covered by the live tutorial test
+
+
+@pytest.mark.parametrize(
+    "user_msg",
+    [
+        "What is the weather in Paris?",
+        "Tell me the weather in London.",
+    ],
+)
+async def test_temporal_handler_pushes_messages_for_various_inputs(user_msg: str) -> None:
+    """event_stream_handler pushes tool_request + tool_response + text for any input."""
+    temporal_agent = _make_temporal_agent()
+    fake_streaming = await _run_event_stream_handler(temporal_agent, user_msg=user_msg)
+
+    types = [getattr(m, "type", None) for m in fake_streaming.messages_opened]
+    assert "tool_request" in types
+    assert "tool_response" in types
+    assert "text" in types

From d10e1510bd5da44ad5acc5cac638750122083fce Mon Sep 17 00:00:00 2001
From: Declan Brady <declan.brady@scale.com>
Date: Mon, 22 Jun 2026 18:21:25 -0400
Subject: [PATCH 05/10] feat(openai-agents): migrate onto the unified harness
 surface (#416)

---
 .../00_sync/060_harness_openai/.dockerignore  |  43 +++
 .../00_sync/060_harness_openai/Dockerfile     |  50 ++++
 .../00_sync/060_harness_openai/README.md      |  35 +++
 .../00_sync/060_harness_openai/manifest.yaml  |  58 ++++
 .../060_harness_openai/project/__init__.py    |   0
 .../00_sync/060_harness_openai/project/acp.py |  87 ++++++
 .../060_harness_openai/project/agent.py       |  47 +++
 .../060_harness_openai/project/tools.py       |  19 ++
 .../00_sync/060_harness_openai/pyproject.toml |  36 +++
 .../060_harness_openai/tests/test_agent.py    |  48 +++
 .../00_base/130_harness_openai/.dockerignore  |  43 +++
 .../00_base/130_harness_openai/Dockerfile     |  50 ++++
 .../00_base/130_harness_openai/README.md      |  33 +++
 .../00_base/130_harness_openai/manifest.yaml  |  58 ++++
 .../130_harness_openai/project/__init__.py    |   0
 .../00_base/130_harness_openai/project/acp.py |  98 ++++++
 .../130_harness_openai/project/agent.py       |  43 +++
 .../130_harness_openai/project/tools.py       |  15 +
 .../00_base/130_harness_openai/pyproject.toml |  36 +++
 .../130_harness_openai/tests/test_agent.py    |  77 +++++
 .../140_harness_openai/.dockerignore          |  43 +++
 .../10_temporal/140_harness_openai/Dockerfile |  43 +++
 .../10_temporal/140_harness_openai/README.md  |  41 +++
 .../140_harness_openai/environments.yaml      |  64 ++++
 .../140_harness_openai/manifest.yaml          |  62 ++++
 .../140_harness_openai/project/__init__.py    |   0
 .../140_harness_openai/project/acp.py         |  33 +++
 .../140_harness_openai/project/activities.py  |  75 +++++
 .../140_harness_openai/project/agent.py       |  44 +++
 .../140_harness_openai/project/run_worker.py  |  44 +++
 .../140_harness_openai/project/tools.py       |  15 +
 .../140_harness_openai/project/workflow.py    | 121 ++++++++
 .../140_harness_openai/pyproject.toml         |  38 +++
 .../140_harness_openai/tests/test_agent.py    |  77 +++++
 .../lib/adk/providers/_modules/openai_turn.py | 134 +++++++++
 .../adk/providers/_modules/sync_provider.py   | 103 ++++---
 .../lib/core/services/adk/providers/openai.py | 280 +++---------------
 .../adk/providers/test_openai_activities.py   | 170 ++++++++++-
 tests/lib/adk/providers/test_openai_turn.py   | 246 +++++++++++++++
 .../conformance/test_openai_conformance.py    | 206 +++++++++++++
 40 files changed, 2430 insertions(+), 285 deletions(-)
 create mode 100644 examples/tutorials/00_sync/060_harness_openai/.dockerignore
 create mode 100644 examples/tutorials/00_sync/060_harness_openai/Dockerfile
 create mode 100644 examples/tutorials/00_sync/060_harness_openai/README.md
 create mode 100644 examples/tutorials/00_sync/060_harness_openai/manifest.yaml
 create mode 100644 examples/tutorials/00_sync/060_harness_openai/project/__init__.py
 create mode 100644 examples/tutorials/00_sync/060_harness_openai/project/acp.py
 create mode 100644 examples/tutorials/00_sync/060_harness_openai/project/agent.py
 create mode 100644 examples/tutorials/00_sync/060_harness_openai/project/tools.py
 create mode 100644 examples/tutorials/00_sync/060_harness_openai/pyproject.toml
 create mode 100644 examples/tutorials/00_sync/060_harness_openai/tests/test_agent.py
 create mode 100644 examples/tutorials/10_async/00_base/130_harness_openai/.dockerignore
 create mode 100644 examples/tutorials/10_async/00_base/130_harness_openai/Dockerfile
 create mode 100644 examples/tutorials/10_async/00_base/130_harness_openai/README.md
 create mode 100644 examples/tutorials/10_async/00_base/130_harness_openai/manifest.yaml
 create mode 100644 examples/tutorials/10_async/00_base/130_harness_openai/project/__init__.py
 create mode 100644 examples/tutorials/10_async/00_base/130_harness_openai/project/acp.py
 create mode 100644 examples/tutorials/10_async/00_base/130_harness_openai/project/agent.py
 create mode 100644 examples/tutorials/10_async/00_base/130_harness_openai/project/tools.py
 create mode 100644 examples/tutorials/10_async/00_base/130_harness_openai/pyproject.toml
 create mode 100644 examples/tutorials/10_async/00_base/130_harness_openai/tests/test_agent.py
 create mode 100644 examples/tutorials/10_async/10_temporal/140_harness_openai/.dockerignore
 create mode 100644 examples/tutorials/10_async/10_temporal/140_harness_openai/Dockerfile
 create mode 100644 examples/tutorials/10_async/10_temporal/140_harness_openai/README.md
 create mode 100644 examples/tutorials/10_async/10_temporal/140_harness_openai/environments.yaml
 create mode 100644 examples/tutorials/10_async/10_temporal/140_harness_openai/manifest.yaml
 create mode 100644 examples/tutorials/10_async/10_temporal/140_harness_openai/project/__init__.py
 create mode 100644 examples/tutorials/10_async/10_temporal/140_harness_openai/project/acp.py
 create mode 100644 examples/tutorials/10_async/10_temporal/140_harness_openai/project/activities.py
 create mode 100644 examples/tutorials/10_async/10_temporal/140_harness_openai/project/agent.py
 create mode 100644 examples/tutorials/10_async/10_temporal/140_harness_openai/project/run_worker.py
 create mode 100644 examples/tutorials/10_async/10_temporal/140_harness_openai/project/tools.py
 create mode 100644 examples/tutorials/10_async/10_temporal/140_harness_openai/project/workflow.py
 create mode 100644 examples/tutorials/10_async/10_temporal/140_harness_openai/pyproject.toml
 create mode 100644 examples/tutorials/10_async/10_temporal/140_harness_openai/tests/test_agent.py
 create mode 100644 src/agentex/lib/adk/providers/_modules/openai_turn.py
 create mode 100644 tests/lib/adk/providers/test_openai_turn.py
 create mode 100644 tests/lib/core/harness/conformance/test_openai_conformance.py

diff --git a/examples/tutorials/00_sync/060_harness_openai/.dockerignore b/examples/tutorials/00_sync/060_harness_openai/.dockerignore
new file mode 100644
index 000000000..c49489471
--- /dev/null
+++ b/examples/tutorials/00_sync/060_harness_openai/.dockerignore
@@ -0,0 +1,43 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Environments
+.env**
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Git
+.git
+.gitignore
+
+# Misc
+.DS_Store
diff --git a/examples/tutorials/00_sync/060_harness_openai/Dockerfile b/examples/tutorials/00_sync/060_harness_openai/Dockerfile
new file mode 100644
index 000000000..1bd4f4860
--- /dev/null
+++ b/examples/tutorials/00_sync/060_harness_openai/Dockerfile
@@ -0,0 +1,50 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+ENV UV_HTTP_TIMEOUT=1000
+
+# Copy pyproject.toml and README.md to install dependencies
+COPY 00_sync/060_harness_openai/pyproject.toml /app/060_harness_openai/pyproject.toml
+COPY 00_sync/060_harness_openai/README.md /app/060_harness_openai/README.md
+
+WORKDIR /app/060_harness_openai
+
+# Copy the project code
+COPY 00_sync/060_harness_openai/project /app/060_harness_openai/project
+
+# Copy the test files
+COPY 00_sync/060_harness_openai/tests /app/060_harness_openai/tests
+
+# Copy shared test utilities
+COPY test_utils /app/test_utils
+
+# Install the required Python packages with dev dependencies
+RUN uv pip install --system .[dev]
+
+# Set environment variables
+ENV PYTHONPATH=/app
+
+# Set test environment variables
+ENV AGENT_NAME=s060-harness-openai
+
+# Run the agent using uvicorn
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/examples/tutorials/00_sync/060_harness_openai/README.md b/examples/tutorials/00_sync/060_harness_openai/README.md
new file mode 100644
index 000000000..e22e9aa8b
--- /dev/null
+++ b/examples/tutorials/00_sync/060_harness_openai/README.md
@@ -0,0 +1,35 @@
+# Sync OpenAI Agents on the unified harness surface
+
+A sync (HTTP) Agentex agent that runs the OpenAI Agents SDK and delivers its
+output through the **unified harness surface**.
+
+## What this demonstrates
+
+The OpenAI Agents SDK produces native streaming events. This tutorial wraps a
+`Runner.run_streamed` result in an `OpenAITurn` — the provider -> canonical
+`StreamTaskMessage*` adapter — and forwards the canonical stream to the frontend
+via `UnifiedEmitter.yield_turn`. The same `OpenAITurn` flows unchanged through
+`auto_send_turn` in the async (`130_harness_openai`) and temporal
+(`140_harness_openai`) variants; only the delivery method differs.
+
+```python
+result = Runner.run_streamed(starting_agent=agent, input=user_message)
+turn = OpenAITurn(result=result, model="gpt-4o")
+emitter = UnifiedEmitter(task_id=task_id, trace_id=task_id, parent_span_id=parent_span_id)
+async for event in emitter.yield_turn(turn):
+    yield event
+```
+
+## Run it
+
+```bash
+agentex agents run --manifest manifest.yaml
+```
+
+## Test it
+
+The offline test exercises the harness wiring without a server or API key:
+
+```bash
+pytest tests/test_agent.py -v
+```
diff --git a/examples/tutorials/00_sync/060_harness_openai/manifest.yaml b/examples/tutorials/00_sync/060_harness_openai/manifest.yaml
new file mode 100644
index 000000000..4967c1f8d
--- /dev/null
+++ b/examples/tutorials/00_sync/060_harness_openai/manifest.yaml
@@ -0,0 +1,58 @@
+build:
+  context:
+    root: ../../
+    include_paths:
+      - 00_sync/060_harness_openai
+      - test_utils
+    dockerfile: 00_sync/060_harness_openai/Dockerfile
+    dockerignore: 00_sync/060_harness_openai/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+
+agent:
+  acp_type: sync
+  name: s060-harness-openai
+  description: A sync OpenAI Agents SDK agent on the unified harness surface
+
+  temporal:
+    enabled: false
+
+  credentials:
+    - env_var_name: OPENAI_API_KEY
+      secret_name: openai-api-key
+      secret_key: api-key
+    - env_var_name: REDIS_URL
+      secret_name: redis-url-secret
+      secret_key: url
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "s060-harness-openai"
+      description: "A sync OpenAI Agents SDK agent on the unified harness surface"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/00_sync/060_harness_openai/project/__init__.py b/examples/tutorials/00_sync/060_harness_openai/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/00_sync/060_harness_openai/project/acp.py b/examples/tutorials/00_sync/060_harness_openai/project/acp.py
new file mode 100644
index 000000000..caaa0b132
--- /dev/null
+++ b/examples/tutorials/00_sync/060_harness_openai/project/acp.py
@@ -0,0 +1,87 @@
+"""ACP handler for the sync OpenAI Agents harness tutorial.
+
+This is the API layer. It runs the OpenAI Agents SDK via ``Runner.run_streamed``,
+wraps the streamed run in an ``OpenAITurn`` (the provider -> canonical
+``StreamTaskMessage*`` adapter), and forwards the canonical stream to the
+Agentex frontend via ``UnifiedEmitter.yield_turn`` — the same harness surface
+used by the async and temporal variants of this tutorial.
+"""
+
+from __future__ import annotations
+
+import os
+from typing import AsyncGenerator
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from agents import Runner
+
+from agentex.lib import adk
+from project.agent import MODEL_NAME, create_agent
+from agentex.lib.types.acp import SendMessageParams
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.task_message_update import TaskMessageUpdate
+from agentex.types.task_message_content import TaskMessageContent
+from agentex.lib.adk.providers._modules.openai_turn import OpenAITurn
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+logger = make_logger(__name__)
+
+# LiteLLM proxy auth: copy LITELLM_API_KEY to OPENAI_API_KEY for OpenAI client
+# compatibility, so the same example works behind the Scale LiteLLM gateway.
+_litellm_key = os.environ.get("LITELLM_API_KEY")
+if _litellm_key and not os.environ.get("OPENAI_API_KEY"):
+    os.environ["OPENAI_API_KEY"] = _litellm_key
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+acp = FastACP.create(acp_type="sync")
+
+_agent = None
+
+
+def get_agent():
+    """Get or create the OpenAI Agents SDK agent instance."""
+    global _agent
+    if _agent is None:
+        _agent = create_agent()
+    return _agent
+
+
+@acp.on_message_send
+async def handle_message_send(
+    params: SendMessageParams,
+) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]:
+    """Handle incoming messages, streaming tokens and tool calls via the harness."""
+    agent = get_agent()
+    task_id = params.task.id
+    user_message = params.content.content
+    logger.info(f"Processing message for task {task_id}")
+
+    async with adk.tracing.span(
+        trace_id=task_id,
+        task_id=task_id,
+        name="message",
+        input={"message": user_message},
+        data={"__span_type__": "AGENT_WORKFLOW"},
+    ) as turn_span:
+        result = Runner.run_streamed(starting_agent=agent, input=user_message)
+        turn = OpenAITurn(result=result, model=MODEL_NAME)
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+        async for event in emitter.yield_turn(turn):
+            yield event
diff --git a/examples/tutorials/00_sync/060_harness_openai/project/agent.py b/examples/tutorials/00_sync/060_harness_openai/project/agent.py
new file mode 100644
index 000000000..3611012fe
--- /dev/null
+++ b/examples/tutorials/00_sync/060_harness_openai/project/agent.py
@@ -0,0 +1,47 @@
+"""OpenAI Agents SDK agent definition for the harness tutorial.
+
+The agent is the boundary between this module and the API layer (acp.py).
+The OpenAI Agents SDK runs its own tool-call loop internally; acp.py wraps a
+``Runner.run_streamed`` result with ``OpenAITurn`` so it flows through the
+unified harness surface.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+
+from agents import Agent, function_tool, set_tracing_disabled
+
+from project.tools import get_weather
+
+# Disable the openai-agents SDK's native tracer so it doesn't ship traces to
+# api.openai.com (the key may be a gateway/proxy key). Agentex tracing still
+# runs via the harness + tracing manager configured in acp.py.
+set_tracing_disabled(True)
+
+MODEL_NAME = "gpt-4o"
+INSTRUCTIONS = """You are a helpful AI assistant with access to tools.
+
+Current date and time: {timestamp}
+
+Guidelines:
+- Be concise and helpful
+- Use the weather tool when the user asks about the weather
+- Always report the real tool output back to the user
+"""
+
+
+@function_tool
+def weather(city: str) -> str:
+    """Get the current weather for a city."""
+    return get_weather(city)
+
+
+def create_agent() -> Agent:
+    """Build and return the OpenAI Agents SDK agent with the weather tool."""
+    return Agent(
+        name="Harness OpenAI Assistant",
+        model=MODEL_NAME,
+        instructions=INSTRUCTIONS.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
+        tools=[weather],
+    )
diff --git a/examples/tutorials/00_sync/060_harness_openai/project/tools.py b/examples/tutorials/00_sync/060_harness_openai/project/tools.py
new file mode 100644
index 000000000..b03aa7c31
--- /dev/null
+++ b/examples/tutorials/00_sync/060_harness_openai/project/tools.py
@@ -0,0 +1,19 @@
+"""Tool definitions for the OpenAI Agents harness tutorial.
+
+The bare function lives here so it's easy to unit-test; it's wrapped as an
+OpenAI Agents SDK ``function_tool`` in ``project.agent``.
+"""
+
+from __future__ import annotations
+
+
+def get_weather(city: str) -> str:
+    """Get the current weather for a city.
+
+    Args:
+        city: The name of the city to get weather for.
+
+    Returns:
+        A string describing the weather conditions.
+    """
+    return f"The weather in {city} is sunny and 72°F"
diff --git a/examples/tutorials/00_sync/060_harness_openai/pyproject.toml b/examples/tutorials/00_sync/060_harness_openai/pyproject.toml
new file mode 100644
index 000000000..39cceb8f2
--- /dev/null
+++ b/examples/tutorials/00_sync/060_harness_openai/pyproject.toml
@@ -0,0 +1,36 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "s060-harness-openai"
+version = "0.1.0"
+description = "A sync OpenAI Agents SDK agent on the unified harness surface"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+    "openai-agents",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+    "black",
+    "isort",
+    "flake8",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
+
+[tool.black]
+line-length = 88
+target-version = ['py312']
+
+[tool.isort]
+profile = "black"
+line_length = 88
diff --git a/examples/tutorials/00_sync/060_harness_openai/tests/test_agent.py b/examples/tutorials/00_sync/060_harness_openai/tests/test_agent.py
new file mode 100644
index 000000000..960b232b7
--- /dev/null
+++ b/examples/tutorials/00_sync/060_harness_openai/tests/test_agent.py
@@ -0,0 +1,48 @@
+"""Offline test for the sync OpenAI Agents harness tutorial.
+
+This test does NOT require a running Agentex server or an OpenAI API key. It
+verifies the harness wiring this tutorial demonstrates: an ``OpenAITurn`` built
+from an injected canonical ``StreamTaskMessage*`` stream, forwarded through
+``UnifiedEmitter.yield_turn`` (the sync HTTP ACP delivery path), passes the
+events through unchanged.
+
+To run: ``pytest tests/test_agent.py -v``
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agentex.types.text_content import TextContent
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.lib.adk.providers._modules.openai_turn import OpenAITurn
+
+
+async def _canonical_stream(events):
+    for e in events:
+        yield e
+
+
+@pytest.mark.asyncio
+async def test_yield_turn_forwards_canonical_stream():
+    events = [
+        StreamTaskMessageStart(type="start", index=0, content=TextContent(type="text", author="agent", content="")),
+        StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="Hi")),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    turn = OpenAITurn(stream=_canonical_stream(events), model="gpt-4o")
+    # trace_id=None disables tracing, so no Agentex server is needed.
+    emitter = UnifiedEmitter(task_id="task-1", trace_id=None, parent_span_id=None)
+
+    out = [e async for e in emitter.yield_turn(turn)]
+    assert out == events
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/.dockerignore b/examples/tutorials/10_async/00_base/130_harness_openai/.dockerignore
new file mode 100644
index 000000000..c49489471
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_harness_openai/.dockerignore
@@ -0,0 +1,43 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Environments
+.env**
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Git
+.git
+.gitignore
+
+# Misc
+.DS_Store
diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/Dockerfile b/examples/tutorials/10_async/00_base/130_harness_openai/Dockerfile
new file mode 100644
index 000000000..a31c89a31
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_harness_openai/Dockerfile
@@ -0,0 +1,50 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+ENV UV_HTTP_TIMEOUT=1000
+
+# Copy pyproject.toml and README.md to install dependencies
+COPY 10_async/00_base/130_harness_openai/pyproject.toml /app/130_harness_openai/pyproject.toml
+COPY 10_async/00_base/130_harness_openai/README.md /app/130_harness_openai/README.md
+
+WORKDIR /app/130_harness_openai
+
+# Copy the project code
+COPY 10_async/00_base/130_harness_openai/project /app/130_harness_openai/project
+
+# Copy the test files
+COPY 10_async/00_base/130_harness_openai/tests /app/130_harness_openai/tests
+
+# Copy shared test utilities
+COPY test_utils /app/test_utils
+
+# Install the required Python packages with dev dependencies
+RUN uv pip install --system .[dev] pytest-asyncio httpx
+
+# Set environment variables
+ENV PYTHONPATH=/app
+
+# Set test environment variables
+ENV AGENT_NAME=ab130-harness-openai
+
+# Run the agent using uvicorn
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/README.md b/examples/tutorials/10_async/00_base/130_harness_openai/README.md
new file mode 100644
index 000000000..ac439e4ed
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_harness_openai/README.md
@@ -0,0 +1,33 @@
+# Async OpenAI Agents on the unified harness surface
+
+An async (Redis-streaming) Agentex agent that runs the OpenAI Agents SDK and
+delivers its output through the **unified harness surface**.
+
+## What this demonstrates
+
+Same `OpenAITurn` adapter as the sync tutorial (`060_harness_openai`), but the
+async ACP pushes the turn to the task stream via
+`UnifiedEmitter.auto_send_turn` instead of yielding over HTTP. `auto_send_turn`
+returns a `TurnResult` with the accumulated final text and normalized usage.
+
+```python
+result = Runner.run_streamed(starting_agent=agent, input=user_message)
+turn = OpenAITurn(result=result, model="gpt-4o")
+emitter = UnifiedEmitter(task_id=task_id, trace_id=task_id, parent_span_id=parent_span_id)
+turn_result = await emitter.auto_send_turn(turn)
+```
+
+## Run it
+
+```bash
+agentex agents run --manifest manifest.yaml
+```
+
+## Test it
+
+The offline test exercises the auto-send delivery path with an injected fake
+streaming backend (no server, Redis, or API key required):
+
+```bash
+pytest tests/test_agent.py -v
+```
diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/manifest.yaml b/examples/tutorials/10_async/00_base/130_harness_openai/manifest.yaml
new file mode 100644
index 000000000..7e67675fa
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_harness_openai/manifest.yaml
@@ -0,0 +1,58 @@
+build:
+  context:
+    root: ../../../
+    include_paths:
+      - 10_async/00_base/130_harness_openai
+      - test_utils
+    dockerfile: 10_async/00_base/130_harness_openai/Dockerfile
+    dockerignore: 10_async/00_base/130_harness_openai/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+
+agent:
+  acp_type: async
+  name: ab130-harness-openai
+  description: An async OpenAI Agents SDK agent on the unified harness surface
+
+  temporal:
+    enabled: false
+
+  credentials:
+    - env_var_name: OPENAI_API_KEY
+      secret_name: openai-api-key
+      secret_key: api-key
+    - env_var_name: REDIS_URL
+      secret_name: redis-url-secret
+      secret_key: url
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "ab130-harness-openai"
+      description: "An async OpenAI Agents SDK agent on the unified harness surface"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/project/__init__.py b/examples/tutorials/10_async/00_base/130_harness_openai/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/project/acp.py b/examples/tutorials/10_async/00_base/130_harness_openai/project/acp.py
new file mode 100644
index 000000000..fcd10cc62
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_harness_openai/project/acp.py
@@ -0,0 +1,98 @@
+"""ACP handler for the async OpenAI Agents harness tutorial.
+
+Uses the async ACP model with Redis streaming instead of HTTP yields. The
+OpenAI Agents SDK run is wrapped in an ``OpenAITurn`` and pushed to the task
+stream via ``UnifiedEmitter.auto_send_turn`` — the async/temporal delivery path
+of the unified harness surface. ``auto_send_turn`` returns a ``TurnResult``
+carrying the accumulated final text and normalized usage.
+"""
+
+from __future__ import annotations
+
+import os
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from agents import Runner
+
+from agentex.lib import adk
+from project.agent import MODEL_NAME, create_agent
+from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams
+from agentex.lib.types.fastacp import AsyncACPConfig
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.lib.adk.providers._modules.openai_turn import OpenAITurn
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+logger = make_logger(__name__)
+
+_litellm_key = os.environ.get("LITELLM_API_KEY")
+if _litellm_key and not os.environ.get("OPENAI_API_KEY"):
+    os.environ["OPENAI_API_KEY"] = _litellm_key
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+acp = FastACP.create(
+    acp_type="async",
+    config=AsyncACPConfig(type="base"),
+)
+
+_agent = None
+
+
+def get_agent():
+    global _agent
+    if _agent is None:
+        _agent = create_agent()
+    return _agent
+
+
+@acp.on_task_create
+async def handle_task_create(params: CreateTaskParams):
+    logger.info(f"Task created: {params.task.id}")
+
+
+@acp.on_task_event_send
+async def handle_task_event_send(params: SendEventParams):
+    """Handle each user message: run the agent and auto-send its turn."""
+    agent = get_agent()
+    task_id = params.task.id
+    user_message = params.event.content.content
+
+    logger.info(f"Processing message for task {task_id}")
+
+    # Echo the user's message into the task history.
+    await adk.messages.create(task_id=task_id, content=params.event.content)
+
+    async with adk.tracing.span(
+        trace_id=task_id,
+        task_id=task_id,
+        name="message",
+        input={"message": user_message},
+        data={"__span_type__": "AGENT_WORKFLOW"},
+    ) as turn_span:
+        result = Runner.run_streamed(starting_agent=agent, input=user_message)
+        turn = OpenAITurn(result=result, model=MODEL_NAME)
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+        turn_result = await emitter.auto_send_turn(turn)
+        if turn_span:
+            turn_span.output = {"final_output": turn_result.final_text}
+
+
+@acp.on_task_cancel
+async def handle_task_canceled(params: CancelTaskParams):
+    logger.info(f"Task canceled: {params.task.id}")
diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/project/agent.py b/examples/tutorials/10_async/00_base/130_harness_openai/project/agent.py
new file mode 100644
index 000000000..5b83c5aab
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_harness_openai/project/agent.py
@@ -0,0 +1,43 @@
+"""OpenAI Agents SDK agent definition for the async harness tutorial.
+
+Identical agent shape to the sync tutorial (060). The only difference is the
+delivery path in acp.py: the async ACP uses ``UnifiedEmitter.auto_send_turn``
+(Redis streaming) instead of yielding events over an HTTP response.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+
+from agents import Agent, function_tool, set_tracing_disabled
+
+from project.tools import get_weather
+
+set_tracing_disabled(True)
+
+MODEL_NAME = "gpt-4o"
+INSTRUCTIONS = """You are a helpful AI assistant with access to tools.
+
+Current date and time: {timestamp}
+
+Guidelines:
+- Be concise and helpful
+- Use the weather tool when the user asks about the weather
+- Always report the real tool output back to the user
+"""
+
+
+@function_tool
+def weather(city: str) -> str:
+    """Get the current weather for a city."""
+    return get_weather(city)
+
+
+def create_agent() -> Agent:
+    """Build and return the OpenAI Agents SDK agent with the weather tool."""
+    return Agent(
+        name="Harness OpenAI Assistant",
+        model=MODEL_NAME,
+        instructions=INSTRUCTIONS.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
+        tools=[weather],
+    )
diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/project/tools.py b/examples/tutorials/10_async/00_base/130_harness_openai/project/tools.py
new file mode 100644
index 000000000..d2e5468c9
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_harness_openai/project/tools.py
@@ -0,0 +1,15 @@
+"""Tool definitions for the async OpenAI Agents harness tutorial."""
+
+from __future__ import annotations
+
+
+def get_weather(city: str) -> str:
+    """Get the current weather for a city.
+
+    Args:
+        city: The name of the city to get weather for.
+
+    Returns:
+        A string describing the weather conditions.
+    """
+    return f"The weather in {city} is sunny and 72°F"
diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/pyproject.toml b/examples/tutorials/10_async/00_base/130_harness_openai/pyproject.toml
new file mode 100644
index 000000000..c05e8c1c6
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_harness_openai/pyproject.toml
@@ -0,0 +1,36 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "ab130-harness-openai"
+version = "0.1.0"
+description = "An async OpenAI Agents SDK agent on the unified harness surface"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+    "openai-agents",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+    "black",
+    "isort",
+    "flake8",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
+
+[tool.black]
+line-length = 88
+target-version = ['py312']
+
+[tool.isort]
+profile = "black"
+line_length = 88
diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/tests/test_agent.py b/examples/tutorials/10_async/00_base/130_harness_openai/tests/test_agent.py
new file mode 100644
index 000000000..ceb95dbab
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_harness_openai/tests/test_agent.py
@@ -0,0 +1,77 @@
+"""Offline test for the async OpenAI Agents harness tutorial.
+
+This test does NOT require a running Agentex server, Redis, or an OpenAI API
+key. It verifies the async delivery path this tutorial demonstrates: an
+``OpenAITurn`` built from an injected canonical stream, pushed through
+``UnifiedEmitter.auto_send_turn`` with an injected fake streaming backend,
+returns the accumulated final text.
+
+To run: ``pytest tests/test_agent.py -v``
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agentex.types.task_message import TaskMessage
+from agentex.types.text_content import TextContent
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.lib.adk.providers._modules.openai_turn import OpenAITurn
+
+
+class _FakeCtx:
+    def __init__(self, initial_content):
+        self.task_message = TaskMessage(id="m-1", task_id="task-1", content=initial_content)
+
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, *a):
+        await self.close()
+        return False
+
+    async def close(self):
+        pass
+
+    async def stream_update(self, update):
+        return update
+
+
+class _FakeStreaming:
+    def streaming_task_message_context(self, task_id, initial_content, **_kwargs):  # noqa: ARG002
+        return _FakeCtx(initial_content)
+
+
+async def _canonical_stream(events):
+    for e in events:
+        yield e
+
+
+@pytest.mark.asyncio
+async def test_auto_send_turn_returns_final_text():
+    events = [
+        StreamTaskMessageStart(type="start", index=0, content=TextContent(type="text", author="agent", content="")),
+        StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="Hel")),
+        StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="lo")),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    turn = OpenAITurn(stream=_canonical_stream(events), model="gpt-4o")
+    emitter = UnifiedEmitter(
+        task_id="task-1",
+        trace_id=None,
+        parent_span_id=None,
+        streaming=_FakeStreaming(),
+    )
+
+    result = await emitter.auto_send_turn(turn)
+    assert result.final_text == "Hello"
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/.dockerignore b/examples/tutorials/10_async/10_temporal/140_harness_openai/.dockerignore
new file mode 100644
index 000000000..c49489471
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_harness_openai/.dockerignore
@@ -0,0 +1,43 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Environments
+.env**
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Git
+.git
+.gitignore
+
+# Misc
+.DS_Store
diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/Dockerfile b/examples/tutorials/10_async/10_temporal/140_harness_openai/Dockerfile
new file mode 100644
index 000000000..c107e3269
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_harness_openai/Dockerfile
@@ -0,0 +1,43 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+ENV UV_HTTP_TIMEOUT=1000
+
+COPY 10_async/10_temporal/140_harness_openai/pyproject.toml /app/140_harness_openai/pyproject.toml
+COPY 10_async/10_temporal/140_harness_openai/README.md /app/140_harness_openai/README.md
+
+WORKDIR /app/140_harness_openai
+
+COPY 10_async/10_temporal/140_harness_openai/project /app/140_harness_openai/project
+COPY 10_async/10_temporal/140_harness_openai/tests /app/140_harness_openai/tests
+COPY test_utils /app/test_utils
+
+RUN uv pip install --system .[dev]
+
+ENV PYTHONPATH=/app
+
+ENV AGENT_NAME=at140-harness-openai
+
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
+
+# When we deploy the worker, we will replace the CMD with the following
+# CMD ["python", "-m", "run_worker"]
diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/README.md b/examples/tutorials/10_async/10_temporal/140_harness_openai/README.md
new file mode 100644
index 000000000..0415ae225
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_harness_openai/README.md
@@ -0,0 +1,41 @@
+# Temporal OpenAI Agents on the unified harness surface
+
+A Temporal-backed Agentex agent that runs the OpenAI Agents SDK and delivers its
+output through the **unified harness surface**.
+
+## What this demonstrates
+
+LLM calls are non-deterministic, so they can't run directly in a Temporal
+workflow. This tutorial keeps the workflow (`project/workflow.py`)
+deterministic and delegates each turn to a custom activity
+(`project/activities.py`). The activity uses the SAME `OpenAITurn` adapter as
+the sync (`060_harness_openai`) and async (`130_harness_openai`) variants, and
+delivers via `UnifiedEmitter.auto_send_turn` — which is designed to run inside
+an activity (it writes streaming side effects to Redis and returns the final
+text + usage).
+
+```python
+# inside the activity:
+result = Runner.run_streamed(starting_agent=agent, input=user_message)
+turn = OpenAITurn(result=result, model="gpt-4o")
+emitter = UnifiedEmitter(task_id=task_id, trace_id=trace_id, parent_span_id=parent_span_id)
+turn_result = await emitter.auto_send_turn(turn)
+return turn_result.final_text
+```
+
+## Run it
+
+```bash
+agentex agents run --manifest manifest.yaml
+```
+
+This starts both the ACP HTTP server and the Temporal worker.
+
+## Test it
+
+The offline test exercises the activity's delivery path with an injected fake
+streaming backend (no server, Temporal, Redis, or API key required):
+
+```bash
+pytest tests/test_agent.py -v
+```
diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/environments.yaml b/examples/tutorials/10_async/10_temporal/140_harness_openai/environments.yaml
new file mode 100644
index 000000000..f90511911
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_harness_openai/environments.yaml
@@ -0,0 +1,64 @@
+# Agent Environment Configuration
+# ------------------------------
+# This file defines environment-specific settings for your agent.
+# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment.
+
+# ********** EXAMPLE **********
+# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI
+# environments:
+#   dev:
+#     auth:
+#       principal:
+#         user_id: "1234567890"
+#         user_name: "John Doe"
+#         user_email: "john.doe@example.com"
+#         user_role: "admin"
+#       user_permissions: "read, write, delete"
+#     helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts
+#       replicas: 3
+#       resources:
+#         requests:
+#           cpu: "1000m"
+#           memory: "2Gi"
+#         limits:
+#           cpu: "2000m"
+#           memory: "4Gi"
+#       env:
+#         - name: LOG_LEVEL
+#           value: "DEBUG"
+#         - name: ENVIRONMENT
+#           value: "staging"
+#
+#     kubernetes: 
+#       # OPTIONAL - Otherwise it will be derived from separately. However, this can be used to override the derived
+#       #   namespace and deploy it with in the same namespace that already exists for a separate agent.
+#       namespace: "team-example-tutorial"
+# ********** END EXAMPLE **********
+
+schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI
+environments:
+  dev:
+    auth:
+      principal:
+        user_id: # TODO: Fill in
+        account_id: # TODO: Fill in
+    helm_overrides: 
+      # This is used to override the global helm values.yaml file in the agentex-agent helm charts
+      replicaCount: 2
+      resources:
+        requests:
+          cpu: "500m"
+          memory: "1Gi"
+        limits:
+          cpu: "1000m"
+          memory: "2Gi"
+      temporal-worker:
+        enabled: true
+        replicaCount: 2
+        resources:
+          requests:
+            cpu: "500m"
+            memory: "1Gi"
+          limits:
+            cpu: "1000m"
+            memory: "2Gi"
\ No newline at end of file
diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/manifest.yaml b/examples/tutorials/10_async/10_temporal/140_harness_openai/manifest.yaml
new file mode 100644
index 000000000..64a943438
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_harness_openai/manifest.yaml
@@ -0,0 +1,62 @@
+build:
+  context:
+    root: ../../../
+    include_paths:
+      - 10_async/10_temporal/140_harness_openai
+      - test_utils
+    dockerfile: 10_async/10_temporal/140_harness_openai/Dockerfile
+    dockerignore: 10_async/10_temporal/140_harness_openai/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+    worker: project/run_worker.py
+
+agent:
+  acp_type: async
+  name: at140-harness-openai
+  description: A Temporal-backed OpenAI Agents SDK agent on the unified harness surface
+
+  temporal:
+    enabled: true
+    workflows:
+      - name: at140-harness-openai
+        queue_name: at140_harness_openai_queue
+
+  credentials:
+    - env_var_name: REDIS_URL
+      secret_name: redis-url-secret
+      secret_key: url
+    - env_var_name: OPENAI_API_KEY
+      secret_name: openai-api-key
+      secret_key: api-key
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "at140-harness-openai"
+      description: "A Temporal-backed OpenAI Agents SDK agent on the unified harness surface"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/__init__.py b/examples/tutorials/10_async/10_temporal/140_harness_openai/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/acp.py b/examples/tutorials/10_async/10_temporal/140_harness_openai/project/acp.py
new file mode 100644
index 000000000..6076835ba
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_harness_openai/project/acp.py
@@ -0,0 +1,33 @@
+"""ACP server for the Temporal OpenAI Agents harness tutorial.
+
+Thin by design: with ``acp_type="async"`` + ``TemporalACPConfig``, FastACP
+auto-wires task/create, task/event/send, and task/cancel onto the workflow.
+The agent logic lives in ``project/workflow.py`` (deterministic) and
+``project/activities.py`` (the harness-backed LLM run), executed by the worker
+in ``project/run_worker.py``.
+"""
+
+from __future__ import annotations
+
+import os
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from agentex.lib.types.fastacp import TemporalACPConfig
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+
+# LiteLLM proxy auth: copy LITELLM_API_KEY to OPENAI_API_KEY for OpenAI client
+# compatibility, so the same example works behind the Scale LiteLLM gateway.
+_litellm_key = os.environ.get("LITELLM_API_KEY")
+if _litellm_key and not os.environ.get("OPENAI_API_KEY"):
+    os.environ["OPENAI_API_KEY"] = _litellm_key
+
+acp = FastACP.create(
+    acp_type="async",
+    config=TemporalACPConfig(
+        type="temporal",
+        temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"),
+    ),
+)
diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/activities.py b/examples/tutorials/10_async/10_temporal/140_harness_openai/project/activities.py
new file mode 100644
index 000000000..a70ee0c5d
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_harness_openai/project/activities.py
@@ -0,0 +1,75 @@
+"""Custom Temporal activity that runs the OpenAI agent on the harness surface.
+
+LLM calls are non-deterministic, so they must run inside a Temporal activity
+rather than directly in the workflow. This activity runs the OpenAI Agents SDK
+via ``Runner.run_streamed``, wraps the result in an ``OpenAITurn``, and pushes
+the canonical stream to the task stream via ``UnifiedEmitter.auto_send_turn``.
+
+``auto_send`` (which backs ``auto_send_turn``) is explicitly designed to be
+called from inside an activity: it writes streaming side effects to Redis and
+returns the accumulated final text + normalized usage.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+from agents import Runner
+from pydantic import BaseModel
+from temporalio import activity
+
+from project.agent import MODEL_NAME, create_agent
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.lib.adk.providers._modules.openai_turn import OpenAITurn
+
+logger = make_logger(__name__)
+
+RUN_HARNESS_AGENT_ACTIVITY = "run_harness_openai_agent"
+
+
+class RunHarnessAgentParams(BaseModel):
+    """Parameters for the harness agent activity."""
+
+    task_id: str
+    user_message: str
+    # Prior conversation as OpenAI Agents SDK input items, so the agent sees the
+    # full history (not just the latest message) on every turn.
+    input_list: list[Any] = []
+    trace_id: str | None = None
+    parent_span_id: str | None = None
+
+
+class RunHarnessAgentResult(BaseModel):
+    """Result of one harness turn."""
+
+    final_text: str
+    # Updated conversation (prior history + this turn) to carry into the next turn.
+    input_list: list[Any]
+
+
+class HarnessActivities:
+    """Hosts the harness-backed OpenAI agent activity."""
+
+    @activity.defn(name=RUN_HARNESS_AGENT_ACTIVITY)
+    async def run_harness_openai_agent(self, params: RunHarnessAgentParams) -> RunHarnessAgentResult:
+        """Run the agent for one turn and auto-send its output.
+
+        Threads the running conversation through ``input_list`` so multi-turn
+        chats retain memory: prior history + the new user message go in, and the
+        updated conversation comes back out via ``result.to_input_list()``.
+        """
+        logger.info(f"Running harness OpenAI agent for task {params.task_id}")
+
+        agent = create_agent()
+        input_list: list[Any] = [*params.input_list, {"role": "user", "content": params.user_message}]
+        result = Runner.run_streamed(starting_agent=agent, input=input_list)
+        turn = OpenAITurn(result=result, model=MODEL_NAME)
+        emitter = UnifiedEmitter(
+            task_id=params.task_id,
+            trace_id=params.trace_id,
+            parent_span_id=params.parent_span_id,
+        )
+        turn_result = await emitter.auto_send_turn(turn)
+        # to_input_list() is valid now: auto_send_turn has exhausted the stream.
+        return RunHarnessAgentResult(final_text=turn_result.final_text, input_list=result.to_input_list())
diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/agent.py b/examples/tutorials/10_async/10_temporal/140_harness_openai/project/agent.py
new file mode 100644
index 000000000..385a80b69
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_harness_openai/project/agent.py
@@ -0,0 +1,44 @@
+"""OpenAI Agents SDK agent definition for the Temporal harness tutorial.
+
+Same agent shape as the sync (060) and async (130) variants. Here the agent is
+built and run inside a Temporal activity (see ``project.activities``); the
+workflow stays deterministic and delegates the non-deterministic LLM run to that
+activity, which delivers the turn via the unified harness surface.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+
+from agents import Agent, function_tool, set_tracing_disabled
+
+from project.tools import get_weather
+
+set_tracing_disabled(True)
+
+MODEL_NAME = "gpt-4o"
+INSTRUCTIONS = """You are a helpful AI assistant with access to tools.
+
+Current date and time: {timestamp}
+
+Guidelines:
+- Be concise and helpful
+- Use the weather tool when the user asks about the weather
+- Always report the real tool output back to the user
+"""
+
+
+@function_tool
+def weather(city: str) -> str:
+    """Get the current weather for a city."""
+    return get_weather(city)
+
+
+def create_agent() -> Agent:
+    """Build and return the OpenAI Agents SDK agent with the weather tool."""
+    return Agent(
+        name="Harness OpenAI Assistant",
+        model=MODEL_NAME,
+        instructions=INSTRUCTIONS.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
+        tools=[weather],
+    )
diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/run_worker.py b/examples/tutorials/10_async/10_temporal/140_harness_openai/project/run_worker.py
new file mode 100644
index 000000000..69586a395
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_harness_openai/project/run_worker.py
@@ -0,0 +1,44 @@
+"""Temporal worker for the OpenAI Agents harness tutorial.
+
+Runs as a separate long-lived process alongside the ACP HTTP server. Registers
+the built-in Agentex activities plus the custom harness agent activity
+(``HarnessActivities.run_harness_openai_agent``), and the workflow.
+"""
+
+import asyncio
+
+from project.workflow import At140HarnessOpenaiWorkflow
+from project.activities import HarnessActivities
+from agentex.lib.utils.debug import setup_debug_if_enabled
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.environment_variables import EnvironmentVariables
+from agentex.lib.core.temporal.activities import get_all_activities
+from agentex.lib.core.temporal.workers.worker import AgentexWorker
+
+environment_variables = EnvironmentVariables.refresh()
+logger = make_logger(__name__)
+
+
+async def main():
+    setup_debug_if_enabled()
+
+    task_queue_name = environment_variables.WORKFLOW_TASK_QUEUE
+    if task_queue_name is None:
+        raise ValueError("WORKFLOW_TASK_QUEUE is not set")
+
+    harness_activities = HarnessActivities()
+    all_activities = [
+        harness_activities.run_harness_openai_agent,
+        *get_all_activities(),
+    ]
+
+    worker = AgentexWorker(task_queue=task_queue_name)
+
+    await worker.run(
+        activities=all_activities,
+        workflow=At140HarnessOpenaiWorkflow,
+    )
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/tools.py b/examples/tutorials/10_async/10_temporal/140_harness_openai/project/tools.py
new file mode 100644
index 000000000..d26f9b097
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_harness_openai/project/tools.py
@@ -0,0 +1,15 @@
+"""Tool definitions for the Temporal OpenAI Agents harness tutorial."""
+
+from __future__ import annotations
+
+
+def get_weather(city: str) -> str:
+    """Get the current weather for a city.
+
+    Args:
+        city: The name of the city to get weather for.
+
+    Returns:
+        A string describing the weather conditions.
+    """
+    return f"The weather in {city} is sunny and 72°F"
diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/workflow.py b/examples/tutorials/10_async/10_temporal/140_harness_openai/project/workflow.py
new file mode 100644
index 000000000..69ad7b365
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_harness_openai/project/workflow.py
@@ -0,0 +1,121 @@
+"""Temporal workflow for the OpenAI Agents harness tutorial.
+
+The workflow stays deterministic: it echoes the user message and delegates the
+non-deterministic LLM run to ``run_harness_openai_agent`` (see
+``project.activities``). That activity runs the OpenAI Agents SDK and delivers
+the turn through the unified harness surface (``OpenAITurn`` +
+``UnifiedEmitter.auto_send_turn``).
+"""
+
+from __future__ import annotations
+
+import os
+import json
+from datetime import timedelta
+
+from temporalio import workflow
+from temporalio.common import RetryPolicy
+
+from agentex.lib import adk
+from project.activities import (
+    RUN_HARNESS_AGENT_ACTIVITY,
+    RunHarnessAgentParams,
+    RunHarnessAgentResult,
+)
+from agentex.lib.types.acp import SendEventParams, CreateTaskParams
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.types.text_content import TextContent
+from agentex.lib.environment_variables import EnvironmentVariables
+from agentex.lib.core.temporal.types.workflow import SignalName
+from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+environment_variables = EnvironmentVariables.refresh()
+
+if environment_variables.WORKFLOW_NAME is None:
+    raise ValueError("Environment variable WORKFLOW_NAME is not set")
+if environment_variables.AGENT_NAME is None:
+    raise ValueError("Environment variable AGENT_NAME is not set")
+
+logger = make_logger(__name__)
+
+
+@workflow.defn(name=environment_variables.WORKFLOW_NAME)
+class At140HarnessOpenaiWorkflow(BaseWorkflow):
+    """Long-running workflow that runs each turn through the harness activity."""
+
+    def __init__(self):
+        super().__init__(display_name=environment_variables.AGENT_NAME)
+        self._complete_task = False
+        self._turn_number = 0
+        # Running conversation (OpenAI Agents SDK input items) so each turn sees
+        # the full history, not just the latest user message.
+        self._messages: list = []
+
+    @workflow.signal(name=SignalName.RECEIVE_EVENT)
+    async def on_task_event_send(self, params: SendEventParams) -> None:
+        """Handle a user message: echo it, then run the harness activity durably."""
+        logger.info(f"Received task event: {params.task.id}")
+        self._turn_number += 1
+
+        # Echo the user's message so it shows up in the UI as a chat bubble.
+        await adk.messages.create(task_id=params.task.id, content=params.event.content)
+
+        async with adk.tracing.span(
+            trace_id=params.task.id,
+            task_id=params.task.id,
+            name=f"Turn {self._turn_number}",
+            input={"message": params.event.content.content},
+        ) as span:
+            turn_result = await workflow.execute_activity(
+                RUN_HARNESS_AGENT_ACTIVITY,
+                RunHarnessAgentParams(
+                    task_id=params.task.id,
+                    user_message=params.event.content.content,
+                    input_list=self._messages,
+                    trace_id=params.task.id,
+                    parent_span_id=span.id if span else None,
+                ),
+                start_to_close_timeout=timedelta(minutes=5),
+                retry_policy=RetryPolicy(maximum_attempts=3),
+                result_type=RunHarnessAgentResult,
+            )
+            # Carry the updated conversation into the next turn.
+            self._messages = turn_result.input_list
+            if span:
+                span.output = {"final_output": turn_result.final_text}
+
+    @workflow.run
+    async def on_task_create(self, params: CreateTaskParams) -> str:
+        """Workflow entry point — keep the conversation alive for incoming signals."""
+        logger.info(f"Task created: {params.task.id}")
+
+        await adk.messages.create(
+            task_id=params.task.id,
+            content=TextContent(
+                author="agent",
+                content=(
+                    f"Task initialized with params:\n{json.dumps(params.params, indent=2)}\n"
+                    f"Send me a message and I'll respond using an OpenAI Agents SDK agent "
+                    f"delivered through the unified harness surface."
+                ),
+            ),
+        )
+
+        await workflow.wait_condition(lambda: self._complete_task, timeout=None)
+        return "Task completed"
+
+    @workflow.signal
+    async def complete_task_signal(self) -> None:
+        """Graceful workflow shutdown signal."""
+        logger.info("Received complete_task signal")
+        self._complete_task = True
diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/pyproject.toml b/examples/tutorials/10_async/10_temporal/140_harness_openai/pyproject.toml
new file mode 100644
index 000000000..5bf53f6be
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_harness_openai/pyproject.toml
@@ -0,0 +1,38 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "at140-harness-openai"
+version = "0.1.0"
+description = "A Temporal-backed OpenAI Agents SDK agent on the unified harness surface"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+    "temporalio>=1.18.2",
+    "openai-agents",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+    "black",
+    "isort",
+    "flake8",
+    "debugpy>=1.8.15",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
+
+[tool.black]
+line-length = 88
+target-version = ['py312']
+
+[tool.isort]
+profile = "black"
+line_length = 88
diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/140_harness_openai/tests/test_agent.py
new file mode 100644
index 000000000..dd043c44c
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_harness_openai/tests/test_agent.py
@@ -0,0 +1,77 @@
+"""Offline test for the Temporal OpenAI Agents harness tutorial.
+
+This test does NOT require a running Agentex server, Temporal, Redis, or an
+OpenAI API key. It verifies the delivery path the harness activity uses: an
+``OpenAITurn`` built from an injected canonical stream, pushed through
+``UnifiedEmitter.auto_send_turn`` with an injected fake streaming backend,
+returns the accumulated final text (which the activity returns to the workflow).
+
+To run: ``pytest tests/test_agent.py -v``
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agentex.types.task_message import TaskMessage
+from agentex.types.text_content import TextContent
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.lib.adk.providers._modules.openai_turn import OpenAITurn
+
+
+class _FakeCtx:
+    def __init__(self, initial_content):
+        self.task_message = TaskMessage(id="m-1", task_id="task-1", content=initial_content)
+
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, *a):
+        await self.close()
+        return False
+
+    async def close(self):
+        pass
+
+    async def stream_update(self, update):
+        return update
+
+
+class _FakeStreaming:
+    def streaming_task_message_context(self, task_id, initial_content, **_kwargs):  # noqa: ARG002
+        return _FakeCtx(initial_content)
+
+
+async def _canonical_stream(events):
+    for e in events:
+        yield e
+
+
+@pytest.mark.asyncio
+async def test_activity_delivery_returns_final_text():
+    events = [
+        StreamTaskMessageStart(type="start", index=0, content=TextContent(type="text", author="agent", content="")),
+        StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="72")),
+        StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="F")),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    turn = OpenAITurn(stream=_canonical_stream(events), model="gpt-4o")
+    emitter = UnifiedEmitter(
+        task_id="task-1",
+        trace_id=None,
+        parent_span_id=None,
+        streaming=_FakeStreaming(),
+    )
+
+    result = await emitter.auto_send_turn(turn)
+    assert result.final_text == "72F"
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/src/agentex/lib/adk/providers/_modules/openai_turn.py b/src/agentex/lib/adk/providers/_modules/openai_turn.py
new file mode 100644
index 000000000..17a6518ee
--- /dev/null
+++ b/src/agentex/lib/adk/providers/_modules/openai_turn.py
@@ -0,0 +1,134 @@
+"""OpenAITurn: adapt an OpenAI Agents SDK streamed run onto the harness surface.
+
+A ``HarnessTurn`` exposes a single canonical ``StreamTaskMessage*`` stream plus
+normalized usage. ``OpenAITurn`` wraps a ``RunResultStreaming`` (from
+``Runner.run_streamed``), converts its native OpenAI events into the canonical
+stream via ``convert_openai_to_agentex_events``, and after exhaustion reads the
+run's ``raw_responses`` to aggregate usage into a provider-independent
+``TurnUsage``.
+
+Delivery (yield vs auto-send) and tracing are owned by ``UnifiedEmitter``; this
+module is purely the provider->canonical adapter.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, AsyncIterator
+
+from agents.usage import Usage
+
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.core.harness.types import TurnUsage, StreamTaskMessage
+from agentex.lib.adk.providers._modules.sync_provider import (
+    convert_openai_to_agentex_events,
+)
+
+if TYPE_CHECKING:
+    from agents import ModelResponse, RunResultStreaming
+
+logger = make_logger(__name__)
+
+
+def openai_usage_to_turn_usage(usage: Usage | None, model: str | None) -> TurnUsage:
+    """Map an ``agents.Usage`` to a harness-independent ``TurnUsage``.
+
+    All field access is defensive (``getattr(..., None)``): different model
+    backends populate different subsets of the usage object, and real zeros are
+    valid values (e.g. 0 output tokens on a pure cache hit), so we never coerce
+    a present-but-zero value into ``None``.
+    """
+    if usage is None:
+        return TurnUsage(model=model)
+
+    input_details = getattr(usage, "input_tokens_details", None)
+    output_details = getattr(usage, "output_tokens_details", None)
+
+    return TurnUsage(
+        model=model,
+        num_llm_calls=getattr(usage, "requests", None) or 0,
+        input_tokens=getattr(usage, "input_tokens", None),
+        cached_input_tokens=getattr(input_details, "cached_tokens", None),
+        output_tokens=getattr(usage, "output_tokens", None),
+        reasoning_tokens=getattr(output_details, "reasoning_tokens", None),
+        total_tokens=getattr(usage, "total_tokens", None),
+    )
+
+
+def _aggregate_usage(raw_responses: list[ModelResponse]) -> Usage | None:
+    """Sum the per-response ``Usage`` across a run's ``ModelResponse`` list.
+
+    Returns ``None`` when no response carries usage so the caller can emit a
+    usage object with only the model name set. ``Usage.add`` accumulates
+    requests/tokens (including cached/reasoning detail fields).
+    """
+    total: Usage | None = None
+    for response in raw_responses:
+        resp_usage = getattr(response, "usage", None)
+        if resp_usage is None:
+            continue
+        if total is None:
+            total = Usage()
+        total.add(resp_usage)
+    return total
+
+
+class OpenAITurn:
+    """A single OpenAI Agents SDK turn adapted to the ``HarnessTurn`` protocol.
+
+    Construct with exactly one of:
+    - ``result``: a ``RunResultStreaming`` from ``Runner.run_streamed``. Its
+      ``stream_events()`` is converted to the canonical stream, and after the
+      stream is exhausted ``raw_responses`` is read to compute usage.
+    - ``stream``: a pre-built async iterator of canonical ``StreamTaskMessage``
+      events (bypasses ``convert_openai_to_agentex_events``). Useful for tests
+      and for callers that have already produced canonical events. Usage stays
+      at ``TurnUsage(model=...)`` because there is no run to read usage from.
+
+    ``coalesce_tool_requests`` is accepted for API parity with other provider
+    turns but is a no-op for OpenAI: the OpenAI converter already emits a single
+    ``Full(ToolRequestContent)`` per tool call rather than streamed argument
+    deltas, so there is nothing to coalesce.
+    """
+
+    def __init__(
+        self,
+        result: RunResultStreaming | None = None,
+        model: str | None = None,
+        stream: AsyncIterator[StreamTaskMessage] | None = None,
+        coalesce_tool_requests: bool = False,  # noqa: ARG002 - API parity, no-op for OpenAI
+    ) -> None:
+        if result is None and stream is None:
+            raise ValueError("OpenAITurn requires either `result` or `stream`")
+        self._result = result
+        self._model = model
+        self._stream = stream
+        self._usage: TurnUsage = TurnUsage(model=model)
+
+    @property
+    def events(self) -> AsyncIterator[StreamTaskMessage]:
+        return self._iter_events()
+
+    async def _iter_events(self) -> AsyncIterator[StreamTaskMessage]:
+        if self._stream is not None:
+            async for event in self._stream:
+                yield event
+            return
+
+        result = self._result
+        assert result is not None  # guaranteed by __init__
+        async for event in convert_openai_to_agentex_events(result.stream_events()):
+            yield event
+
+        # Stream is exhausted: the run has finished and raw_responses is now
+        # populated, so usage can be aggregated and normalized.
+        try:
+            raw_responses: list[Any] = list(getattr(result, "raw_responses", None) or [])
+            aggregated = _aggregate_usage(raw_responses)
+            self._usage = openai_usage_to_turn_usage(aggregated, self._model)
+        except Exception as exc:  # pragma: no cover - defensive: never break delivery on usage
+            logger.warning(f"Failed to aggregate OpenAI usage: {exc}")
+            self._usage = TurnUsage(model=self._model)
+
+    def usage(self) -> TurnUsage:
+        """Normalized turn usage. Valid only after ``events`` is exhausted."""
+        return self._usage
diff --git a/src/agentex/lib/adk/providers/_modules/sync_provider.py b/src/agentex/lib/adk/providers/_modules/sync_provider.py
index a34cfcda1..9996bf30d 100644
--- a/src/agentex/lib/adk/providers/_modules/sync_provider.py
+++ b/src/agentex/lib/adk/providers/_modules/sync_provider.py
@@ -55,24 +55,28 @@ def _serialize_item(item: Any) -> dict[str, Any]:
     Uses model_dump() for Pydantic models, otherwise extracts attributes manually.
     Filters out internal Pydantic fields that can't be serialized.
     """
-    if hasattr(item, 'model_dump'):
+    if hasattr(item, "model_dump"):
         # Pydantic model - use model_dump for proper serialization
         try:
-            return item.model_dump(mode='json', exclude_unset=True)
+            return item.model_dump(mode="json", exclude_unset=True)
         except Exception:
             # Fallback to dict conversion
-            return dict(item) if hasattr(item, '__iter__') else {}
+            return dict(item) if hasattr(item, "__iter__") else {}
     else:
         # Not a Pydantic model - extract attributes manually
         item_dict = {}
         for attr_name in dir(item):
-            if not attr_name.startswith('_') and attr_name not in ('model_fields', 'model_config', 'model_computed_fields'):
+            if not attr_name.startswith("_") and attr_name not in (
+                "model_fields",
+                "model_config",
+                "model_computed_fields",
+            ):
                 try:
                     attr_value = getattr(item, attr_name, None)
                     # Skip methods and None values
                     if attr_value is not None and not callable(attr_value):
                         # Convert to JSON-serializable format
-                        if hasattr(attr_value, 'model_dump'):
+                        if hasattr(attr_value, "model_dump"):
                             item_dict[attr_name] = attr_value.model_dump()
                         elif isinstance(attr_value, (str, int, float, bool, list, dict)):
                             item_dict[attr_name] = attr_value
@@ -85,9 +89,26 @@ def _serialize_item(item: Any) -> dict[str, Any]:
 
 
 class SyncStreamingModel(Model):
-    """Simple model wrapper that adds logging to stream_response and supports tracing."""
+    """Simple model wrapper that adds logging to stream_response and supports tracing.
+
+    .. deprecated::
+        Prefer the unified harness surface for new OpenAI Agents integrations:
+        wrap a ``Runner.run_streamed`` result in
+        ``agentex.lib.adk.providers._modules.openai_turn.OpenAITurn`` and drive
+        delivery + tracing through ``UnifiedEmitter`` (see the
+        ``060_harness_openai`` / ``130_harness_openai`` / ``140_harness_openai``
+        tutorials). This per-model tracing wrapper predates the harness and is
+        retained only for backwards compatibility; it will be removed in a
+        future release. No runtime warning is emitted.
+    """
 
-    def __init__(self, original_model: Model, trace_id: str | None = None, parent_span_id: str | None = None, tracer: AsyncTracer | None = None):
+    def __init__(
+        self,
+        original_model: Model,
+        trace_id: str | None = None,
+        parent_span_id: str | None = None,
+        tracer: AsyncTracer | None = None,
+    ):
         """Initialize with the original OpenAI model to wrap.
         Args:
             original_model: The OpenAI model instance to wrap
@@ -147,7 +168,7 @@ async def get_response(
                 }
 
                 # Only add conversation_id if the model supports it
-                if hasattr(self.original_model, 'supports_conversation_id'):
+                if hasattr(self.original_model, "supports_conversation_id"):
                     kwargs["conversation_id"] = conversation_id
 
                 response = await self.original_model.get_response(**kwargs)
@@ -158,12 +179,12 @@ async def get_response(
                     final_output = None
 
                     # Extract final output text from response
-                    response_final_output = getattr(response, 'final_output', None)
+                    response_final_output = getattr(response, "final_output", None)
                     if response_final_output:
                         final_output = response_final_output
 
                     # Extract items from the response output
-                    response_output = getattr(response, 'output', None)
+                    response_output = getattr(response, "output", None)
                     if response_output:
                         output_items = response_output if isinstance(response_output, list) else [response_output]
 
@@ -174,12 +195,12 @@ async def get_response(
                                     new_items.append(item_dict)
 
                                     # Extract final_output from message type if available
-                                    if item_dict.get('type') == 'message' and not final_output:
-                                        content = item_dict.get('content', [])
+                                    if item_dict.get("type") == "message" and not final_output:
+                                        content = item_dict.get("content", [])
                                         if content and isinstance(content, list):
                                             for content_part in content:
-                                                if isinstance(content_part, dict) and 'text' in content_part:
-                                                    final_output = content_part['text']
+                                                if isinstance(content_part, dict) and "text" in content_part:
+                                                    final_output = content_part["text"]
                                                     break
                             except Exception as e:
                                 logger.warning(f"Failed to serialize item in get_response: {e}")
@@ -207,7 +228,7 @@ async def get_response(
             }
 
             # Only add conversation_id if the model supports it
-            if hasattr(self.original_model, 'supports_conversation_id'):
+            if hasattr(self.original_model, "supports_conversation_id"):
                 kwargs["conversation_id"] = conversation_id
 
             return await self.original_model.get_response(**kwargs)
@@ -266,7 +287,7 @@ async def stream_response(
                 }
 
                 # Only add conversation_id if the model supports it
-                if hasattr(self.original_model, 'supports_conversation_id'):
+                if hasattr(self.original_model, "supports_conversation_id"):
                     stream_kwargs["conversation_id"] = conversation_id
 
                 # Get the stream response from the original model and yield each event
@@ -277,11 +298,11 @@ async def stream_response(
                 final_response_text = ""
 
                 async for event in stream_response:
-                    event_type = getattr(event, 'type', 'no-type')
+                    event_type = getattr(event, "type", "no-type")
 
                     # Handle response.output_item.done events which contain completed items
-                    if event_type == 'response.output_item.done':
-                        item = getattr(event, 'item', None)
+                    if event_type == "response.output_item.done":
+                        item = getattr(event, "item", None)
                         if item is not None:
                             try:
                                 item_dict = _serialize_item(item)
@@ -289,12 +310,12 @@ async def stream_response(
                                     new_items.append(item_dict)
 
                                     # Update final_response_text from message type if available
-                                    if item_dict.get('type') == 'message':
-                                        content = item_dict.get('content', [])
+                                    if item_dict.get("type") == "message":
+                                        content = item_dict.get("content", [])
                                         if content and isinstance(content, list):
                                             for content_part in content:
-                                                if isinstance(content_part, dict) and 'text' in content_part:
-                                                    final_response_text = content_part['text']
+                                                if isinstance(content_part, dict) and "text" in content_part:
+                                                    final_response_text = content_part["text"]
                                                     break
                             except Exception as e:
                                 logger.warning(f"Failed to serialize item in stream_response: {e}")
@@ -326,7 +347,7 @@ async def stream_response(
             }
 
             # Only add conversation_id if the model supports it
-            if hasattr(self.original_model, 'supports_conversation_id'):
+            if hasattr(self.original_model, "supports_conversation_id"):
                 stream_kwargs["conversation_id"] = conversation_id
 
             # Get the stream response from the original model and yield each event
@@ -336,8 +357,17 @@ async def stream_response(
             async for event in stream_response:
                 yield event
 
+
 class SyncStreamingProvider(OpenAIProvider):
-    """Simple OpenAI provider wrapper that adds logging to streaming and supports tracing."""
+    """Simple OpenAI provider wrapper that adds logging to streaming and supports tracing.
+
+    .. deprecated::
+        Prefer the unified harness surface for new OpenAI Agents integrations
+        (see :class:`SyncStreamingModel` and the ``OpenAITurn`` +
+        ``UnifiedEmitter`` pattern). This provider wrapper predates the harness
+        and is retained only for backwards compatibility; it will be removed in
+        a future release. No runtime warning is emitted.
+    """
 
     def __init__(self, trace_id: str | None = None, parent_span_id: str | None = None, *args, **kwargs):
         """Initialize the provider with tracing support.
@@ -405,6 +435,7 @@ def _extract_tool_call_info(tool_call_item: Any) -> tuple[str, str, dict[str, An
         if tool_call_item.arguments:
             if isinstance(tool_call_item.arguments, str):
                 import json
+
                 tool_arguments = json.loads(tool_call_item.arguments) if tool_call_item.arguments else {}
             else:
                 tool_arguments = tool_call_item.arguments
@@ -418,6 +449,7 @@ def _extract_tool_call_info(tool_call_item: Any) -> tuple[str, str, dict[str, An
             arguments = tool_call_item.arguments
             if isinstance(arguments, str):
                 import json
+
                 tool_arguments = json.loads(arguments) if arguments else {}
             elif arguments is None:
                 tool_arguments = {}
@@ -466,11 +498,11 @@ def _extract_tool_response_info(tool_map: dict[str, Any], tool_output_item: Any)
 
 async def convert_openai_to_agentex_events(stream_response):
     """Convert OpenAI streaming events to AgentEx TaskMessageUpdate events with reasoning support.
-    
+
     This is an enhanced version of the base converter that includes support for:
     - Reasoning content deltas (for o1 models)
     - Reasoning summary deltas (for o1 models)
-    
+
     Args:
         stream_response: An async iterator of OpenAI streaming events
     Yields:
@@ -488,8 +520,8 @@ async def convert_openai_to_agentex_events(stream_response):
         event_count += 1
 
         # Check for raw response events which contain the actual OpenAI streaming events
-        if hasattr(event, 'type') and event.type == 'raw_response_event':
-            if hasattr(event, 'data'):
+        if hasattr(event, "type") and event.type == "raw_response_event":
+            if hasattr(event, "data"):
                 raw_event = event.data
 
                 # Check for ResponseOutputItemAddedEvent which signals a new message starting
@@ -504,7 +536,7 @@ async def convert_openai_to_agentex_events(stream_response):
                     if item_id in item_id_to_index:
                         # Get the message type to decide whether to send done event
                         message_type = item_id_to_type.get(item_id, "text")
-                        
+
                         # Don't send done events for reasoning content/summary
                         # They just end with their last delta
                         if message_type not in ("reasoning_content", "reasoning_summary"):
@@ -608,7 +640,7 @@ async def convert_openai_to_agentex_events(stream_response):
                 # Check if this is a text delta event from OpenAI
                 elif isinstance(raw_event, ResponseTextDeltaEvent):
                     # Check if this event has an item_id
-                    item_id = getattr(raw_event, 'item_id', None)
+                    item_id = getattr(raw_event, "item_id", None)
 
                     # If this is a new item_id we haven't seen, it's a new message
                     if item_id and item_id not in item_id_to_index:
@@ -647,13 +679,13 @@ async def convert_openai_to_agentex_events(stream_response):
                     )
                     yield delta_message
 
-        elif hasattr(event, 'type') and event.type == 'run_item_stream_event':
+        elif hasattr(event, "type") and event.type == "run_item_stream_event":
             # Skip reasoning_item events - they're handled via raw_response_event above
-            if hasattr(event, 'item') and event.item.type == 'reasoning_item':
+            if hasattr(event, "item") and event.item.type == "reasoning_item":
                 continue
 
             # Check for tool_call_item type (this is when a tool is being called)
-            elif hasattr(event, 'item') and event.item.type == 'tool_call_item':
+            elif hasattr(event, "item") and event.item.type == "tool_call_item":
                 # Extract tool call information using the helper method
                 call_id, tool_name, tool_arguments = _extract_tool_call_info(event.item.raw_item)
                 tool_map[call_id] = tool_name
@@ -671,7 +703,7 @@ async def convert_openai_to_agentex_events(stream_response):
                 )
 
             # Check for tool_call_output_item type (this is when a tool returns output)
-            elif hasattr(event, 'item') and event.item.type == 'tool_call_output_item':
+            elif hasattr(event, "item") and event.item.type == "tool_call_output_item":
                 # Extract tool response information using the helper method
                 call_id, tool_name, content = _extract_tool_response_info(tool_map, event.item.raw_item)
                 tool_response_content = ToolResponseContent(
@@ -687,4 +719,3 @@ async def convert_openai_to_agentex_events(stream_response):
                     index=message_index,
                     content=tool_response_content,
                 )
-
diff --git a/src/agentex/lib/core/services/adk/providers/openai.py b/src/agentex/lib/core/services/adk/providers/openai.py
index 75e507d8a..1ae29589d 100644
--- a/src/agentex/lib/core/services/adk/providers/openai.py
+++ b/src/agentex/lib/core/services/adk/providers/openai.py
@@ -14,15 +14,8 @@
 from agents.guardrail import InputGuardrail, OutputGuardrail
 from agents.exceptions import InputGuardrailTripwireTriggered, OutputGuardrailTripwireTriggered
 from openai.types.responses import (
-    ResponseCompletedEvent,
-    ResponseTextDeltaEvent,
-    ResponseFunctionToolCall,
     ResponseFunctionWebSearch,
-    ResponseOutputItemDoneEvent,
     ResponseCodeInterpreterToolCall,
-    ResponseReasoningSummaryPartDoneEvent,
-    ResponseReasoningSummaryPartAddedEvent,
-    ResponseReasoningSummaryTextDeltaEvent,
 )
 
 # Local imports
@@ -31,24 +24,14 @@
 from agentex.lib.utils.mcp import redact_mcp_server_params
 from agentex.lib.utils.temporal import heartbeat_if_in_workflow
 from agentex.lib.core.tracing.tracer import AsyncTracer
-from agentex.types.task_message_delta import (
-    TextDelta,
-    ReasoningSummaryDelta,
-)
-from agentex.types.task_message_update import (
-    StreamTaskMessageFull,
-    StreamTaskMessageDelta,
-)
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.task_message_update import StreamTaskMessageFull
 from agentex.types.task_message_content import (
     TextContent,
-    ReasoningContent,
     ToolRequestContent,
     ToolResponseContent,
 )
-from agentex.lib.core.services.adk.streaming import (
-    StreamingService,
-    StreamingTaskMessageContext,
-)
+from agentex.lib.core.services.adk.streaming import StreamingService
 
 logger = logging.make_logger(__name__)
 
@@ -695,7 +678,7 @@ async def run_agent_streamed_auto_send(
         input_guardrails: list[InputGuardrail] | None = None,
         output_guardrails: list[OutputGuardrail] | None = None,
         max_turns: int | None = None,
-        previous_response_id: str | None = None,  # noqa: ARG002
+        previous_response_id: str | None = None,
         created_at: datetime | None = None,
     ) -> RunResultStreaming:
         """
@@ -733,8 +716,6 @@ async def run_agent_streamed_auto_send(
         if self.agentex_client is None:
             raise ValueError("Agentex client must be provided for auto_send methods")
 
-        tool_call_map: dict[str, ResponseFunctionToolCall] = {}
-
         if self.tracer is None:
             raise RuntimeError("Tracer not initialized - ensure tracer is provided to OpenAIService")
         trace = self.tracer.trace(trace_id)
@@ -761,12 +742,13 @@ async def run_agent_streamed_auto_send(
         ) as span:
             heartbeat_if_in_workflow("run agent streamed auto send")
 
-            # Consume the workflow-supplied created_at on the FIRST message
-            # opened by this activity (whichever streaming context opens first
-            # for this turn). That's the message that races the workflow's
-            # user-echo at the server. Subsequent messages in the same turn are
-            # separated by network/processing latency and rely on the server's
-            # wall clock.
+            # AGX1-378 restored: created_at is now threaded through
+            # UnifiedEmitter.auto_send_turn -> auto_send -> every
+            # streaming_task_message_context call, so the first agent message of
+            # the turn is stamped with the workflow-supplied timestamp (e.g.
+            # workflow.now()) just as the original inline loop did.
+            # The dispenser is still used below for guardrail-rejection messages,
+            # which open their own streaming contexts directly.
             _take_created_at = _make_created_at_dispenser(created_at)
 
             async with mcp_server_context(mcp_server_params, mcp_timeout_seconds) as servers:
@@ -803,204 +785,48 @@ async def run_agent_streamed_auto_send(
 
                 agent = Agent(**agent_kwargs)
 
-                # Run with streaming
-                if max_turns is not None:
+                # Run with streaming. Forward previous_response_id so callers that
+                # continue a Responses-API conversation resume the prior response
+                # instead of silently starting a fresh one (mirrors the non-auto-send
+                # run_agent_streamed path).
+                if max_turns is not None and previous_response_id is not None:
+                    result = Runner.run_streamed(
+                        starting_agent=agent,
+                        input=input_list,
+                        max_turns=max_turns,
+                        previous_response_id=previous_response_id,
+                    )
+                elif max_turns is not None:
                     result = Runner.run_streamed(starting_agent=agent, input=input_list, max_turns=max_turns)
+                elif previous_response_id is not None:
+                    result = Runner.run_streamed(
+                        starting_agent=agent, input=input_list, previous_response_id=previous_response_id
+                    )
                 else:
                     result = Runner.run_streamed(starting_agent=agent, input=input_list)
 
-                item_id_to_streaming_context: dict[str, StreamingTaskMessageContext] = {}
-                unclosed_item_ids: set[str] = set()
-                # Simple string to accumulate reasoning summary
-                current_reasoning_summary: str = ""
+                # Migrate onto the unified harness surface: wrap the streamed run
+                # as an OpenAITurn (provider -> canonical StreamTaskMessage*
+                # adapter) and let UnifiedEmitter.auto_send_turn drive delivery +
+                # tracing + usage. The previous ~270-line inline loop that hand-
+                # rolled per-item streaming contexts, reasoning handling, and
+                # span derivation now lives in the shared harness modules.
+                # Imported lazily: openai_turn pulls in agentex.lib.adk, which
+                # imports this service module, so an eager import would create a
+                # circular import at package init.
+                from agentex.lib.adk.providers._modules.openai_turn import OpenAITurn
+
+                turn = OpenAITurn(result=result, model=model)
+                emitter = UnifiedEmitter(
+                    task_id=task_id,
+                    trace_id=trace_id,
+                    parent_span_id=parent_span_id,
+                    tracer=self.tracer,
+                    streaming=self.streaming_service,
+                )
 
                 try:
-                    # Process streaming events with TaskMessage creation
-                    async for event in result.stream_events():
-                        heartbeat_if_in_workflow("processing stream event with auto send")
-
-                        if event.type == "run_item_stream_event":
-                            if event.item.type == "tool_call_item":
-                                tool_call_item = event.item.raw_item
-
-                                # Extract tool call information using the helper method
-                                call_id, tool_name, tool_arguments = self._extract_tool_call_info(tool_call_item)
-                                tool_call_map[call_id] = tool_call_item
-
-                                tool_request_content = ToolRequestContent(
-                                    author="agent",
-                                    tool_call_id=call_id,
-                                    name=tool_name,
-                                    arguments=tool_arguments,
-                                )
-
-                                # Create tool request using streaming context (immediate completion)
-                                async with self.streaming_service.streaming_task_message_context(
-                                    task_id=task_id,
-                                    initial_content=tool_request_content,
-                                    created_at=_take_created_at(),
-                                ) as streaming_context:
-                                    # The message has already been persisted, but we still need to send an upda
-                                    await streaming_context.stream_update(
-                                        update=StreamTaskMessageFull(
-                                            parent_task_message=streaming_context.task_message,
-                                            content=tool_request_content,
-                                            type="full",
-                                        ),
-                                    )
-
-                            elif event.item.type == "tool_call_output_item":
-                                tool_output_item = event.item.raw_item
-
-                                # Extract tool response information using the helper method
-                                call_id, tool_name, content = self._extract_tool_response_info(
-                                    tool_call_map, tool_output_item
-                                )
-
-                                tool_response_content = ToolResponseContent(
-                                    author="agent",
-                                    tool_call_id=call_id,
-                                    name=tool_name,
-                                    content=content,
-                                )
-
-                                # Create tool response using streaming context (immediate completion)
-                                async with self.streaming_service.streaming_task_message_context(
-                                    task_id=task_id,
-                                    initial_content=tool_response_content,
-                                    created_at=_take_created_at(),
-                                ) as streaming_context:
-                                    # The message has already been persisted, but we still need to send an update
-                                    await streaming_context.stream_update(
-                                        update=StreamTaskMessageFull(
-                                            parent_task_message=streaming_context.task_message,
-                                            content=tool_response_content,
-                                            type="full",
-                                        ),
-                                    )
-
-                        elif event.type == "raw_response_event":
-                            if isinstance(event.data, ResponseTextDeltaEvent):
-                                # Handle text delta
-                                item_id = event.data.item_id
-
-                                # Check if we already have a streaming context for this item
-                                if item_id not in item_id_to_streaming_context:
-                                    # Create a new streaming context for this item
-                                    streaming_context = self.streaming_service.streaming_task_message_context(
-                                        task_id=task_id,
-                                        initial_content=TextContent(
-                                            author="agent",
-                                            content="",
-                                        ),
-                                        created_at=_take_created_at(),
-                                    )
-                                    # Open the streaming context
-                                    item_id_to_streaming_context[item_id] = await streaming_context.open()
-                                    unclosed_item_ids.add(item_id)
-                                else:
-                                    streaming_context = item_id_to_streaming_context[item_id]
-
-                                # Stream the delta through the streaming service
-                                await streaming_context.stream_update(
-                                    update=StreamTaskMessageDelta(
-                                        parent_task_message=streaming_context.task_message,
-                                        delta=TextDelta(text_delta=event.data.delta, type="text"),
-                                        type="delta",
-                                    ),
-                                )
-                            # Reasoning step one: new summary part added
-                            elif isinstance(event.data, ResponseReasoningSummaryPartAddedEvent):
-                                # We need to create a new streaming context for this reasoning item
-                                item_id = event.data.item_id
-
-                                # Reset the reasoning summary string
-                                current_reasoning_summary = ""
-
-                                streaming_context = self.streaming_service.streaming_task_message_context(
-                                    task_id=task_id,
-                                    initial_content=ReasoningContent(
-                                        author="agent",
-                                        summary=[],
-                                        content=[],
-                                        type="reasoning",
-                                        style="active",
-                                    ),
-                                    created_at=_take_created_at(),
-                                )
-
-                                # Replace the existing streaming context (if it exists)
-                                # Why do we replace? Cause all the reasoning parts use the same item_id!
-                                item_id_to_streaming_context[item_id] = await streaming_context.open()
-                                unclosed_item_ids.add(item_id)
-
-                            # Reasoning step two: handling summary text delta
-                            elif isinstance(event.data, ResponseReasoningSummaryTextDeltaEvent):
-                                # Accumulate the delta into the string
-                                current_reasoning_summary += event.data.delta
-                                streaming_context = item_id_to_streaming_context[item_id]
-
-                                # Stream the summary delta through the streaming service
-                                await streaming_context.stream_update(
-                                    update=StreamTaskMessageDelta(
-                                        parent_task_message=streaming_context.task_message,
-                                        delta=ReasoningSummaryDelta(
-                                            summary_index=event.data.summary_index,
-                                            summary_delta=event.data.delta,
-                                            type="reasoning_summary",
-                                        ),
-                                        type="delta",
-                                    ),
-                                )
-
-                            # Reasoning step three: handling summary text done, closing the streaming context
-                            elif isinstance(event.data, ResponseReasoningSummaryPartDoneEvent):
-                                # Handle reasoning summary text completion
-                                streaming_context = item_id_to_streaming_context[item_id]
-
-                                # Create the complete reasoning content with the accumulated summary
-                                complete_reasoning_content = ReasoningContent(
-                                    author="agent",
-                                    summary=[current_reasoning_summary],
-                                    content=[],
-                                    type="reasoning",
-                                    style="static",
-                                )
-
-                                # Send a full message update with the complete reasoning content
-                                await streaming_context.stream_update(
-                                    update=StreamTaskMessageFull(
-                                        parent_task_message=streaming_context.task_message,
-                                        content=complete_reasoning_content,
-                                        type="full",
-                                    ),
-                                )
-
-                                await streaming_context.close()
-                                unclosed_item_ids.discard(item_id)
-
-                            elif isinstance(event.data, ResponseOutputItemDoneEvent):
-                                # Handle item completion
-                                item_id = event.data.item.id
-
-                                # Finish the streaming context (sends DONE event and updates message)
-                                if item_id in item_id_to_streaming_context:
-                                    streaming_context = item_id_to_streaming_context[item_id]
-                                    await streaming_context.close()
-                                    if item_id in unclosed_item_ids:
-                                        unclosed_item_ids.remove(item_id)
-
-                            elif isinstance(event.data, ResponseCompletedEvent):
-                                # All items complete, finish all remaining streaming contexts for this session
-                                # Create a copy to avoid modifying set during iteration
-                                remaining_items = list(unclosed_item_ids)
-                                for item_id in remaining_items:
-                                    if (
-                                        item_id in unclosed_item_ids and item_id in item_id_to_streaming_context
-                                    ):  # Check if still unclosed
-                                        streaming_context = item_id_to_streaming_context[item_id]
-                                        await streaming_context.close()
-                                        unclosed_item_ids.discard(item_id)
+                    await emitter.auto_send_turn(turn, created_at=created_at)
 
                 except InputGuardrailTripwireTriggered as e:
                     # Handle guardrail trigger by sending a rejection message
@@ -1080,18 +906,6 @@ async def run_agent_streamed_auto_send(
                     # Re-raise to let the activity handle it
                     raise
 
-                finally:
-                    # Cleanup: ensure all streaming contexts for this session are properly finished
-                    # Create a copy to avoid modifying set during iteration
-                    remaining_items = list(unclosed_item_ids)
-                    for item_id in remaining_items:
-                        if (
-                            item_id in unclosed_item_ids and item_id in item_id_to_streaming_context
-                        ):  # Check if still unclosed
-                            streaming_context = item_id_to_streaming_context[item_id]
-                            await streaming_context.close()
-                            unclosed_item_ids.discard(item_id)
-
                 if span:
                     span.output = {
                         "new_items": [
diff --git a/tests/lib/adk/providers/test_openai_activities.py b/tests/lib/adk/providers/test_openai_activities.py
index c933b6ce4..2f89308a9 100644
--- a/tests/lib/adk/providers/test_openai_activities.py
+++ b/tests/lib/adk/providers/test_openai_activities.py
@@ -335,23 +335,61 @@ async def mock_stream_events():
         expected_params.tools = [CodeInterpreterTool(tool_config={"type": "code_interpreter"})]
         self._assert_starting_agent_params(starting_agent, expected_params)
 
-        # Verify streaming context received tool request and response updates
-        # Should have been called twice - once for tool request, once for response
-        assert mock_streaming_context.stream_update.call_count == 2
+        # Under the unified harness, the OpenAI events are converted to canonical
+        # StreamTaskMessageFull events and auto_send posts each full tool message
+        # by opening a streaming context with the content as initial_content and
+        # closing it (no stream_update). So assert on the opened contents.
+        opened = mock_streaming_context.opened_contents
+        tool_contents = [c for c in opened if getattr(c, "type", None) in ("tool_request", "tool_response")]
+        assert len(tool_contents) == 2
+
+        # First opened context is the tool request.
+        first = tool_contents[0]
+        assert first.type == "tool_request"
+        assert first.name == "code_interpreter"
+        assert first.tool_call_id == "code_interpreter_call_123"
+
+        # Second opened context is the tool response.
+        second = tool_contents[1]
+        assert second.type == "tool_response"
+        assert second.tool_call_id == "code_interpreter_call_123"
 
-        # First call should be tool request
-        first_call = mock_streaming_context.stream_update.call_args_list[0]
-        first_update = first_call[1]["update"]  # keyword argument
-        assert hasattr(first_update, "content")
-        assert first_update.content.name == "code_interpreter"
-        assert first_update.content.tool_call_id == "code_interpreter_call_123"
+    @patch("agents.Runner.run_streamed")
+    async def test_run_agent_streamed_auto_send_forwards_previous_response_id(self, mock_runner_run_streamed):
+        """previous_response_id must reach Runner.run_streamed so a Responses-API
+        conversation continues instead of silently starting fresh."""
+        from agentex.lib.core.temporal.activities.adk.providers.openai_activities import (
+            RunAgentStreamedAutoSendParams,
+        )
 
-        # Second call should be tool response
-        second_call = mock_streaming_context.stream_update.call_args_list[1]
-        second_update = second_call[1]["update"]  # keyword argument
-        assert hasattr(second_update, "content")
-        assert second_update.content.name == "code_interpreter_call"
-        assert second_update.content.tool_call_id == "code_interpreter_call_123"
+        mock_streaming_result = self._create_streaming_result_mock()
+
+        async def _no_events():
+            return
+            yield
+
+        mock_streaming_result.stream_events = _no_events
+        mock_runner_run_streamed.return_value = mock_streaming_result
+
+        mock_tracer = self._create_mock_tracer()
+        openai_service, openai_activities, env = self._create_test_setup(mock_tracer)
+        self._setup_streaming_service_mocks(openai_service)
+
+        params = RunAgentStreamedAutoSendParams(
+            input_list=[{"role": "user", "content": "continue"}],
+            mcp_server_params=[],
+            agent_name="test_agent",
+            agent_instructions="You are a helpful assistant",
+            trace_id="test-trace-id",
+            parent_span_id="test-span-id",
+            task_id="test-task-id",
+            previous_response_id="response_123",
+        )
+
+        await env.run(openai_activities.run_agent_streamed_auto_send, params)
+
+        mock_runner_run_streamed.assert_called_once()
+        assert mock_runner_run_streamed.call_args.kwargs.get("previous_response_id") == "response_123"
 
     def _create_mock_tracer(self):
         """Helper method to create a properly mocked tracer with async context manager support."""
@@ -613,6 +651,60 @@ def _assert_tools_conversion(self, starting_agent, tools_case, _original_tools):
         else:
             raise ValueError(f"Unknown tools_case: {tools_case}")
 
+    @patch("agents.Runner.run_streamed")
+    async def test_run_agent_streamed_auto_send_forwards_created_at(self, mock_runner_run_streamed):
+        """created_at is forwarded to every streaming context opened by auto_send_turn (AGX1-378)."""
+        from datetime import datetime, timezone
+
+        from agentex.lib.core.temporal.activities.adk.providers.openai_activities import (
+            RunAgentStreamedAutoSendParams,
+        )
+
+        deterministic_ts = datetime(2025, 1, 15, 12, 0, 0, tzinfo=timezone.utc)
+
+        mock_streaming_result = self._create_streaming_result_mock()
+
+        # Emit a tool call + tool response so auto_send actually opens streaming
+        # contexts; an empty stream opens none, making the assertion below
+        # vacuously true and unable to catch a created_at regression.
+        async def mock_stream_events():
+            tool_call_event = Mock()
+            tool_call_event.type = "run_item_stream_event"
+            tool_call_event.item = self._create_tool_call_item_mock(self._create_code_interpreter_tool_call_mock())
+            yield tool_call_event
+
+            tool_response_event = Mock()
+            tool_response_event.type = "run_item_stream_event"
+            tool_response_event.item = self._create_tool_output_item_mock()
+            yield tool_response_event
+
+        mock_streaming_result.stream_events = mock_stream_events
+        mock_runner_run_streamed.return_value = mock_streaming_result
+
+        mock_tracer = self._create_mock_tracer()
+        openai_service, openai_activities, env = self._create_test_setup(mock_tracer)
+        mock_ctx, recorded_created_ats = self._setup_streaming_service_mocks_with_created_at(openai_service)
+
+        params = RunAgentStreamedAutoSendParams(
+            input_list=[{"role": "user", "content": "hello"}],
+            mcp_server_params=[],
+            agent_name="test_agent",
+            agent_instructions="You are a helpful assistant",
+            trace_id="test-trace-id",
+            parent_span_id="test-span-id",
+            task_id="test-task-id",
+            created_at=deterministic_ts,
+        )
+
+        await env.run(openai_activities.run_agent_streamed_auto_send, params)
+
+        # Guard against a vacuous pass: at least one streaming context must have
+        # been opened so the per-context created_at assertion is meaningful.
+        assert recorded_created_ats, "expected at least one streaming context to be opened"
+        assert all(ts == deterministic_ts for ts in recorded_created_ats), (
+            f"Expected all streaming contexts to receive created_at={deterministic_ts!r}, got: {recorded_created_ats!r}"
+        )
+
     def _setup_streaming_service_mocks(self, openai_service):
         """Helper method to setup streaming service mocks for run_agent_auto_send."""
         from unittest.mock import AsyncMock
@@ -635,21 +727,64 @@ def _setup_streaming_service_mocks(self, openai_service):
         mock_streaming_context.task_message = mock_task_message
         mock_streaming_context.stream_update = AsyncMock()
 
+        # Record the initial_content passed to each opened streaming context.
+        # The unified harness auto_send path posts full tool messages by opening
+        # a context with initial_content and closing it (no stream_update), so
+        # assertions inspect the opened contents rather than stream_update calls.
+        opened_contents: list = []
+
         # Create a proper async context manager mock
         from contextlib import asynccontextmanager
         from unittest.mock import AsyncMock
 
         @asynccontextmanager
-        async def mock_streaming_context_manager(*_args, **_kwargs):
+        async def mock_streaming_context_manager(*_args, **kwargs):
+            if "initial_content" in kwargs:
+                opened_contents.append(kwargs["initial_content"])
             yield mock_streaming_context
 
         mock_streaming_service.streaming_task_message_context = mock_streaming_context_manager
+        # Expose the recorded contents on the returned context mock for assertions.
+        mock_streaming_context.opened_contents = opened_contents
 
         openai_service.streaming_service = mock_streaming_service
         openai_service.agentex_client = mock_agentex_client
 
         return mock_streaming_context
 
+    def _setup_streaming_service_mocks_with_created_at(self, openai_service):
+        """Like _setup_streaming_service_mocks but also records every created_at kwarg."""
+        from contextlib import asynccontextmanager
+        from unittest.mock import AsyncMock
+
+        from agentex.types.task_message import TaskMessage
+
+        mock_streaming_service = AsyncMock()
+        mock_agentex_client = AsyncMock()
+
+        mock_streaming_context = AsyncMock()
+        mock_task_message = Mock(spec=TaskMessage)
+        mock_task_message.id = "test-task-message-id"
+        mock_task_message.task_id = "test-task-id"
+        mock_task_message.content = {"type": "text", "content": "test"}
+        mock_streaming_context.task_message = mock_task_message
+        mock_streaming_context.stream_update = AsyncMock()
+
+        recorded_created_ats: list = []
+
+        @asynccontextmanager
+        async def mock_ctx_manager(*_args, **kwargs):
+            recorded_created_ats.append(kwargs.get("created_at"))
+            yield mock_streaming_context
+
+        mock_streaming_service.streaming_task_message_context = mock_ctx_manager
+        mock_streaming_context.opened_contents = []
+
+        openai_service.streaming_service = mock_streaming_service
+        openai_service.agentex_client = mock_agentex_client
+
+        return mock_streaming_context, recorded_created_ats
+
     def _create_code_interpreter_tool_call_mock(self, call_id="code_interpreter_call_123"):
         """Helper to create ResponseCodeInterpreterToolCall mock objects."""
         return ResponseCodeInterpreterToolCall(
@@ -680,6 +815,9 @@ def _create_streaming_result_mock(self, final_output="Code executed successfully
         mock_streaming_result = Mock(spec=RunResultStreaming)
         mock_streaming_result.final_output = final_output
         mock_streaming_result.new_items = []
+        # OpenAITurn reads raw_responses after stream exhaustion to aggregate
+        # usage; provide an empty list so usage normalizes to model-only.
+        mock_streaming_result.raw_responses = []
         mock_streaming_result.final_input_list = [
             {"role": "user", "content": "Run some Python code"},
             {"role": "assistant", "content": final_output},
diff --git a/tests/lib/adk/providers/test_openai_turn.py b/tests/lib/adk/providers/test_openai_turn.py
new file mode 100644
index 000000000..023b0ed4e
--- /dev/null
+++ b/tests/lib/adk/providers/test_openai_turn.py
@@ -0,0 +1,246 @@
+"""Tests for OpenAITurn and its usage mapping.
+
+OpenAITurn adapts an OpenAI Agents SDK streamed run onto the harness
+``HarnessTurn`` protocol. These tests cover:
+- ``openai_usage_to_turn_usage`` (full usage, None, real zeros)
+- ``_aggregate_usage`` (empty, single, multiple ModelResponses)
+- ``OpenAITurn.events`` driven by an injected canonical stream (bypassing the
+  OpenAI->canonical converter), plus ``usage()`` before/after exhaustion
+- the ``ValueError`` guard when neither ``result`` nor ``stream`` is supplied
+"""
+
+import types as _types
+
+import pytest
+from agents.usage import Usage
+from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
+
+from agentex.types.text_content import TextContent
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+
+
+def _import_target():
+    from agentex.lib.adk.providers._modules.openai_turn import (
+        OpenAITurn,
+        _aggregate_usage,
+        openai_usage_to_turn_usage,
+    )
+
+    return OpenAITurn, _aggregate_usage, openai_usage_to_turn_usage
+
+
+# ---------------------------------------------------------------------------
+# openai_usage_to_turn_usage
+# ---------------------------------------------------------------------------
+
+
+def test_usage_mapping_full():
+    _, _, openai_usage_to_turn_usage = _import_target()
+    usage = Usage(
+        requests=3,
+        input_tokens=100,
+        input_tokens_details=InputTokensDetails(cached_tokens=20),
+        output_tokens=50,
+        output_tokens_details=OutputTokensDetails(reasoning_tokens=10),
+        total_tokens=150,
+    )
+    turn_usage = openai_usage_to_turn_usage(usage, model="gpt-4o")
+
+    assert turn_usage.model == "gpt-4o"
+    assert turn_usage.num_llm_calls == 3
+    assert turn_usage.input_tokens == 100
+    assert turn_usage.cached_input_tokens == 20
+    assert turn_usage.output_tokens == 50
+    assert turn_usage.reasoning_tokens == 10
+    assert turn_usage.total_tokens == 150
+
+
+def test_usage_mapping_none_usage():
+    _, _, openai_usage_to_turn_usage = _import_target()
+    turn_usage = openai_usage_to_turn_usage(None, model="gpt-4o")
+
+    assert turn_usage.model == "gpt-4o"
+    assert turn_usage.num_llm_calls == 0
+    assert turn_usage.input_tokens is None
+    assert turn_usage.output_tokens is None
+    assert turn_usage.total_tokens is None
+
+
+def test_usage_mapping_real_zeros_are_preserved():
+    # A cache hit can legitimately produce 0 output tokens; a present-but-zero
+    # value must survive as 0, not be coerced to None.
+    _, _, openai_usage_to_turn_usage = _import_target()
+    usage = Usage(
+        requests=1,
+        input_tokens=0,
+        input_tokens_details=InputTokensDetails(cached_tokens=0),
+        output_tokens=0,
+        output_tokens_details=OutputTokensDetails(reasoning_tokens=0),
+        total_tokens=0,
+    )
+    turn_usage = openai_usage_to_turn_usage(usage, model="m")
+
+    assert turn_usage.input_tokens == 0
+    assert turn_usage.cached_input_tokens == 0
+    assert turn_usage.output_tokens == 0
+    assert turn_usage.reasoning_tokens == 0
+    assert turn_usage.total_tokens == 0
+    assert turn_usage.num_llm_calls == 1
+
+
+# ---------------------------------------------------------------------------
+# _aggregate_usage
+# ---------------------------------------------------------------------------
+
+
+def _resp(usage):
+    return _types.SimpleNamespace(usage=usage)
+
+
+def test_aggregate_usage_empty():
+    _, _aggregate_usage, _ = _import_target()
+    assert _aggregate_usage([]) is None
+
+
+def test_aggregate_usage_single():
+    _, _aggregate_usage, _ = _import_target()
+    usage = Usage(requests=1, input_tokens=10, output_tokens=5, total_tokens=15)
+    total = _aggregate_usage([_resp(usage)])
+
+    assert total is not None
+    assert total.requests == 1
+    assert total.input_tokens == 10
+    assert total.output_tokens == 5
+    assert total.total_tokens == 15
+
+
+def test_aggregate_usage_multiple():
+    _, _aggregate_usage, _ = _import_target()
+    u1 = Usage(
+        requests=1,
+        input_tokens=10,
+        input_tokens_details=InputTokensDetails(cached_tokens=2),
+        output_tokens=5,
+        output_tokens_details=OutputTokensDetails(reasoning_tokens=1),
+        total_tokens=15,
+    )
+    u2 = Usage(
+        requests=2,
+        input_tokens=20,
+        input_tokens_details=InputTokensDetails(cached_tokens=3),
+        output_tokens=7,
+        output_tokens_details=OutputTokensDetails(reasoning_tokens=4),
+        total_tokens=27,
+    )
+    # A response without usage must be skipped, not crash the aggregation.
+    total = _aggregate_usage([_resp(u1), _resp(None), _resp(u2)])
+
+    assert total is not None
+    assert total.requests == 3
+    assert total.input_tokens == 30
+    assert total.output_tokens == 12
+    assert total.total_tokens == 42
+    assert total.input_tokens_details.cached_tokens == 5
+    assert total.output_tokens_details.reasoning_tokens == 5
+
+
+# ---------------------------------------------------------------------------
+# OpenAITurn.events / usage / construction
+# ---------------------------------------------------------------------------
+
+
+async def _canonical_stream(events):
+    for e in events:
+        yield e
+
+
+@pytest.mark.asyncio
+async def test_turn_events_forwards_injected_stream():
+    OpenAITurn, _, _ = _import_target()
+    events = [
+        StreamTaskMessageStart(type="start", index=0, content=TextContent(type="text", author="agent", content="")),
+        StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="Hi")),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    turn = OpenAITurn(stream=_canonical_stream(events), model="gpt-4o")
+
+    out = [e async for e in turn.events]
+    assert out == events
+
+
+@pytest.mark.asyncio
+async def test_turn_usage_before_and_after_exhaustion_with_injected_stream():
+    OpenAITurn, _, _ = _import_target()
+    events = [
+        StreamTaskMessageStart(type="start", index=0, content=TextContent(type="text", author="agent", content="")),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+    turn = OpenAITurn(stream=_canonical_stream(events), model="gpt-4o")
+
+    # Before exhaustion: usage carries only the model name.
+    before = turn.usage()
+    assert before.model == "gpt-4o"
+    assert before.input_tokens is None
+
+    async for _ in turn.events:
+        pass
+
+    # With an injected stream there is no run to read usage from, so usage
+    # stays model-only after exhaustion.
+    after = turn.usage()
+    assert after.model == "gpt-4o"
+    assert after.input_tokens is None
+
+
+@pytest.mark.asyncio
+async def test_turn_usage_populated_from_result_after_exhaustion():
+    OpenAITurn, _, _ = _import_target()
+
+    canonical = [
+        StreamTaskMessageStart(type="start", index=0, content=TextContent(type="text", author="agent", content="")),
+        StreamTaskMessageDone(type="done", index=0),
+    ]
+
+    class _FakeResult:
+        def __init__(self):
+            self.raw_responses = [
+                _resp(Usage(requests=1, input_tokens=8, output_tokens=4, total_tokens=12)),
+            ]
+
+        def stream_events(self):
+            # OpenAITurn passes this to convert_openai_to_agentex_events; we
+            # monkeypatch that converter below so this can yield canonical events.
+            return _canonical_stream(canonical)
+
+    import agentex.lib.adk.providers._modules.openai_turn as mod
+
+    async def _passthrough(stream):
+        async for e in stream:
+            yield e
+
+    original = mod.convert_openai_to_agentex_events
+    mod.convert_openai_to_agentex_events = _passthrough
+    try:
+        turn = OpenAITurn(result=_FakeResult(), model="gpt-4o")
+        out = [e async for e in turn.events]
+    finally:
+        mod.convert_openai_to_agentex_events = original
+
+    assert out == canonical
+    usage = turn.usage()
+    assert usage.model == "gpt-4o"
+    assert usage.num_llm_calls == 1
+    assert usage.input_tokens == 8
+    assert usage.output_tokens == 4
+    assert usage.total_tokens == 12
+
+
+def test_turn_requires_result_or_stream():
+    OpenAITurn, _, _ = _import_target()
+    with pytest.raises(ValueError, match="either"):
+        OpenAITurn()
diff --git a/tests/lib/core/harness/conformance/test_openai_conformance.py b/tests/lib/core/harness/conformance/test_openai_conformance.py
new file mode 100644
index 000000000..e8630ca7f
--- /dev/null
+++ b/tests/lib/core/harness/conformance/test_openai_conformance.py
@@ -0,0 +1,206 @@
+"""OpenAI conformance fixtures for the shared harness span-derivation engine.
+
+The cross-channel guarantee is that yield-delivery and auto_send observe the
+SAME canonical StreamTaskMessage* stream, so span derivation and logical
+delivery over that stream must be equivalent regardless of channel. These
+fixtures express the canonical sequences an OpenAI turn produces (text,
+tool-call, reasoning, and a combined multi-step turn) and assert that property
+via run_cross_channel_conformance.
+
+Registry hazard (see conformance/runner.py): _REGISTRY is process-global and
+collection order across modules is not guaranteed. To stay deterministic this
+module keeps its OWN fixture list and parametrizes over THAT list, rather than
+over all_fixtures(). It still calls register() so the cross-module conformance
+suite can see these fixtures too.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agentex.types.text_delta import TextDelta
+from agentex.types.text_content import TextContent
+from agentex.types.reasoning_content import ReasoningContent
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.types.reasoning_content_delta import ReasoningContentDelta
+
+from .runner import Fixture, register, run_cross_channel_conformance
+
+_OPENAI_FIXTURES: list[Fixture] = []
+
+
+def _add(fixture: Fixture) -> None:
+    """Register both module-locally (for parametrization) and globally."""
+    _OPENAI_FIXTURES.append(fixture)
+    register(fixture)
+
+
+# Text-only turn: start -> deltas -> done.
+# Uses non-empty initial_content so payload comparison catches a channel that
+# drops StreamTaskMessageStart.content.
+_add(
+    Fixture(
+        name="openai-text-only",
+        events=[
+            StreamTaskMessageStart(
+                type="start",
+                index=0,
+                content=TextContent(type="text", author="agent", content="Init"),
+            ),
+            StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="Hel")),
+            StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="lo")),
+            StreamTaskMessageDone(type="done", index=0),
+        ],
+    )
+)
+
+# Tool-call turn: Full(ToolRequestContent) for the call + Full(ToolResponseContent)
+# for the result, matched by tool_call_id. Mirrors the OpenAI converter's tool path.
+_add(
+    Fixture(
+        name="openai-tool-call",
+        events=[
+            StreamTaskMessageFull(
+                type="full",
+                index=0,
+                content=ToolRequestContent(
+                    type="tool_request",
+                    author="agent",
+                    tool_call_id="call_1",
+                    name="get_weather",
+                    arguments={"city": "SF"},
+                ),
+            ),
+            StreamTaskMessageFull(
+                type="full",
+                index=1,
+                content=ToolResponseContent(
+                    type="tool_response",
+                    author="agent",
+                    tool_call_id="call_1",
+                    name="get_weather",
+                    content="72F",
+                ),
+            ),
+        ],
+    )
+)
+
+# Reasoning turn: start(ReasoningContent) -> content deltas -> done.
+# ReasoningContent.summary is seeded in the payload so a channel that drops the
+# summary fails the cross-channel comparison.
+_add(
+    Fixture(
+        name="openai-reasoning",
+        events=[
+            StreamTaskMessageStart(
+                type="start",
+                index=0,
+                content=ReasoningContent(
+                    type="reasoning",
+                    author="agent",
+                    summary=["Thinking..."],
+                ),
+            ),
+            StreamTaskMessageDelta(
+                type="delta",
+                index=0,
+                delta=ReasoningContentDelta(
+                    type="reasoning_content",
+                    content_index=0,
+                    content_delta="step 1",
+                ),
+            ),
+            StreamTaskMessageDone(type="done", index=0),
+        ],
+    )
+)
+
+# Multi-step turn: reasoning, then a tool round, then the final answer text.
+_add(
+    Fixture(
+        name="openai-multi-step",
+        events=[
+            StreamTaskMessageStart(
+                type="start",
+                index=0,
+                content=ReasoningContent(
+                    type="reasoning",
+                    author="agent",
+                    summary=["plan"],
+                ),
+            ),
+            StreamTaskMessageDelta(
+                type="delta",
+                index=0,
+                delta=ReasoningContentDelta(
+                    type="reasoning_content",
+                    content_index=0,
+                    content_delta="elaboration",
+                ),
+            ),
+            StreamTaskMessageDone(type="done", index=0),
+            StreamTaskMessageFull(
+                type="full",
+                index=1,
+                content=ToolRequestContent(
+                    type="tool_request",
+                    author="agent",
+                    tool_call_id="call_2",
+                    name="search",
+                    arguments={"q": "x"},
+                ),
+            ),
+            StreamTaskMessageFull(
+                type="full",
+                index=2,
+                content=ToolResponseContent(
+                    type="tool_response",
+                    author="agent",
+                    tool_call_id="call_2",
+                    name="search",
+                    content="result",
+                ),
+            ),
+            StreamTaskMessageStart(
+                type="start",
+                index=3,
+                content=TextContent(type="text", author="agent", content=""),
+            ),
+            StreamTaskMessageDelta(type="delta", index=3, delta=TextDelta(type="text", text_delta="done")),
+            StreamTaskMessageDone(type="done", index=3),
+        ],
+    )
+)
+
+
+@pytest.mark.parametrize("fixture", _OPENAI_FIXTURES, ids=lambda f: f.name)
+@pytest.mark.asyncio
+async def test_openai_cross_channel_equivalence(fixture: Fixture) -> None:
+    """Assert that yield_events and auto_send produce equivalent logical
+    deliveries and identical span signals for every OpenAI fixture.
+
+    This is the cross-channel guarantee: the two delivery adapters agree on
+    WHAT was delivered (logical content) and HOW spans were derived, even
+    though their streaming-envelope shapes differ (Full vs Start+Done for tool
+    messages).
+
+    The span signals are the ones each channel's tracer ACTUALLY recorded while
+    delivering, not a re-derivation, so a regression where one channel skips
+    deriver.observe() for some event type is caught here.
+    """
+    yield_deliveries, auto_deliveries, yield_spans, auto_spans = await run_cross_channel_conformance(fixture)
+
+    assert yield_deliveries == auto_deliveries, (
+        f"[{fixture.name}] logical deliveries differ:\n  yield:     {yield_deliveries}\n  auto_send: {auto_deliveries}"
+    )
+    assert yield_spans == auto_spans, (
+        f"[{fixture.name}] span signals differ:\n  yield:     {yield_spans}\n  auto_send: {auto_spans}"
+    )

From 904339c21b8cd641a02d903c03d4a8730b4d7e84 Mon Sep 17 00:00:00 2001
From: Declan Brady <declan.brady@scale.com>
Date: Mon, 22 Jun 2026 18:21:44 -0400
Subject: [PATCH 06/10] feat(claude-code): stream-json parser tap for the
 unified harness surface (#420)

---
 .../00_sync/060_claude_code/.dockerignore     |  43 ++
 .../00_sync/060_claude_code/Dockerfile        |  46 ++
 .../00_sync/060_claude_code/README.md         |  76 +++
 .../00_sync/060_claude_code/manifest.yaml     |  55 ++
 .../060_claude_code/project/__init__.py       |   0
 .../00_sync/060_claude_code/project/acp.py    | 137 ++++
 .../00_sync/060_claude_code/pyproject.toml    |  25 +
 .../060_claude_code/tests/test_agent.py       | 162 +++++
 .../tests/test_agent_offline.py               | 210 ++++++
 .../00_base/130_claude_code/.dockerignore     |  43 ++
 .../00_base/130_claude_code/Dockerfile        |  43 ++
 .../00_base/130_claude_code/README.md         |  76 +++
 .../00_base/130_claude_code/manifest.yaml     |  58 ++
 .../130_claude_code/project/__init__.py       |   0
 .../00_base/130_claude_code/project/acp.py    | 149 ++++
 .../00_base/130_claude_code/pyproject.toml    |  25 +
 .../130_claude_code/tests/test_agent.py       | 250 +++++++
 .../tests/test_agent_offline.py               | 243 +++++++
 .../10_temporal/140_claude_code/.dockerignore |  43 ++
 .../10_temporal/140_claude_code/Dockerfile    |  46 ++
 .../10_temporal/140_claude_code/README.md     |  76 +++
 .../10_temporal/140_claude_code/manifest.yaml |  62 ++
 .../140_claude_code/project/__init__.py       |   0
 .../140_claude_code/project/acp.py            |  31 +
 .../140_claude_code/project/activities.py     | 139 ++++
 .../140_claude_code/project/run_worker.py     |  41 ++
 .../140_claude_code/project/workflow.py       | 137 ++++
 .../140_claude_code/pyproject.toml            |  27 +
 .../140_claude_code/tests/test_agent.py       | 249 +++++++
 .../tests/test_agent_offline.py               | 230 +++++++
 src/agentex/lib/adk/__init__.py               |   9 +
 .../lib/adk/_modules/_claude_code_sync.py     | 378 +++++++++++
 .../lib/adk/_modules/_claude_code_turn.py     | 161 +++++
 src/agentex/lib/core/harness/types.py         |   5 +-
 tests/lib/adk/test_claude_code_sync.py        | 637 ++++++++++++++++++
 tests/lib/adk/test_claude_code_turn.py        | 283 ++++++++
 .../test_claude_code_conformance.py           | 202 ++++++
 37 files changed, 4396 insertions(+), 1 deletion(-)
 create mode 100644 examples/tutorials/00_sync/060_claude_code/.dockerignore
 create mode 100644 examples/tutorials/00_sync/060_claude_code/Dockerfile
 create mode 100644 examples/tutorials/00_sync/060_claude_code/README.md
 create mode 100644 examples/tutorials/00_sync/060_claude_code/manifest.yaml
 create mode 100644 examples/tutorials/00_sync/060_claude_code/project/__init__.py
 create mode 100644 examples/tutorials/00_sync/060_claude_code/project/acp.py
 create mode 100644 examples/tutorials/00_sync/060_claude_code/pyproject.toml
 create mode 100644 examples/tutorials/00_sync/060_claude_code/tests/test_agent.py
 create mode 100644 examples/tutorials/00_sync/060_claude_code/tests/test_agent_offline.py
 create mode 100644 examples/tutorials/10_async/00_base/130_claude_code/.dockerignore
 create mode 100644 examples/tutorials/10_async/00_base/130_claude_code/Dockerfile
 create mode 100644 examples/tutorials/10_async/00_base/130_claude_code/README.md
 create mode 100644 examples/tutorials/10_async/00_base/130_claude_code/manifest.yaml
 create mode 100644 examples/tutorials/10_async/00_base/130_claude_code/project/__init__.py
 create mode 100644 examples/tutorials/10_async/00_base/130_claude_code/project/acp.py
 create mode 100644 examples/tutorials/10_async/00_base/130_claude_code/pyproject.toml
 create mode 100644 examples/tutorials/10_async/00_base/130_claude_code/tests/test_agent.py
 create mode 100644 examples/tutorials/10_async/00_base/130_claude_code/tests/test_agent_offline.py
 create mode 100644 examples/tutorials/10_async/10_temporal/140_claude_code/.dockerignore
 create mode 100644 examples/tutorials/10_async/10_temporal/140_claude_code/Dockerfile
 create mode 100644 examples/tutorials/10_async/10_temporal/140_claude_code/README.md
 create mode 100644 examples/tutorials/10_async/10_temporal/140_claude_code/manifest.yaml
 create mode 100644 examples/tutorials/10_async/10_temporal/140_claude_code/project/__init__.py
 create mode 100644 examples/tutorials/10_async/10_temporal/140_claude_code/project/acp.py
 create mode 100644 examples/tutorials/10_async/10_temporal/140_claude_code/project/activities.py
 create mode 100644 examples/tutorials/10_async/10_temporal/140_claude_code/project/run_worker.py
 create mode 100644 examples/tutorials/10_async/10_temporal/140_claude_code/project/workflow.py
 create mode 100644 examples/tutorials/10_async/10_temporal/140_claude_code/pyproject.toml
 create mode 100644 examples/tutorials/10_async/10_temporal/140_claude_code/tests/test_agent.py
 create mode 100644 examples/tutorials/10_async/10_temporal/140_claude_code/tests/test_agent_offline.py
 create mode 100644 src/agentex/lib/adk/_modules/_claude_code_sync.py
 create mode 100644 src/agentex/lib/adk/_modules/_claude_code_turn.py
 create mode 100644 tests/lib/adk/test_claude_code_sync.py
 create mode 100644 tests/lib/adk/test_claude_code_turn.py
 create mode 100644 tests/lib/core/harness/conformance/test_claude_code_conformance.py

diff --git a/examples/tutorials/00_sync/060_claude_code/.dockerignore b/examples/tutorials/00_sync/060_claude_code/.dockerignore
new file mode 100644
index 000000000..c49489471
--- /dev/null
+++ b/examples/tutorials/00_sync/060_claude_code/.dockerignore
@@ -0,0 +1,43 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Environments
+.env**
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Git
+.git
+.gitignore
+
+# Misc
+.DS_Store
diff --git a/examples/tutorials/00_sync/060_claude_code/Dockerfile b/examples/tutorials/00_sync/060_claude_code/Dockerfile
new file mode 100644
index 000000000..ec22d7e0b
--- /dev/null
+++ b/examples/tutorials/00_sync/060_claude_code/Dockerfile
@@ -0,0 +1,46 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+# Install system dependencies including Node.js (required by the claude CLI)
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    nodejs \
+    npm \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+# Install the claude CLI (requires Node.js)
+# NOTE: live runs require ANTHROPIC_API_KEY in the environment.
+RUN npm install -g @anthropic-ai/claude-code || true
+
+ENV UV_HTTP_TIMEOUT=1000
+
+COPY 00_sync/060_claude_code/pyproject.toml /app/060_claude_code/pyproject.toml
+COPY 00_sync/060_claude_code/README.md /app/060_claude_code/README.md
+
+WORKDIR /app/060_claude_code
+
+COPY 00_sync/060_claude_code/project /app/060_claude_code/project
+COPY 00_sync/060_claude_code/tests /app/060_claude_code/tests
+COPY test_utils /app/test_utils
+
+RUN uv pip install --system .[dev]
+
+ENV PYTHONPATH=/app
+
+ENV AGENT_NAME=s060-claude-code
+
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/examples/tutorials/00_sync/060_claude_code/README.md b/examples/tutorials/00_sync/060_claude_code/README.md
new file mode 100644
index 000000000..e9c724732
--- /dev/null
+++ b/examples/tutorials/00_sync/060_claude_code/README.md
@@ -0,0 +1,76 @@
+# Tutorial 060: Sync Claude Code Agent
+
+This tutorial demonstrates how to build a **synchronous** agent that spawns the
+Claude Code CLI as a local subprocess and streams its output through the Agentex
+unified harness surface via ``ClaudeCodeTurn`` and ``UnifiedEmitter``.
+
+## Key Concepts
+
+### ClaudeCodeTurn + UnifiedEmitter
+
+``ClaudeCodeTurn`` wraps ``convert_claude_code_to_agentex_events``, which
+parses the newline-delimited JSON envelopes emitted by
+``claude -p --output-format stream-json``. It implements the ``HarnessTurn``
+protocol: an ``events`` async iterator of canonical ``StreamTaskMessage*``
+objects and a ``usage()`` method (populated once the stream is exhausted).
+
+``UnifiedEmitter.yield_turn(turn)`` is the sync delivery path: it forwards
+events as HTTP yield chunks while tracing as a side effect.
+
+### Local subprocess spawn
+
+The ``_spawn_claude`` function in ``project/acp.py`` uses
+``asyncio.create_subprocess_exec`` to run:
+
+```
+claude -p --output-format stream-json --verbose
+```
+
+The prompt is written to stdin. Stdout is read line by line and fed into
+``ClaudeCodeTurn``. This is purely local -- no Scale sandbox is involved.
+
+Production isolation (Scale sandbox, secret injection, MCP configuration)
+is the golden agent's concern at
+``teams/sgp/agents/golden_agent/project/harness/providers/claude.py``.
+
+### Injectable spawn seam
+
+``_spawn_claude`` is a top-level async generator in ``project/acp.py``.
+Tests monkeypatch it to inject pre-recorded stream-json lines instead of
+spawning the real process, so offline unit tests run without the CLI.
+
+## Files
+
+| File | Description |
+|------|-------------|
+| ``project/acp.py`` | ACP server, ``_spawn_claude`` seam, and message handler |
+| ``tests/test_agent.py`` | Live integration tests (needs CLI + API key) |
+| ``tests/test_agent_offline.py`` | Offline unit tests with injected fake subprocess |
+| ``manifest.yaml`` | Agent configuration |
+
+## Running Locally (live)
+
+Requires the ``claude`` CLI installed and ``ANTHROPIC_API_KEY`` set:
+
+```bash
+npm install -g @anthropic-ai/claude-code
+export ANTHROPIC_API_KEY=sk-ant-...
+agentex agents run
+```
+
+## Running Offline Tests
+
+No CLI or API key needed:
+
+```bash
+uv run pytest tests/test_agent_offline.py -v
+```
+
+## Notes
+
+- Production isolation (sandbox, secrets, MCP) is the golden agent's concern.
+  This tutorial runs the CLI directly to keep the code as simple as possible.
+- Multi-turn session resumption (``claude -r <session_id>``) is out of scope
+  for this tutorial. See the golden agent for that pattern.
+- The ``--verbose`` flag is included to match the golden agent's invocation;
+  it causes the CLI to emit ``stream_event`` triples for incremental streaming.
diff --git a/examples/tutorials/00_sync/060_claude_code/manifest.yaml b/examples/tutorials/00_sync/060_claude_code/manifest.yaml
new file mode 100644
index 000000000..56b9fd9e4
--- /dev/null
+++ b/examples/tutorials/00_sync/060_claude_code/manifest.yaml
@@ -0,0 +1,55 @@
+build:
+  context:
+    root: ../../
+    include_paths:
+      - 00_sync/060_claude_code
+      - test_utils
+    dockerfile: 00_sync/060_claude_code/Dockerfile
+    dockerignore: 00_sync/060_claude_code/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+
+agent:
+  acp_type: sync
+  name: s060-claude-code
+  description: A sync Claude Code agent streaming the unified harness surface via a local CLI subprocess
+
+  temporal:
+    enabled: false
+
+  credentials:
+    - env_var_name: ANTHROPIC_API_KEY
+      secret_name: anthropic-api-key
+      secret_key: api-key
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "s060-claude-code"
+      description: "A sync Claude Code agent streaming via local CLI subprocess"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/00_sync/060_claude_code/project/__init__.py b/examples/tutorials/00_sync/060_claude_code/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/00_sync/060_claude_code/project/acp.py b/examples/tutorials/00_sync/060_claude_code/project/acp.py
new file mode 100644
index 000000000..aad53801a
--- /dev/null
+++ b/examples/tutorials/00_sync/060_claude_code/project/acp.py
@@ -0,0 +1,137 @@
+"""ACP handler for the sync Claude Code tutorial.
+
+Spawns ``claude -p --output-format stream-json --verbose`` as a LOCAL
+asyncio subprocess (no Scale sandbox -- that is the golden agent's
+production concern). Stdout lines are fed into ``ClaudeCodeTurn``, which
+wraps ``convert_claude_code_to_agentex_events``. Events are delivered via
+``UnifiedEmitter.yield_turn``, the sync HTTP yield path.
+
+Live runs require the ``claude`` CLI to be installed and an
+ANTHROPIC_API_KEY (or equivalent credential) to be in the environment.
+For offline testing, see ``tests/test_agent_offline.py``, which injects a
+fake subprocess.
+"""
+
+from __future__ import annotations
+
+import os
+import asyncio
+from typing import AsyncIterator, AsyncGenerator
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import agentex.lib.adk as adk
+from agentex.lib.adk import ClaudeCodeTurn
+from agentex.lib.types.acp import SendMessageParams
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.types.task_message_update import TaskMessageUpdate
+from agentex.types.task_message_content import TaskMessageContent
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+logger = make_logger(__name__)
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+acp = FastACP.create(acp_type="sync")
+
+
+async def _spawn_claude(prompt: str) -> AsyncIterator[str]:
+    """Spawn ``claude -p --output-format stream-json`` locally and yield stdout lines.
+
+    This is a seam: tests replace it with a fake async iterator of
+    pre-recorded lines so no real CLI invocation is needed offline.
+    """
+    proc = await asyncio.create_subprocess_exec(
+        "claude",
+        "-p",
+        "--output-format",
+        "stream-json",
+        "--verbose",
+        stdin=asyncio.subprocess.PIPE,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    assert proc.stdout is not None
+    assert proc.stdin is not None
+
+    proc.stdin.write(prompt.encode())
+    proc.stdin.close()
+
+    # Drain stderr concurrently. With --verbose, Claude Code can write enough to
+    # stderr to fill the OS pipe buffer; if we only read stdout, the CLI blocks
+    # on its stderr write while we block reading stdout — a deadlock. A
+    # background task keeps stderr flowing so stdout never stalls.
+    async def _drain_stderr() -> None:
+        assert proc.stderr is not None
+        async for _ in proc.stderr:
+            pass
+
+    stderr_task = asyncio.create_task(_drain_stderr())
+
+    try:
+        buffer = ""
+        async for chunk in proc.stdout:
+            buffer += chunk.decode("utf-8", errors="replace")
+            while "\n" in buffer:
+                line, buffer = buffer.split("\n", 1)
+                line = line.strip()
+                if line:
+                    yield line
+
+        if buffer.strip():
+            yield buffer.strip()
+
+        await proc.wait()
+    finally:
+        # Release the subprocess and stderr drain task even if the consumer
+        # abandons the generator early (task cancellation / client disconnect):
+        # cancel the drain task and terminate+reap the process if it is still
+        # running, so neither is leaked.
+        stderr_task.cancel()
+        try:
+            await stderr_task
+        except asyncio.CancelledError:
+            pass
+        if proc.returncode is None:
+            try:
+                proc.terminate()
+            except ProcessLookupError:
+                pass
+            await proc.wait()
+
+
+@acp.on_message_send
+async def handle_message_send(
+    params: SendMessageParams,
+) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]:
+    """Handle an incoming message: run Claude Code locally and stream events."""
+    task_id = params.task.id
+    prompt = params.content.content
+    logger.info("Processing message for task %s", task_id)
+
+    async with adk.tracing.span(
+        trace_id=task_id,
+        task_id=task_id,
+        name="message",
+        input={"message": prompt},
+        data={"__span_type__": "AGENT_WORKFLOW"},
+    ) as turn_span:
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+        turn = ClaudeCodeTurn(_spawn_claude(prompt))
+        async for event in emitter.yield_turn(turn):
+            yield event
diff --git a/examples/tutorials/00_sync/060_claude_code/pyproject.toml b/examples/tutorials/00_sync/060_claude_code/pyproject.toml
new file mode 100644
index 000000000..e5c1c4ea6
--- /dev/null
+++ b/examples/tutorials/00_sync/060_claude_code/pyproject.toml
@@ -0,0 +1,25 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "s060-claude-code"
+version = "0.1.0"
+description = "A sync Claude Code agent streaming the unified harness surface via a local CLI subprocess"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+    "python-dotenv>=1.0,<2",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
diff --git a/examples/tutorials/00_sync/060_claude_code/tests/test_agent.py b/examples/tutorials/00_sync/060_claude_code/tests/test_agent.py
new file mode 100644
index 000000000..954a520f3
--- /dev/null
+++ b/examples/tutorials/00_sync/060_claude_code/tests/test_agent.py
@@ -0,0 +1,162 @@
+"""Tests for the sync Claude Code tutorial agent.
+
+LIVE tests (``TestClaudeCodeLive``):
+  - Require the ``claude`` CLI on PATH and ``ANTHROPIC_API_KEY`` set.
+  - Run the full agent end-to-end against a live Agentex server.
+  - Skipped automatically when ``CLAUDE_LIVE_TESTS`` is not set to ``1``.
+
+OFFLINE unit tests (``TestClaudeCodeOffline``):
+  - Inject a fake async iterator of pre-recorded stream-json lines.
+  - Assert the ``ClaudeCodeTurn`` + ``UnifiedEmitter`` pipeline yields events,
+    populates usage, and satisfies the ``HarnessTurn`` protocol.
+  - Always run -- no CLI or API key needed.
+"""
+
+from __future__ import annotations
+
+import os
+import json
+from typing import AsyncIterator
+
+import pytest
+
+# ---------------------------------------------------------------------------
+# Recorded stream-json fixtures
+# ---------------------------------------------------------------------------
+
+_TEXT_ONLY_LINES: list[str] = [
+    json.dumps({"type": "system", "subtype": "init", "session_id": "sess-offline-1"}),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {"content": [{"type": "text", "text": "Hello from Claude Code!"}]},
+        }
+    ),
+    json.dumps(
+        {
+            "type": "result",
+            "usage": {"input_tokens": 10, "output_tokens": 5},
+            "cost_usd": 0.0001,
+            "duration_ms": 250,
+            "num_turns": 1,
+        }
+    ),
+]
+
+
+async def _fake_lines(lines: list[str]) -> AsyncIterator[str]:
+    """Async iterator of pre-recorded stream-json lines (no subprocess)."""
+    for line in lines:
+        yield line
+
+
+# ---------------------------------------------------------------------------
+# Offline tests (always run -- no CLI or API key needed)
+# ---------------------------------------------------------------------------
+
+
+class TestClaudeCodeOffline:
+    """Unit tests that run without a real claude CLI or network."""
+
+    @pytest.mark.asyncio
+    async def test_yields_stream_events(self):
+        """ClaudeCodeTurn drives UnifiedEmitter and yields StreamTaskMessage* events."""
+        from agentex.lib.adk import ClaudeCodeTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+        from agentex.types.task_message_update import StreamTaskMessageStart
+
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+
+        events = [e async for e in emitter.yield_turn(turn)]
+        assert len(events) > 0, "No events yielded"
+        assert any(isinstance(e, StreamTaskMessageStart) for e in events)
+
+    @pytest.mark.asyncio
+    async def test_stream_task_message_done_present(self):
+        """StreamTaskMessageDone must appear after stream exhaustion."""
+        from agentex.lib.adk import ClaudeCodeTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+        from agentex.types.task_message_update import StreamTaskMessageDone
+
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+
+        events = [e async for e in emitter.yield_turn(turn)]
+        assert any(isinstance(e, StreamTaskMessageDone) for e in events), (
+            "Expected at least one StreamTaskMessageDone event"
+        )
+
+    @pytest.mark.asyncio
+    async def test_usage_populated_after_stream_exhausted(self):
+        """ClaudeCodeTurn.usage() returns correct tokens after stream is exhausted."""
+        from agentex.lib.adk import ClaudeCodeTurn
+
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        _ = [e async for e in turn.events]
+        usage = turn.usage()
+        assert usage.input_tokens == 10
+        assert usage.output_tokens == 5
+        assert usage.num_llm_calls == 1
+
+    @pytest.mark.asyncio
+    async def test_protocol_compliance(self):
+        """ClaudeCodeTurn satisfies the HarnessTurn protocol."""
+        from agentex.lib.adk import ClaudeCodeTurn
+
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        assert hasattr(turn, "events"), "ClaudeCodeTurn missing .events"
+        assert hasattr(turn, "usage"), "ClaudeCodeTurn missing .usage()"
+
+
+# ---------------------------------------------------------------------------
+# Live tests (skipped unless CLAUDE_LIVE_TESTS=1)
+# ---------------------------------------------------------------------------
+
+pytestmark_live = pytest.mark.skipif(
+    not os.environ.get("CLAUDE_LIVE_TESTS"),
+    reason="Set CLAUDE_LIVE_TESTS=1 and ensure the `claude` CLI + ANTHROPIC_API_KEY are available",
+)
+
+AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
+AGENT_NAME = os.environ.get("AGENT_NAME", "s060-claude-code")
+
+
+@pytestmark_live
+class TestClaudeCodeLive:
+    """Live streaming tests -- needs the claude CLI + ANTHROPIC_API_KEY."""
+
+    @pytest.fixture
+    def client(self):
+        from agentex import Agentex
+
+        return Agentex(base_url=AGENTEX_API_BASE_URL)
+
+    @pytest.fixture
+    def agent_name(self):
+        return AGENT_NAME
+
+    def test_stream_simple_message(self, client, agent_name: str):
+        """Stream a simple prompt through the local Claude Code subprocess."""
+        from test_utils.sync import collect_streaming_response
+
+        from agentex.types import TextContentParam
+        from agentex.types.agent_rpc_params import ParamsSendMessageRequest
+
+        stream = client.agents.send_message_stream(
+            agent_name=agent_name,
+            params=ParamsSendMessageRequest(
+                content=TextContentParam(
+                    author="user",
+                    content="Reply with exactly three words: hello from claude",
+                    type="text",
+                )
+            ),
+        )
+        aggregated_content, chunks = collect_streaming_response(stream)
+        assert aggregated_content is not None
+        assert len(chunks) >= 1
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/examples/tutorials/00_sync/060_claude_code/tests/test_agent_offline.py b/examples/tutorials/00_sync/060_claude_code/tests/test_agent_offline.py
new file mode 100644
index 000000000..23ac52a57
--- /dev/null
+++ b/examples/tutorials/00_sync/060_claude_code/tests/test_agent_offline.py
@@ -0,0 +1,210 @@
+"""Offline unit tests for the sync Claude Code tutorial agent.
+
+These tests do NOT require the ``claude`` CLI or an ANTHROPIC_API_KEY.
+They inject a fake async iterator of pre-recorded stream-json lines in
+place of the real subprocess spawn, and a fake streaming backend in place
+of the real Redis/AGP layer, then assert that the handler correctly drives
+the unified surface (``UnifiedEmitter.yield_turn``).
+
+The injection seam is the ``_spawn_claude`` function in ``project/acp.py``.
+Tests monkeypatch it with a coroutine that returns a pre-recorded async
+iterator, so the handler code runs in full without any subprocess.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import AsyncIterator
+
+import pytest
+
+from agentex.lib.adk import ClaudeCodeTurn
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.types.task_message_update import (
+    StreamTaskMessageStart,
+)
+
+# ---------------------------------------------------------------------------
+# Recorded stream-json fixtures
+# ---------------------------------------------------------------------------
+
+_TEXT_ONLY_LINES: list[str] = [
+    json.dumps({"type": "system", "subtype": "init", "session_id": "sess-1"}),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {"content": [{"type": "text", "text": "Hello from Claude Code!"}]},
+        }
+    ),
+    json.dumps(
+        {
+            "type": "result",
+            "usage": {"input_tokens": 10, "output_tokens": 5},
+            "cost_usd": 0.0001,
+            "duration_ms": 250,
+            "num_turns": 1,
+        }
+    ),
+]
+
+_TOOL_CALL_LINES: list[str] = [
+    json.dumps({"type": "system", "subtype": "init", "session_id": "sess-2"}),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {
+                "content": [
+                    {
+                        "type": "tool_use",
+                        "id": "tool_abc",
+                        "name": "Bash",
+                        "input": {"command": "echo hello"},
+                    }
+                ]
+            },
+        }
+    ),
+    json.dumps(
+        {
+            "type": "user",
+            "message": {
+                "content": [
+                    {
+                        "type": "tool_result",
+                        "tool_use_id": "tool_abc",
+                        "content": "hello\n",
+                        "is_error": False,
+                    }
+                ]
+            },
+        }
+    ),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {"content": [{"type": "text", "text": "Done."}]},
+        }
+    ),
+    json.dumps(
+        {
+            "type": "result",
+            "usage": {"input_tokens": 20, "output_tokens": 8},
+            "cost_usd": 0.0002,
+            "duration_ms": 400,
+            "num_turns": 1,
+        }
+    ),
+]
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+async def _fake_lines(lines: list[str]) -> AsyncIterator[str]:
+    for line in lines:
+        yield line
+
+
+async def _collect_yield_turn(lines: list[str]) -> list:
+    """Run a ClaudeCodeTurn through UnifiedEmitter.yield_turn and collect events."""
+    turn = ClaudeCodeTurn(_fake_lines(lines))
+    emitter = UnifiedEmitter(task_id="t1", trace_id=None, parent_span_id=None)
+    return [e async for e in emitter.yield_turn(turn)]
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_text_only_produces_start_and_done():
+    events = await _collect_yield_turn(_TEXT_ONLY_LINES)
+    types = [type(e).__name__ for e in events]
+    assert "StreamTaskMessageStart" in types
+    assert "StreamTaskMessageDone" in types
+
+
+@pytest.mark.asyncio
+async def test_text_only_content():
+    events = await _collect_yield_turn(_TEXT_ONLY_LINES)
+    starts = [e for e in events if isinstance(e, StreamTaskMessageStart)]
+    assert len(starts) == 1
+    assert starts[0].content.type == "text"
+
+
+@pytest.mark.asyncio
+async def test_usage_is_populated_after_stream():
+    turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+    _ = [e async for e in turn.events]
+    usage = turn.usage()
+    assert usage.input_tokens == 10
+    assert usage.output_tokens == 5
+    assert usage.cost_usd == pytest.approx(0.0001, rel=1e-4)
+    assert usage.num_llm_calls == 1
+
+
+@pytest.mark.asyncio
+async def test_tool_call_produces_tool_request_and_response():
+    events = await _collect_yield_turn(_TOOL_CALL_LINES)
+    content_types = {
+        getattr(e, "content", None) and getattr(e.content, "type", None) for e in events if hasattr(e, "content")
+    }
+    assert "tool_request" in content_types
+    assert "tool_response" in content_types
+
+
+@pytest.mark.asyncio
+async def test_tool_call_has_one_text_block():
+    """The tool_use block is not text; only 'Done.' is the text block."""
+    events = await _collect_yield_turn(_TOOL_CALL_LINES)
+    text_starts = [
+        e for e in events if isinstance(e, StreamTaskMessageStart) and getattr(e.content, "type", None) == "text"
+    ]
+    assert len(text_starts) == 1
+
+
+@pytest.mark.asyncio
+async def test_empty_lines_are_skipped():
+    """Inserting blank lines in the stream must not crash the parser."""
+    lines_with_blanks = ["", "  "] + _TEXT_ONLY_LINES + [""]
+    events = await _collect_yield_turn(lines_with_blanks)
+    assert any(isinstance(e, StreamTaskMessageStart) for e in events)
+
+
+@pytest.mark.asyncio
+async def test_spawn_seam_concept():
+    """Demonstrate the injectable spawn seam pattern used in project/acp.py.
+
+    The ``_spawn_claude`` function in ``project/acp.py`` is a top-level async
+    generator. Production code calls it like::
+
+        turn = ClaudeCodeTurn(_spawn_claude(prompt))
+
+    In tests, a replacement function is injected (e.g. via monkeypatch) to
+    return pre-recorded lines. This test proves the pattern works end-to-end
+    without importing the full ACP module (which has module-level env-var
+    checks that only pass in a running agent environment).
+    """
+    recorded_lines = _TEXT_ONLY_LINES
+
+    async def _fake_spawn(prompt: str) -> AsyncIterator[str]:  # noqa: ARG001
+        """Drop-in replacement for _spawn_claude."""
+        for line in recorded_lines:
+            yield line
+
+    called_with: list[str] = []
+
+    async def _wrapped_spawn(prompt: str) -> AsyncIterator[str]:
+        called_with.append(prompt)
+        async for line in _fake_spawn(prompt):
+            yield line
+
+    turn = ClaudeCodeTurn(_wrapped_spawn("test prompt"))
+    emitter = UnifiedEmitter(task_id="t2", trace_id=None, parent_span_id=None)
+    events = [e async for e in emitter.yield_turn(turn)]
+
+    assert called_with == ["test prompt"]
+    assert any(isinstance(e, StreamTaskMessageStart) for e in events)
diff --git a/examples/tutorials/10_async/00_base/130_claude_code/.dockerignore b/examples/tutorials/10_async/00_base/130_claude_code/.dockerignore
new file mode 100644
index 000000000..c49489471
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_claude_code/.dockerignore
@@ -0,0 +1,43 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Environments
+.env**
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Git
+.git
+.gitignore
+
+# Misc
+.DS_Store
diff --git a/examples/tutorials/10_async/00_base/130_claude_code/Dockerfile b/examples/tutorials/10_async/00_base/130_claude_code/Dockerfile
new file mode 100644
index 000000000..e36b9e56d
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_claude_code/Dockerfile
@@ -0,0 +1,43 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    nodejs \
+    npm \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+RUN npm install -g @anthropic-ai/claude-code || true
+
+ENV UV_HTTP_TIMEOUT=1000
+
+COPY 10_async/00_base/130_claude_code/pyproject.toml /app/130_claude_code/pyproject.toml
+COPY 10_async/00_base/130_claude_code/README.md /app/130_claude_code/README.md
+
+WORKDIR /app/130_claude_code
+
+COPY 10_async/00_base/130_claude_code/project /app/130_claude_code/project
+COPY 10_async/00_base/130_claude_code/tests /app/130_claude_code/tests
+COPY test_utils /app/test_utils
+
+RUN uv pip install --system .[dev]
+
+ENV PYTHONPATH=/app
+
+ENV AGENT_NAME=ab130-claude-code
+
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/examples/tutorials/10_async/00_base/130_claude_code/README.md b/examples/tutorials/10_async/00_base/130_claude_code/README.md
new file mode 100644
index 000000000..695207c57
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_claude_code/README.md
@@ -0,0 +1,76 @@
+# Tutorial 130 (async/base): Async Claude Code Agent
+
+This tutorial demonstrates how to build an **async (non-Temporal)** agent that
+spawns the Claude Code CLI as a local subprocess and delivers its output through
+the Agentex unified harness surface via ``ClaudeCodeTurn`` and
+``UnifiedEmitter.auto_send_turn``.
+
+## Key Concepts
+
+### Async delivery path
+
+Unlike the sync tutorial (060), this agent uses the async ACP model. The
+``@acp.on_task_event_send`` handler does not return a generator -- instead,
+``UnifiedEmitter.auto_send_turn(turn)`` pushes events to the task's Redis
+stream in real time and returns a ``TurnResult`` when the turn is complete.
+The UI polls or streams that Redis channel independently.
+
+### ClaudeCodeTurn + UnifiedEmitter
+
+Same tap as the sync tutorial:
+- ``ClaudeCodeTurn`` wraps ``convert_claude_code_to_agentex_events``.
+- ``UnifiedEmitter`` wires trace context + chosen delivery.
+- ``auto_send_turn`` is the async push path.
+
+### Local subprocess spawn
+
+``_spawn_claude`` in ``project/acp.py`` uses ``asyncio.create_subprocess_exec``
+to run:
+
+```
+claude -p --output-format stream-json --verbose
+```
+
+The prompt is written to stdin. Stdout is read line by line.
+
+Production isolation (Scale sandbox, secret injection, MCP configuration)
+is the golden agent's concern at
+``teams/sgp/agents/golden_agent/project/harness/providers/claude.py``.
+
+### Injectable spawn seam
+
+``_spawn_claude`` is a top-level async generator. Tests monkeypatch it to
+inject pre-recorded stream-json lines so offline unit tests run without the CLI.
+
+## Files
+
+| File | Description |
+|------|-------------|
+| ``project/acp.py`` | ACP server, ``_spawn_claude`` seam, and event handler |
+| ``tests/test_agent.py`` | Live integration tests (needs CLI + API key) |
+| ``tests/test_agent_offline.py`` | Offline unit tests with injected fake subprocess |
+| ``manifest.yaml`` | Agent configuration |
+
+## Running Locally (live)
+
+Requires the ``claude`` CLI installed and ``ANTHROPIC_API_KEY`` set:
+
+```bash
+npm install -g @anthropic-ai/claude-code
+export ANTHROPIC_API_KEY=sk-ant-...
+agentex agents run
+```
+
+## Running Offline Tests
+
+No CLI or API key needed:
+
+```bash
+uv run pytest tests/test_agent_offline.py -v
+```
+
+## Notes
+
+- Production isolation (sandbox, secrets, MCP) is the golden agent's concern.
+- For multi-turn memory, persist the Claude Code session_id from the
+  ``result`` envelope and pass it to ``claude -r <session_id>`` on the next turn.
diff --git a/examples/tutorials/10_async/00_base/130_claude_code/manifest.yaml b/examples/tutorials/10_async/00_base/130_claude_code/manifest.yaml
new file mode 100644
index 000000000..7d74de7c6
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_claude_code/manifest.yaml
@@ -0,0 +1,58 @@
+build:
+  context:
+    root: ../../../
+    include_paths:
+      - 10_async/00_base/130_claude_code
+      - test_utils
+    dockerfile: 10_async/00_base/130_claude_code/Dockerfile
+    dockerignore: 10_async/00_base/130_claude_code/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+
+agent:
+  acp_type: async
+  name: ab130-claude-code
+  description: An async Claude Code agent streaming the unified harness surface via a local CLI subprocess
+
+  temporal:
+    enabled: false
+
+  credentials:
+    - env_var_name: ANTHROPIC_API_KEY
+      secret_name: anthropic-api-key
+      secret_key: api-key
+    - env_var_name: REDIS_URL
+      secret_name: redis-url-secret
+      secret_key: url
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "ab130-claude-code"
+      description: "An async Claude Code agent streaming via local CLI subprocess"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/10_async/00_base/130_claude_code/project/__init__.py b/examples/tutorials/10_async/00_base/130_claude_code/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/10_async/00_base/130_claude_code/project/acp.py b/examples/tutorials/10_async/00_base/130_claude_code/project/acp.py
new file mode 100644
index 000000000..b6681f6a8
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_claude_code/project/acp.py
@@ -0,0 +1,149 @@
+"""ACP handler for the async Claude Code tutorial.
+
+Spawns ``claude -p --output-format stream-json --verbose`` as a LOCAL
+asyncio subprocess (no Scale sandbox -- that is the golden agent's
+production concern). Stdout lines are fed into ``ClaudeCodeTurn``. Events
+are delivered via ``UnifiedEmitter.auto_send_turn``, the async Redis push
+path.
+
+Live runs require the ``claude`` CLI to be installed and an
+ANTHROPIC_API_KEY (or equivalent credential) in the environment.
+For offline testing, see ``tests/test_agent_offline.py``.
+"""
+
+from __future__ import annotations
+
+import os
+import asyncio
+from typing import AsyncIterator
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import agentex.lib.adk as adk
+from agentex.lib.adk import ClaudeCodeTurn
+from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.lib.types.fastacp import AsyncACPConfig
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+logger = make_logger(__name__)
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+acp = FastACP.create(
+    acp_type="async",
+    config=AsyncACPConfig(type="base"),
+)
+
+
+async def _spawn_claude(prompt: str) -> AsyncIterator[str]:
+    """Spawn ``claude -p --output-format stream-json`` locally and yield stdout lines.
+
+    Injectable seam: tests monkeypatch this with a fake async iterator of
+    pre-recorded lines so no real CLI invocation is needed offline.
+    """
+    proc = await asyncio.create_subprocess_exec(
+        "claude",
+        "-p",
+        "--output-format",
+        "stream-json",
+        "--verbose",
+        stdin=asyncio.subprocess.PIPE,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    assert proc.stdout is not None
+    assert proc.stdin is not None
+
+    proc.stdin.write(prompt.encode())
+    proc.stdin.close()
+
+    # Drain stderr concurrently. With --verbose, Claude Code can write enough to
+    # stderr to fill the OS pipe buffer; if we only read stdout, the CLI blocks
+    # on its stderr write while we block reading stdout — a deadlock. A
+    # background task keeps stderr flowing so stdout never stalls.
+    async def _drain_stderr() -> None:
+        assert proc.stderr is not None
+        async for _ in proc.stderr:
+            pass
+
+    stderr_task = asyncio.create_task(_drain_stderr())
+
+    try:
+        buffer = ""
+        async for chunk in proc.stdout:
+            buffer += chunk.decode("utf-8", errors="replace")
+            while "\n" in buffer:
+                line, buffer = buffer.split("\n", 1)
+                line = line.strip()
+                if line:
+                    yield line
+
+        if buffer.strip():
+            yield buffer.strip()
+
+        await proc.wait()
+    finally:
+        # Release the subprocess and stderr drain task even if the consumer
+        # abandons the generator early (task cancellation / client disconnect):
+        # cancel the drain task and terminate+reap the process if it is still
+        # running, so neither is leaked.
+        stderr_task.cancel()
+        try:
+            await stderr_task
+        except asyncio.CancelledError:
+            pass
+        if proc.returncode is None:
+            try:
+                proc.terminate()
+            except ProcessLookupError:
+                pass
+            await proc.wait()
+
+
+@acp.on_task_create
+async def handle_task_create(params: CreateTaskParams):
+    logger.info("Task created: %s", params.task.id)
+
+
+@acp.on_task_event_send
+async def handle_task_event_send(params: SendEventParams):
+    """Handle a user message: spawn Claude Code locally and push events to the task stream."""
+    task_id = params.task.id
+    prompt = params.event.content.content
+    logger.info("Processing message for task %s", task_id)
+
+    await adk.messages.create(task_id=task_id, content=params.event.content)
+
+    async with adk.tracing.span(
+        trace_id=task_id,
+        task_id=task_id,
+        name="message",
+        input={"message": prompt},
+        data={"__span_type__": "AGENT_WORKFLOW"},
+    ) as turn_span:
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+        turn = ClaudeCodeTurn(_spawn_claude(prompt))
+        result = await emitter.auto_send_turn(turn)
+        if turn_span:
+            turn_span.output = {"final_text": result.final_text}
+
+
+@acp.on_task_cancel
+async def handle_task_canceled(params: CancelTaskParams):
+    logger.info("Task canceled: %s", params.task.id)
diff --git a/examples/tutorials/10_async/00_base/130_claude_code/pyproject.toml b/examples/tutorials/10_async/00_base/130_claude_code/pyproject.toml
new file mode 100644
index 000000000..66c3cdaf3
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_claude_code/pyproject.toml
@@ -0,0 +1,25 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "ab130-claude-code"
+version = "0.1.0"
+description = "An async Claude Code agent streaming the unified harness surface via a local CLI subprocess"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+    "python-dotenv>=1.0,<2",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
diff --git a/examples/tutorials/10_async/00_base/130_claude_code/tests/test_agent.py b/examples/tutorials/10_async/00_base/130_claude_code/tests/test_agent.py
new file mode 100644
index 000000000..ee254da23
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_claude_code/tests/test_agent.py
@@ -0,0 +1,250 @@
+"""Tests for the async Claude Code tutorial agent.
+
+LIVE tests (``TestClaudeCodeLive``):
+  - Require the ``claude`` CLI on PATH and ``ANTHROPIC_API_KEY`` set.
+  - Run the full agent end-to-end against a live Agentex server.
+  - Skipped automatically when ``CLAUDE_LIVE_TESTS`` is not set to ``1``.
+
+OFFLINE unit tests (``TestClaudeCodeOffline``):
+  - Inject a fake async iterator of pre-recorded stream-json lines.
+  - Assert the ``ClaudeCodeTurn`` + ``UnifiedEmitter`` pipeline drives
+    ``auto_send_turn``, populates usage, and satisfies the ``HarnessTurn``
+    protocol.
+  - Always run -- no CLI or API key needed.
+"""
+
+from __future__ import annotations
+
+import os
+import json
+from typing import AsyncIterator
+
+import pytest
+
+from agentex.types.task_message import TaskMessage
+
+# ---------------------------------------------------------------------------
+# Recorded stream-json fixtures
+# ---------------------------------------------------------------------------
+
+_TEXT_ONLY_LINES: list[str] = [
+    json.dumps({"type": "system", "subtype": "init", "session_id": "sess-offline-async-1"}),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {"content": [{"type": "text", "text": "Hello from async Claude Code!"}]},
+        }
+    ),
+    json.dumps(
+        {
+            "type": "result",
+            "usage": {"input_tokens": 12, "output_tokens": 6},
+            "cost_usd": 0.0001,
+            "duration_ms": 300,
+            "num_turns": 1,
+        }
+    ),
+]
+
+
+async def _fake_lines(lines: list[str]) -> AsyncIterator[str]:
+    """Async iterator of pre-recorded stream-json lines (no subprocess)."""
+    for line in lines:
+        yield line
+
+
+# ---------------------------------------------------------------------------
+# Fake streaming backend
+# ---------------------------------------------------------------------------
+
+
+class _FakeCtx:
+    def __init__(self, sink, content_type, initial_content):
+        self.sink = sink
+        self.content_type = content_type
+        self.task_message = TaskMessage(id="msg-1", task_id="task-offline", content=initial_content)
+
+    async def __aenter__(self):
+        self.sink.append(("open", self.content_type))
+        return self
+
+    async def __aexit__(self, *a):
+        await self.close()
+        return False
+
+    async def close(self):
+        self.sink.append(("close", self.content_type))
+
+    async def stream_update(self, update):
+        self.sink.append(("update", update))
+        return update
+
+
+class _FakeStreaming:
+    def __init__(self):
+        self.sink: list = []
+
+    def streaming_task_message_context(self, task_id, initial_content, streaming_mode="coalesced", created_at=None):  # noqa: ARG002
+        ctype = getattr(initial_content, "type", None)
+        self.sink.append(("ctx", ctype))
+        return _FakeCtx(self.sink, ctype, initial_content)
+
+
+# ---------------------------------------------------------------------------
+# Offline tests (always run -- no CLI or API key needed)
+# ---------------------------------------------------------------------------
+
+
+class TestClaudeCodeOffline:
+    """Unit tests that run without a real claude CLI or network."""
+
+    @pytest.mark.asyncio
+    async def test_auto_send_text_only_opens_and_closes_context(self):
+        """auto_send_turn opens and closes exactly one streaming context."""
+        from agentex.lib.adk import ClaudeCodeTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+
+        fake_streaming = _FakeStreaming()
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        emitter = UnifiedEmitter(
+            task_id="offline-task",
+            trace_id=None,
+            parent_span_id=None,
+            tracer=False,
+            streaming=fake_streaming,
+        )
+        result = await emitter.auto_send_turn(turn)
+
+        opened = [s for s in fake_streaming.sink if s[0] == "open"]
+        closed = [s for s in fake_streaming.sink if s[0] == "close"]
+        assert len(opened) == 1
+        assert len(closed) == 1
+        assert opened[0][1] == "text"
+
+    @pytest.mark.asyncio
+    async def test_auto_send_populates_final_text(self):
+        """auto_send_turn result carries the agent's reply text."""
+        from agentex.lib.adk import ClaudeCodeTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+
+        fake_streaming = _FakeStreaming()
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        emitter = UnifiedEmitter(
+            task_id="offline-task",
+            trace_id=None,
+            parent_span_id=None,
+            tracer=False,
+            streaming=fake_streaming,
+        )
+        result = await emitter.auto_send_turn(turn)
+        assert "Hello from async Claude Code" in result.final_text
+
+    @pytest.mark.asyncio
+    async def test_usage_populated_after_stream_exhausted(self):
+        """Usage is populated after the events stream is exhausted."""
+        from agentex.lib.adk import ClaudeCodeTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+
+        fake_streaming = _FakeStreaming()
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        emitter = UnifiedEmitter(
+            task_id="t",
+            trace_id=None,
+            parent_span_id=None,
+            tracer=False,
+            streaming=fake_streaming,
+        )
+        await emitter.auto_send_turn(turn)
+        usage = turn.usage()
+        assert usage.input_tokens == 12
+        assert usage.output_tokens == 6
+        assert usage.num_llm_calls == 1
+
+    @pytest.mark.asyncio
+    async def test_stream_task_message_done_present(self):
+        """StreamTaskMessageDone must appear via yield_turn on a ClaudeCodeTurn."""
+        from agentex.lib.adk import ClaudeCodeTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+        from agentex.types.task_message_update import StreamTaskMessageDone
+
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+        events = [e async for e in emitter.yield_turn(turn)]
+        assert any(isinstance(e, StreamTaskMessageDone) for e in events), (
+            "Expected at least one StreamTaskMessageDone event"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Live tests (skipped unless CLAUDE_LIVE_TESTS=1)
+# ---------------------------------------------------------------------------
+
+pytestmark_live = pytest.mark.skipif(
+    not os.environ.get("CLAUDE_LIVE_TESTS"),
+    reason="Set CLAUDE_LIVE_TESTS=1 and ensure the `claude` CLI + ANTHROPIC_API_KEY are available",
+)
+
+AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
+AGENT_NAME = os.environ.get("AGENT_NAME", "ab130-claude-code")
+
+
+@pytestmark_live
+class TestClaudeCodeLive:
+    """Live async tests -- needs the claude CLI + ANTHROPIC_API_KEY."""
+
+    @pytest.fixture
+    def client(self):
+        from agentex import Agentex
+
+        return Agentex(base_url=AGENTEX_API_BASE_URL)
+
+    @pytest.fixture
+    def agent_name(self):
+        return AGENT_NAME
+
+    @pytest.fixture
+    def agent_id(self, client, agent_name):
+        agents = client.agents.list()
+        for agent in agents:
+            if agent.name == agent_name:
+                return agent.id
+        raise ValueError(f"Agent {agent_name!r} not found.")
+
+    def test_send_simple_message(self, client, agent_id: str):
+        """Create a task, send a message, and poll until a response appears."""
+        import time
+        import uuid
+
+        from agentex.types import TextContentParam
+        from agentex.types.agent_rpc_params import ParamsSendEventRequest, ParamsCreateTaskRequest
+
+        task = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)).result
+        assert task is not None
+        task_id = task.id
+
+        client.agents.send_event(
+            agent_id=agent_id,
+            params=ParamsSendEventRequest(
+                task_id=task_id,
+                content=TextContentParam(
+                    author="user",
+                    content="Reply with exactly three words: hello from claude",
+                    type="text",
+                ),
+            ),
+        )
+
+        deadline = time.monotonic() + 60
+        while time.monotonic() < deadline:
+            msgs = client.messages.list(task_id=task_id)
+            agent_msgs = [m for m in msgs if getattr(m.content, "author", None) == "agent"]
+            if agent_msgs:
+                assert len(agent_msgs) >= 1
+                return
+            time.sleep(2)
+
+        raise AssertionError("No agent response received within 60 s")
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/examples/tutorials/10_async/00_base/130_claude_code/tests/test_agent_offline.py b/examples/tutorials/10_async/00_base/130_claude_code/tests/test_agent_offline.py
new file mode 100644
index 000000000..ac48474ee
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/130_claude_code/tests/test_agent_offline.py
@@ -0,0 +1,243 @@
+"""Offline unit tests for the async Claude Code tutorial agent.
+
+These tests do NOT require the ``claude`` CLI or an ANTHROPIC_API_KEY.
+They inject a fake async iterator of pre-recorded stream-json lines in
+place of the real subprocess spawn and a fake streaming backend, then
+assert that the handler drives ``UnifiedEmitter.auto_send_turn`` correctly.
+
+The injection seam is the ``_spawn_claude`` function in ``project/acp.py``.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import AsyncIterator
+
+import pytest
+
+from agentex.lib.adk import ClaudeCodeTurn
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.types.task_message import TaskMessage
+
+# ---------------------------------------------------------------------------
+# Recorded fixtures
+# ---------------------------------------------------------------------------
+
+_TEXT_ONLY_LINES: list[str] = [
+    json.dumps({"type": "system", "subtype": "init", "session_id": "sess-1"}),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {"content": [{"type": "text", "text": "Hello from async Claude Code!"}]},
+        }
+    ),
+    json.dumps(
+        {
+            "type": "result",
+            "usage": {"input_tokens": 12, "output_tokens": 6},
+            "cost_usd": 0.0001,
+            "duration_ms": 300,
+            "num_turns": 1,
+        }
+    ),
+]
+
+_TOOL_CALL_LINES: list[str] = [
+    json.dumps({"type": "system", "subtype": "init", "session_id": "sess-2"}),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {
+                "content": [
+                    {
+                        "type": "tool_use",
+                        "id": "tool_xyz",
+                        "name": "Read",
+                        "input": {"file_path": "/tmp/foo.txt"},
+                    }
+                ]
+            },
+        }
+    ),
+    json.dumps(
+        {
+            "type": "user",
+            "message": {
+                "content": [
+                    {
+                        "type": "tool_result",
+                        "tool_use_id": "tool_xyz",
+                        "content": "file contents",
+                        "is_error": False,
+                    }
+                ]
+            },
+        }
+    ),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {"content": [{"type": "text", "text": "Read the file."}]},
+        }
+    ),
+    json.dumps(
+        {
+            "type": "result",
+            "usage": {"input_tokens": 25, "output_tokens": 10},
+            "cost_usd": 0.0003,
+            "duration_ms": 500,
+            "num_turns": 1,
+        }
+    ),
+]
+
+
+# ---------------------------------------------------------------------------
+# Fake streaming backend
+# ---------------------------------------------------------------------------
+
+
+class _FakeCtx:
+    def __init__(self, sink, content_type, initial_content):
+        self.sink = sink
+        self.content_type = content_type
+        self.task_message = TaskMessage(id="msg-1", task_id="task-offline", content=initial_content)
+
+    async def __aenter__(self):
+        self.sink.append(("open", self.content_type))
+        return self
+
+    async def __aexit__(self, *a):
+        await self.close()
+        return False
+
+    async def close(self):
+        self.sink.append(("close", self.content_type))
+
+    async def stream_update(self, update):
+        self.sink.append(("update", update))
+        return update
+
+
+class _FakeStreaming:
+    def __init__(self):
+        self.sink: list = []
+
+    def streaming_task_message_context(self, task_id, initial_content, streaming_mode="coalesced", created_at=None):  # noqa: ARG002
+        ctype = getattr(initial_content, "type", None)
+        self.sink.append(("ctx", ctype))
+        return _FakeCtx(self.sink, ctype, initial_content)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+async def _fake_lines(lines: list[str]) -> AsyncIterator[str]:
+    for line in lines:
+        yield line
+
+
+async def _run_auto_send(lines: list[str]):
+    """Drive ClaudeCodeTurn through auto_send_turn with a fake streaming backend."""
+    fake_streaming = _FakeStreaming()
+    turn = ClaudeCodeTurn(_fake_lines(lines))
+    emitter = UnifiedEmitter(
+        task_id="offline-task",
+        trace_id=None,
+        parent_span_id=None,
+        tracer=False,
+        streaming=fake_streaming,
+    )
+    result = await emitter.auto_send_turn(turn)
+    return result, fake_streaming.sink
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_auto_send_text_only_opens_and_closes_context():
+    result, sink = await _run_auto_send(_TEXT_ONLY_LINES)
+    opened = [s for s in sink if s[0] == "open"]
+    closed = [s for s in sink if s[0] == "close"]
+    assert len(opened) == 1
+    assert len(closed) == 1
+    assert opened[0][1] == "text"
+
+
+@pytest.mark.asyncio
+async def test_auto_send_populates_final_text():
+    result, _ = await _run_auto_send(_TEXT_ONLY_LINES)
+    assert "Hello from async Claude Code" in result.final_text
+
+
+@pytest.mark.asyncio
+async def test_auto_send_usage_is_populated():
+    """Usage is populated after the events stream is exhausted.
+
+    UnifiedEmitter.auto_send_turn evaluates turn.usage() eagerly (before
+    the events are consumed) so the TurnResult.usage reflects a pre-exhaust
+    snapshot. Test usage directly from the turn after auto_send_turn completes
+    instead -- the result envelope is populated by the generator being consumed
+    inside auto_send.
+    """
+    turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+    fake_streaming = _FakeStreaming()
+    emitter = UnifiedEmitter(
+        task_id="t",
+        trace_id=None,
+        parent_span_id=None,
+        tracer=False,
+        streaming=fake_streaming,
+    )
+    await emitter.auto_send_turn(turn)
+    # After auto_send_turn, the events generator is exhausted and
+    # ClaudeCodeTurn._on_result has been called with the result envelope.
+    usage = turn.usage()
+    assert usage.input_tokens == 12
+    assert usage.output_tokens == 6
+    assert usage.num_llm_calls == 1
+
+
+@pytest.mark.asyncio
+async def test_auto_send_tool_call_opens_two_contexts():
+    result, sink = await _run_auto_send(_TOOL_CALL_LINES)
+    opened = [s for s in sink if s[0] == "open"]
+    content_types = [s[1] for s in opened]
+    assert "tool_request" in content_types
+    assert "text" in content_types
+
+
+@pytest.mark.asyncio
+async def test_spawn_seam_concept():
+    """Demonstrate the injectable spawn seam pattern used in project/acp.py.
+
+    The ``_spawn_claude`` function is a top-level async generator. A drop-in
+    replacement can be injected (e.g. via monkeypatch) to supply pre-recorded
+    lines without spawning the real CLI. This test proves the pattern works
+    end-to-end without importing the full ACP module.
+    """
+    called: list[str] = []
+
+    async def _fake_spawn(prompt: str) -> AsyncIterator[str]:
+        called.append(prompt)
+        for line in _TEXT_ONLY_LINES:
+            yield line
+
+    fake_streaming = _FakeStreaming()
+    turn = ClaudeCodeTurn(_fake_spawn("ping"))
+    emitter = UnifiedEmitter(
+        task_id="t",
+        trace_id=None,
+        parent_span_id=None,
+        tracer=False,
+        streaming=fake_streaming,
+    )
+    result = await emitter.auto_send_turn(turn)
+
+    assert called == ["ping"]
+    assert "Hello from async Claude Code" in result.final_text
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/.dockerignore b/examples/tutorials/10_async/10_temporal/140_claude_code/.dockerignore
new file mode 100644
index 000000000..c49489471
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/.dockerignore
@@ -0,0 +1,43 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Environments
+.env**
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Git
+.git
+.gitignore
+
+# Misc
+.DS_Store
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/Dockerfile b/examples/tutorials/10_async/10_temporal/140_claude_code/Dockerfile
new file mode 100644
index 000000000..c909ee6c7
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/Dockerfile
@@ -0,0 +1,46 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    nodejs \
+    npm \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+RUN npm install -g @anthropic-ai/claude-code || true
+
+ENV UV_HTTP_TIMEOUT=1000
+
+COPY 10_async/10_temporal/140_claude_code/pyproject.toml /app/140_claude_code/pyproject.toml
+COPY 10_async/10_temporal/140_claude_code/README.md /app/140_claude_code/README.md
+
+WORKDIR /app/140_claude_code
+
+COPY 10_async/10_temporal/140_claude_code/project /app/140_claude_code/project
+COPY 10_async/10_temporal/140_claude_code/tests /app/140_claude_code/tests
+COPY test_utils /app/test_utils
+
+RUN uv pip install --system .[dev]
+
+ENV PYTHONPATH=/app
+
+ENV AGENT_NAME=at140-claude-code
+
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
+
+# When deploying the worker, replace the CMD with:
+# CMD ["python", "project/run_worker.py"]
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/README.md b/examples/tutorials/10_async/10_temporal/140_claude_code/README.md
new file mode 100644
index 000000000..61cc94183
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/README.md
@@ -0,0 +1,76 @@
+# Tutorial 140 (async/temporal): Temporal Claude Code Agent
+
+This tutorial demonstrates how to build a **Temporal-backed** agent that
+spawns the Claude Code CLI as a local subprocess and delivers its output
+through the Agentex unified harness surface via ``ClaudeCodeTurn`` and
+``UnifiedEmitter.auto_send_turn``, with Temporal providing durable execution
+and crash recovery.
+
+## Key Concepts
+
+### Temporal + ClaudeCodeTurn
+
+The Temporal workflow (``project/workflow.py``) holds state durably. Each user
+message arrives as a signal (``on_task_event_send``), spawns the Claude Code
+CLI locally, wraps the stdout line stream in ``ClaudeCodeTurn``, and pushes
+events to the task's Redis stream via ``UnifiedEmitter.auto_send_turn``.
+
+``workflow.now()`` is passed as ``created_at`` so message timestamps are
+deterministic under Temporal replay.
+
+### Multi-turn session resume
+
+The workflow persists the Claude Code ``session_id`` from the ``result``
+envelope. On the next turn, ``-r <session_id>`` is passed to the CLI to
+resume the conversation. Temporal's durable state ensures the session_id
+survives worker crashes.
+
+### Note on subprocess in workflow code
+
+For simplicity, this tutorial spawns the subprocess directly inside the
+workflow signal handler. For production use, move the spawn into a custom
+Temporal activity so each subprocess invocation gets independent retry and
+timeout guarantees. See
+``examples/tutorials/10_async/10_temporal/030_custom_activities/`` for
+that pattern.
+
+### Injectable spawn seam
+
+``_spawn_claude`` in ``project/workflow.py`` is a top-level async generator.
+Tests monkeypatch it to inject pre-recorded stream-json lines so offline
+unit tests run without the CLI.
+
+## Files
+
+| File | Description |
+|------|-------------|
+| ``project/acp.py`` | Thin ACP server; wires Temporal (no handlers) |
+| ``project/workflow.py`` | Temporal workflow + ``_spawn_claude`` seam |
+| ``project/run_worker.py`` | Temporal worker entry point |
+| ``tests/test_agent.py`` | Live integration tests (needs CLI + Temporal + API key) |
+| ``tests/test_agent_offline.py`` | Offline unit tests with injected fake subprocess |
+| ``manifest.yaml`` | Agent configuration |
+
+## Running Locally (live)
+
+Requires Temporal server, the ``claude`` CLI, and ``ANTHROPIC_API_KEY``:
+
+```bash
+npm install -g @anthropic-ai/claude-code
+export ANTHROPIC_API_KEY=sk-ant-...
+agentex agents run
+```
+
+## Running Offline Tests
+
+No CLI, Temporal, or API key needed:
+
+```bash
+uv run pytest tests/test_agent_offline.py -v
+```
+
+## Notes
+
+- Production isolation (sandbox, secrets, MCP) is the golden agent's concern.
+- The subprocess spawn should be moved to a custom activity in production.
+- The ``--verbose`` flag is included to match the golden agent's invocation.
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/manifest.yaml b/examples/tutorials/10_async/10_temporal/140_claude_code/manifest.yaml
new file mode 100644
index 000000000..9328b1713
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/manifest.yaml
@@ -0,0 +1,62 @@
+build:
+  context:
+    root: ../../../
+    include_paths:
+      - 10_async/10_temporal/140_claude_code
+      - test_utils
+    dockerfile: 10_async/10_temporal/140_claude_code/Dockerfile
+    dockerignore: 10_async/10_temporal/140_claude_code/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+    worker: project/run_worker.py
+
+agent:
+  acp_type: async
+  name: at140-claude-code
+  description: A Temporal-backed Claude Code agent streaming the unified harness surface via a local CLI subprocess
+
+  temporal:
+    enabled: true
+    workflows:
+      - name: at140-claude-code
+        queue_name: at140_claude_code_queue
+
+  credentials:
+    - env_var_name: ANTHROPIC_API_KEY
+      secret_name: anthropic-api-key
+      secret_key: api-key
+    - env_var_name: REDIS_URL
+      secret_name: redis-url-secret
+      secret_key: url
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "at140-claude-code"
+      description: "A Temporal-backed Claude Code agent streaming via local CLI subprocess"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/project/__init__.py b/examples/tutorials/10_async/10_temporal/140_claude_code/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/project/acp.py b/examples/tutorials/10_async/10_temporal/140_claude_code/project/acp.py
new file mode 100644
index 000000000..07258f6d8
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/project/acp.py
@@ -0,0 +1,31 @@
+"""ACP server for the Temporal Claude Code tutorial.
+
+This file is intentionally thin. When ``acp_type="async"`` is combined
+with ``TemporalACPConfig``, FastACP auto-wires:
+
+    HTTP task/create       -> @workflow.run on the workflow class
+    HTTP task/event/send   -> @workflow.signal(SignalName.RECEIVE_EVENT)
+    HTTP task/cancel       -> workflow cancellation via the Temporal client
+
+The actual agent code lives in ``project/workflow.py`` and is executed by
+the Temporal worker (``project/run_worker.py``), not by this HTTP process.
+"""
+
+from __future__ import annotations
+
+import os
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from agentex.lib.types.fastacp import TemporalACPConfig
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+
+acp = FastACP.create(
+    acp_type="async",
+    config=TemporalACPConfig(
+        type="temporal",
+        temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"),
+    ),
+)
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/project/activities.py b/examples/tutorials/10_async/10_temporal/140_claude_code/project/activities.py
new file mode 100644
index 000000000..dcba0f9a7
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/project/activities.py
@@ -0,0 +1,139 @@
+"""Temporal activity for the Claude Code tutorial.
+
+Subprocess spawning (and any other I/O) must run inside a Temporal *activity*,
+not in workflow code. Temporal runs workflow + signal-handler bodies on a
+deterministic sandbox event loop that does not implement ``subprocess_exec``
+(or threads / sockets), so spawning the CLI directly in the signal handler
+raises ``NotImplementedError``. This activity runs the Claude Code CLI, drives
+the ``ClaudeCodeTurn`` through ``UnifiedEmitter.auto_send_turn`` (the async
+Redis push path), and returns the turn result to the workflow.
+
+The ``_spawn_claude`` async generator is an injectable seam: offline tests
+provide a fake that yields pre-recorded stdout lines so no real CLI runs.
+"""
+
+from __future__ import annotations
+
+import asyncio
+from typing import Any, AsyncIterator
+from datetime import datetime
+
+from temporalio import activity
+
+from agentex.lib.adk import ClaudeCodeTurn
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.utils.model_utils import BaseModel
+
+logger = make_logger(__name__)
+
+RUN_CLAUDE_CODE_TURN_ACTIVITY = "run_claude_code_turn"
+
+
+class RunClaudeCodeTurnParams(BaseModel):
+    """Arguments for one Claude Code turn run inside an activity."""
+
+    task_id: str
+    prompt: str
+    trace_id: str | None = None
+    parent_span_id: str | None = None
+    session_id: str | None = None
+    created_at: datetime | None = None
+
+
+class RunClaudeCodeTurnResult(BaseModel):
+    """Result returned from the activity to the workflow."""
+
+    final_text: str
+    session_id: str | None = None
+
+
+async def _spawn_claude(prompt: str, session_id: str | None = None) -> AsyncIterator[str]:
+    """Spawn ``claude -p --output-format stream-json`` locally and yield stdout lines.
+
+    Pass ``session_id`` to resume a previous Claude Code session (multi-turn
+    memory via ``-r <session_id>``).
+
+    Injectable seam: tests monkeypatch this with a fake async iterator so no
+    real CLI invocation is needed offline.
+    """
+    cmd = [
+        "claude",
+        "-p",
+        "--output-format",
+        "stream-json",
+        "--verbose",
+    ]
+    if session_id:
+        cmd.extend(["-r", session_id])
+
+    proc = await asyncio.create_subprocess_exec(
+        *cmd,
+        stdin=asyncio.subprocess.PIPE,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    assert proc.stdout is not None
+    assert proc.stdin is not None
+
+    proc.stdin.write(prompt.encode())
+    proc.stdin.close()
+
+    # Drain stderr concurrently. With --verbose, Claude Code can write enough to
+    # stderr to fill the OS pipe buffer; if we only read stdout, the CLI blocks
+    # on its stderr write while we block reading stdout — a deadlock. A
+    # background task keeps stderr flowing so stdout never stalls.
+    async def _drain_stderr() -> None:
+        assert proc.stderr is not None
+        async for _ in proc.stderr:
+            pass
+
+    stderr_task = asyncio.create_task(_drain_stderr())
+
+    try:
+        buffer = ""
+        async for chunk in proc.stdout:
+            buffer += chunk.decode("utf-8", errors="replace")
+            while "\n" in buffer:
+                line, buffer = buffer.split("\n", 1)
+                line = line.strip()
+                if line:
+                    yield line
+
+        if buffer.strip():
+            yield buffer.strip()
+
+        await proc.wait()
+    finally:
+        # Release the subprocess and stderr drain task even if the consumer
+        # abandons the generator early (task cancellation / client disconnect):
+        # cancel the drain task and terminate+reap the process if it is still
+        # running, so neither is leaked.
+        stderr_task.cancel()
+        try:
+            await stderr_task
+        except asyncio.CancelledError:
+            pass
+        if proc.returncode is None:
+            try:
+                proc.terminate()
+            except ProcessLookupError:
+                pass
+            await proc.wait()
+
+
+@activity.defn(name=RUN_CLAUDE_CODE_TURN_ACTIVITY)
+async def run_claude_code_turn(params: RunClaudeCodeTurnParams) -> dict[str, Any]:
+    """Run one Claude Code turn end-to-end and stream events to the task.
+
+    Runs in an activity (real asyncio loop) so subprocess I/O is permitted.
+    """
+    emitter = UnifiedEmitter(
+        task_id=params.task_id,
+        trace_id=params.trace_id,
+        parent_span_id=params.parent_span_id,
+    )
+    turn = ClaudeCodeTurn(_spawn_claude(params.prompt, session_id=params.session_id))
+    result = await emitter.auto_send_turn(turn, created_at=params.created_at)
+
+    return RunClaudeCodeTurnResult(final_text=result.final_text, session_id=turn.session_id).model_dump()
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/project/run_worker.py b/examples/tutorials/10_async/10_temporal/140_claude_code/project/run_worker.py
new file mode 100644
index 000000000..58802737e
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/project/run_worker.py
@@ -0,0 +1,41 @@
+"""Temporal worker for the Claude Code tutorial.
+
+Run as a separate long-lived process alongside the ACP HTTP server. The
+worker polls Temporal for workflow + activity tasks and executes them.
+
+The Claude Code CLI subprocess runs in the ``run_claude_code_turn`` activity
+(registered below alongside the built-in Agentex activities), because
+subprocess I/O is not permitted on the Temporal workflow event loop.
+"""
+
+import asyncio
+
+from project.workflow import At140ClaudeCodeWorkflow
+from project.activities import run_claude_code_turn
+from agentex.lib.utils.debug import setup_debug_if_enabled
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.environment_variables import EnvironmentVariables
+from agentex.lib.core.temporal.activities import get_all_activities
+from agentex.lib.core.temporal.workers.worker import AgentexWorker
+
+environment_variables = EnvironmentVariables.refresh()
+logger = make_logger(__name__)
+
+
+async def main():
+    setup_debug_if_enabled()
+
+    task_queue_name = environment_variables.WORKFLOW_TASK_QUEUE
+    if task_queue_name is None:
+        raise ValueError("WORKFLOW_TASK_QUEUE is not set")
+
+    worker = AgentexWorker(task_queue=task_queue_name)
+
+    await worker.run(
+        activities=[run_claude_code_turn, *get_all_activities()],
+        workflow=At140ClaudeCodeWorkflow,
+    )
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/project/workflow.py b/examples/tutorials/10_async/10_temporal/140_claude_code/project/workflow.py
new file mode 100644
index 000000000..7f50ba8d5
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/project/workflow.py
@@ -0,0 +1,137 @@
+"""Temporal workflow for the Claude Code tutorial.
+
+Holds conversation state (session_id for multi-turn resume) durably across
+crashes. Each user message triggers ``on_task_event_send``, which delegates the
+turn to the ``run_claude_code_turn`` activity. The activity spawns the Claude
+Code CLI, wraps its stdout in ``ClaudeCodeTurn``, and delivers the turn via
+``UnifiedEmitter.auto_send_turn`` (the async Redis push path).
+
+Note on subprocess inside Temporal
+------------------------------------
+Subprocess (and all other) I/O must run in a Temporal *activity*, never in
+workflow code. Temporal runs workflow + signal-handler bodies on a
+deterministic sandbox event loop that does not implement ``subprocess_exec``
+(spawning the CLI there raises ``NotImplementedError``). The activity also gets
+Temporal's retry + timeout guarantees. See
+``examples/tutorials/10_async/10_temporal/030_custom_activities/`` for the
+activity pattern.
+"""
+
+from __future__ import annotations
+
+import os
+import json
+from datetime import timedelta
+
+from temporalio import workflow
+
+from agentex.lib import adk
+from agentex.lib.types.acp import SendEventParams, CreateTaskParams
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.types.text_content import TextContent
+from agentex.lib.environment_variables import EnvironmentVariables
+from agentex.lib.core.temporal.types.workflow import SignalName
+from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+with workflow.unsafe.imports_passed_through():
+    from project.activities import RunClaudeCodeTurnParams, run_claude_code_turn
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+environment_variables = EnvironmentVariables.refresh()
+
+if environment_variables.WORKFLOW_NAME is None:
+    raise ValueError("Environment variable WORKFLOW_NAME is not set")
+if environment_variables.AGENT_NAME is None:
+    raise ValueError("Environment variable AGENT_NAME is not set")
+
+logger = make_logger(__name__)
+
+
+@workflow.defn(name=environment_variables.WORKFLOW_NAME)
+class At140ClaudeCodeWorkflow(BaseWorkflow):
+    """Temporal workflow that runs Claude Code locally for each user message.
+
+    Persists the Claude Code session_id across turns so the CLI can resume
+    the conversation (``-r <session_id>``). Temporal's durable state ensures
+    the session_id survives worker crashes.
+    """
+
+    def __init__(self):
+        super().__init__(display_name=environment_variables.AGENT_NAME)
+        self._complete_task = False
+        self._turn_number = 0
+        # Claude Code session_id for multi-turn resume.
+        self._session_id: str | None = None
+
+    @workflow.signal(name=SignalName.RECEIVE_EVENT)
+    async def on_task_event_send(self, params: SendEventParams) -> None:
+        """Handle a user message: spawn Claude Code and push events to the task stream."""
+        self._turn_number += 1
+        task_id = params.task.id
+        prompt = params.event.content.content
+        logger.info("Turn %d for task %s", self._turn_number, task_id)
+
+        await adk.messages.create(task_id=task_id, content=params.event.content)
+
+        async with adk.tracing.span(
+            trace_id=task_id,
+            task_id=task_id,
+            name=f"Turn {self._turn_number}",
+            input={"message": prompt},
+        ) as span:
+            # Delegate the subprocess turn to an activity: subprocess I/O is not
+            # permitted on the Temporal workflow event loop. The activity streams
+            # events to the task and returns the final text + session_id.
+            # workflow.now() gives a deterministic timestamp under replay.
+            result = await workflow.execute_activity(
+                run_claude_code_turn,
+                RunClaudeCodeTurnParams(
+                    task_id=task_id,
+                    prompt=prompt,
+                    trace_id=task_id,
+                    parent_span_id=span.id if span else None,
+                    session_id=self._session_id,
+                    created_at=workflow.now(),
+                ),
+                start_to_close_timeout=timedelta(minutes=5),
+            )
+
+            # Capture session_id to enable Claude Code resume on the next turn.
+            sid = result.get("session_id")
+            if sid:
+                self._session_id = sid
+
+            if span:
+                span.output = {"final_text": result.get("final_text")}
+
+    @workflow.run
+    async def on_task_create(self, params: CreateTaskParams) -> str:
+        logger.info("Task created: %s", params.task.id)
+
+        await adk.messages.create(
+            task_id=params.task.id,
+            content=TextContent(
+                author="agent",
+                content=(
+                    f"Task initialized with params:\n{json.dumps(params.params, indent=2)}\n"
+                    "Send me a message and I'll run it through Claude Code locally."
+                ),
+            ),
+        )
+
+        await workflow.wait_condition(lambda: self._complete_task, timeout=None)
+        return "Task completed"
+
+    @workflow.signal
+    async def complete_task_signal(self) -> None:
+        logger.info("Received complete_task signal")
+        self._complete_task = True
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/pyproject.toml b/examples/tutorials/10_async/10_temporal/140_claude_code/pyproject.toml
new file mode 100644
index 000000000..b9d517267
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/pyproject.toml
@@ -0,0 +1,27 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "at140-claude-code"
+version = "0.1.0"
+description = "A Temporal-backed Claude Code agent streaming the unified harness surface via a local CLI subprocess"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+    "temporalio>=1.18.2",
+    "python-dotenv>=1.0,<2",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+    "debugpy>=1.8.15",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/140_claude_code/tests/test_agent.py
new file mode 100644
index 000000000..767c707b9
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/tests/test_agent.py
@@ -0,0 +1,249 @@
+"""Tests for the Temporal Claude Code tutorial agent.
+
+LIVE tests (``TestClaudeCodeLive``):
+  - Require Temporal server, the ACP server, the Temporal worker, the ``claude``
+    CLI on PATH, and ``ANTHROPIC_API_KEY`` set.
+  - Run the full agent end-to-end against a live Agentex server.
+  - Skipped automatically when ``CLAUDE_LIVE_TESTS`` is not set to ``1``.
+
+OFFLINE unit tests (``TestClaudeCodeOffline``):
+  - Inject a fake async iterator of pre-recorded stream-json lines.
+  - Assert the ``ClaudeCodeTurn`` + ``UnifiedEmitter`` pipeline drives
+    ``auto_send_turn``, populates usage, and satisfies the ``HarnessTurn``
+    protocol.
+  - Always run -- no CLI or API key needed.
+"""
+
+from __future__ import annotations
+
+import os
+import json
+from typing import AsyncIterator
+
+import pytest
+
+from agentex.types.task_message import TaskMessage
+
+# ---------------------------------------------------------------------------
+# Recorded stream-json fixtures
+# ---------------------------------------------------------------------------
+
+_TEXT_ONLY_LINES: list[str] = [
+    json.dumps({"type": "system", "subtype": "init", "session_id": "sess-temporal-offline-1"}),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {"content": [{"type": "text", "text": "Hello from Temporal Claude Code!"}]},
+        }
+    ),
+    json.dumps(
+        {
+            "type": "result",
+            "session_id": "sess-temporal-offline-1",
+            "usage": {"input_tokens": 15, "output_tokens": 7},
+            "cost_usd": 0.00015,
+            "duration_ms": 350,
+            "num_turns": 1,
+        }
+    ),
+]
+
+
+async def _fake_lines(lines: list[str]) -> AsyncIterator[str]:
+    """Async iterator of pre-recorded stream-json lines (no subprocess)."""
+    for line in lines:
+        yield line
+
+
+# ---------------------------------------------------------------------------
+# Fake streaming backend
+# ---------------------------------------------------------------------------
+
+
+class _FakeCtx:
+    def __init__(self, sink, content_type, initial_content):
+        self.sink = sink
+        self.content_type = content_type
+        self.task_message = TaskMessage(id="msg-t1", task_id="task-temporal-offline", content=initial_content)
+
+    async def __aenter__(self):
+        self.sink.append(("open", self.content_type))
+        return self
+
+    async def __aexit__(self, *a):
+        await self.close()
+        return False
+
+    async def close(self):
+        self.sink.append(("close", self.content_type))
+
+    async def stream_update(self, update):
+        self.sink.append(("update", update))
+        return update
+
+
+class _FakeStreaming:
+    def __init__(self):
+        self.sink: list = []
+
+    def streaming_task_message_context(self, task_id, initial_content, streaming_mode="coalesced", created_at=None):  # noqa: ARG002
+        ctype = getattr(initial_content, "type", None)
+        self.sink.append(("ctx", ctype))
+        return _FakeCtx(self.sink, ctype, initial_content)
+
+
+# ---------------------------------------------------------------------------
+# Offline tests (always run -- no CLI or API key needed)
+# ---------------------------------------------------------------------------
+
+
+class TestClaudeCodeOffline:
+    """Unit tests that run without a real claude CLI, Temporal, or network."""
+
+    @pytest.mark.asyncio
+    async def test_auto_send_text_only_produces_output(self):
+        """auto_send_turn result carries the agent's reply text."""
+        from agentex.lib.adk import ClaudeCodeTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+
+        fake_streaming = _FakeStreaming()
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        emitter = UnifiedEmitter(
+            task_id="offline-temporal",
+            trace_id=None,
+            parent_span_id=None,
+            tracer=False,
+            streaming=fake_streaming,
+        )
+        result = await emitter.auto_send_turn(turn)
+        assert "Hello from Temporal Claude Code" in result.final_text
+
+    @pytest.mark.asyncio
+    async def test_usage_populated_after_stream_exhausted(self):
+        """Usage is populated after the events stream is exhausted."""
+        from agentex.lib.adk import ClaudeCodeTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+
+        fake_streaming = _FakeStreaming()
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        emitter = UnifiedEmitter(
+            task_id="t",
+            trace_id=None,
+            parent_span_id=None,
+            tracer=False,
+            streaming=fake_streaming,
+        )
+        await emitter.auto_send_turn(turn)
+        usage = turn.usage()
+        assert usage.input_tokens == 15
+        assert usage.output_tokens == 7
+        assert usage.num_llm_calls == 1
+
+    @pytest.mark.asyncio
+    async def test_stream_task_message_done_present(self):
+        """StreamTaskMessageDone must appear via yield_turn on a ClaudeCodeTurn."""
+        from agentex.lib.adk import ClaudeCodeTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+        from agentex.types.task_message_update import StreamTaskMessageDone
+
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+        events = [e async for e in emitter.yield_turn(turn)]
+        assert any(isinstance(e, StreamTaskMessageDone) for e in events), (
+            "Expected at least one StreamTaskMessageDone event"
+        )
+
+    @pytest.mark.asyncio
+    async def test_session_id_captured_in_result_envelope(self):
+        """The result envelope carries session_id (multi-turn resume support)."""
+        from agentex.lib.adk import ClaudeCodeTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+
+        fake_streaming = _FakeStreaming()
+        turn = ClaudeCodeTurn(_fake_lines(_TEXT_ONLY_LINES))
+        emitter = UnifiedEmitter(
+            task_id="t",
+            trace_id=None,
+            parent_span_id=None,
+            tracer=False,
+            streaming=fake_streaming,
+        )
+        await emitter.auto_send_turn(turn)
+        assert turn._result_envelope is not None
+        assert turn._result_envelope.get("session_id") == "sess-temporal-offline-1"
+
+
+# ---------------------------------------------------------------------------
+# Live tests (skipped unless CLAUDE_LIVE_TESTS=1)
+# ---------------------------------------------------------------------------
+
+pytestmark_live = pytest.mark.skipif(
+    not os.environ.get("CLAUDE_LIVE_TESTS"),
+    reason="Set CLAUDE_LIVE_TESTS=1 and ensure the `claude` CLI + ANTHROPIC_API_KEY are available",
+)
+
+AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
+AGENT_NAME = os.environ.get("AGENT_NAME", "at140-claude-code")
+
+
+@pytestmark_live
+class TestClaudeCodeLive:
+    """Live Temporal tests -- needs Temporal server + the claude CLI + ANTHROPIC_API_KEY."""
+
+    @pytest.fixture
+    def client(self):
+        from agentex import Agentex
+
+        return Agentex(base_url=AGENTEX_API_BASE_URL)
+
+    @pytest.fixture
+    def agent_name(self):
+        return AGENT_NAME
+
+    @pytest.fixture
+    def agent_id(self, client, agent_name):
+        agents = client.agents.list()
+        for agent in agents:
+            if agent.name == agent_name:
+                return agent.id
+        raise ValueError(f"Agent {agent_name!r} not found.")
+
+    def test_send_simple_message(self, client, agent_id: str):
+        """Create a task, send a message, and poll until a response appears."""
+        import time
+        import uuid
+
+        from agentex.types import TextContentParam
+        from agentex.types.agent_rpc_params import ParamsSendEventRequest, ParamsCreateTaskRequest
+
+        task = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)).result
+        assert task is not None
+        task_id = task.id
+
+        client.agents.send_event(
+            agent_id=agent_id,
+            params=ParamsSendEventRequest(
+                task_id=task_id,
+                content=TextContentParam(
+                    author="user",
+                    content="Reply with exactly three words: hello from claude",
+                    type="text",
+                ),
+            ),
+        )
+
+        deadline = time.monotonic() + 90
+        while time.monotonic() < deadline:
+            msgs = client.messages.list(task_id=task_id)
+            agent_msgs = [m for m in msgs if getattr(m.content, "author", None) == "agent"]
+            response_msgs = [m for m in agent_msgs if "Task initialized" not in str(getattr(m.content, "content", ""))]
+            if response_msgs:
+                assert len(response_msgs) >= 1
+                return
+            time.sleep(3)
+
+        raise AssertionError("No agent response received within 90 s")
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/examples/tutorials/10_async/10_temporal/140_claude_code/tests/test_agent_offline.py b/examples/tutorials/10_async/10_temporal/140_claude_code/tests/test_agent_offline.py
new file mode 100644
index 000000000..1adc553f1
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/140_claude_code/tests/test_agent_offline.py
@@ -0,0 +1,230 @@
+"""Offline unit tests for the Temporal Claude Code tutorial agent.
+
+These tests do NOT require the ``claude`` CLI, Temporal, or ANTHROPIC_API_KEY.
+They inject a fake async iterator of pre-recorded stream-json lines in place of
+the real subprocess spawn and a fake streaming backend, then assert that the
+workflow's turn logic correctly drives ``UnifiedEmitter.auto_send_turn``.
+
+The injection seam is the ``_spawn_claude`` function in ``project/workflow.py``.
+Tests monkeypatch it with a coroutine returning a pre-recorded async iterator.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import AsyncIterator
+
+import pytest
+
+from agentex.lib.adk import ClaudeCodeTurn
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.types.task_message import TaskMessage
+
+# ---------------------------------------------------------------------------
+# Recorded fixtures
+# ---------------------------------------------------------------------------
+
+_TEXT_ONLY_LINES: list[str] = [
+    json.dumps({"type": "system", "subtype": "init", "session_id": "sess-temporal-1"}),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {"content": [{"type": "text", "text": "Hello from Temporal Claude Code!"}]},
+        }
+    ),
+    json.dumps(
+        {
+            "type": "result",
+            "session_id": "sess-temporal-1",
+            "usage": {"input_tokens": 15, "output_tokens": 7},
+            "cost_usd": 0.00015,
+            "duration_ms": 350,
+            "num_turns": 1,
+        }
+    ),
+]
+
+_TOOL_CALL_LINES: list[str] = [
+    json.dumps({"type": "system", "subtype": "init", "session_id": "sess-temporal-2"}),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {
+                "content": [
+                    {
+                        "type": "tool_use",
+                        "id": "tool_temporal",
+                        "name": "Bash",
+                        "input": {"command": "ls /tmp"},
+                    }
+                ]
+            },
+        }
+    ),
+    json.dumps(
+        {
+            "type": "user",
+            "message": {
+                "content": [
+                    {
+                        "type": "tool_result",
+                        "tool_use_id": "tool_temporal",
+                        "content": "file1\nfile2\n",
+                        "is_error": False,
+                    }
+                ]
+            },
+        }
+    ),
+    json.dumps(
+        {
+            "type": "assistant",
+            "message": {"content": [{"type": "text", "text": "Listed files."}]},
+        }
+    ),
+    json.dumps(
+        {
+            "type": "result",
+            "session_id": "sess-temporal-2",
+            "usage": {"input_tokens": 30, "output_tokens": 12},
+            "cost_usd": 0.0004,
+            "duration_ms": 600,
+            "num_turns": 1,
+        }
+    ),
+]
+
+
+# ---------------------------------------------------------------------------
+# Fake streaming backend
+# ---------------------------------------------------------------------------
+
+
+class _FakeCtx:
+    def __init__(self, sink, content_type, initial_content):
+        self.sink = sink
+        self.content_type = content_type
+        self.task_message = TaskMessage(id="msg-t1", task_id="task-temporal-offline", content=initial_content)
+
+    async def __aenter__(self):
+        self.sink.append(("open", self.content_type))
+        return self
+
+    async def __aexit__(self, *a):
+        await self.close()
+        return False
+
+    async def close(self):
+        self.sink.append(("close", self.content_type))
+
+    async def stream_update(self, update):
+        self.sink.append(("update", update))
+        return update
+
+
+class _FakeStreaming:
+    def __init__(self):
+        self.sink: list = []
+
+    def streaming_task_message_context(self, task_id, initial_content, streaming_mode="coalesced", created_at=None):  # noqa: ARG002
+        ctype = getattr(initial_content, "type", None)
+        self.sink.append(("ctx", ctype))
+        return _FakeCtx(self.sink, ctype, initial_content)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+async def _fake_lines(lines: list[str]) -> AsyncIterator[str]:
+    for line in lines:
+        yield line
+
+
+async def _run_turn(lines: list[str]):
+    fake_streaming = _FakeStreaming()
+    turn = ClaudeCodeTurn(_fake_lines(lines))
+    emitter = UnifiedEmitter(
+        task_id="offline-temporal",
+        trace_id=None,
+        parent_span_id=None,
+        tracer=False,
+        streaming=fake_streaming,
+    )
+    result = await emitter.auto_send_turn(turn)
+    return result, fake_streaming.sink, turn
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_text_only_produces_agent_output():
+    result, sink, _ = await _run_turn(_TEXT_ONLY_LINES)
+    assert "Hello from Temporal Claude Code" in result.final_text
+
+
+@pytest.mark.asyncio
+async def test_usage_from_result_envelope():
+    """Usage is available from turn.usage() after the events are exhausted.
+
+    UnifiedEmitter.auto_send_turn evaluates turn.usage() eagerly before the
+    async generator is consumed, so result.usage is a pre-exhaust snapshot.
+    Read usage directly from the turn after _run_turn completes instead.
+    """
+    result, _, turn = await _run_turn(_TEXT_ONLY_LINES)
+    usage = turn.usage()
+    assert usage.input_tokens == 15
+    assert usage.output_tokens == 7
+    assert usage.num_llm_calls == 1
+
+
+@pytest.mark.asyncio
+async def test_session_id_captured_in_result_envelope():
+    """Verify the result envelope carries session_id (multi-turn resume support)."""
+    _, _, turn = await _run_turn(_TEXT_ONLY_LINES)
+    assert turn._result_envelope is not None
+    assert turn._result_envelope.get("session_id") == "sess-temporal-1"
+
+
+@pytest.mark.asyncio
+async def test_tool_call_context_types():
+    result, sink, _ = await _run_turn(_TOOL_CALL_LINES)
+    opened = [s for s in sink if s[0] == "open"]
+    content_types = [s[1] for s in opened]
+    assert "tool_request" in content_types
+    assert "text" in content_types
+
+
+@pytest.mark.asyncio
+async def test_spawn_seam_concept():
+    """Demonstrate the injectable spawn seam pattern used in project/workflow.py.
+
+    ``_spawn_claude(prompt, session_id=None)`` is a top-level async generator.
+    A drop-in replacement (e.g. via monkeypatch) supplies pre-recorded lines
+    and captures call arguments. The session_id parameter enables multi-turn
+    resume (``claude -r <session_id>``).
+    """
+    called: list[tuple] = []
+
+    async def _fake_spawn(prompt: str, session_id=None) -> AsyncIterator[str]:
+        called.append((prompt, session_id))
+        for line in _TEXT_ONLY_LINES:
+            yield line
+
+    fake_streaming = _FakeStreaming()
+    turn = ClaudeCodeTurn(_fake_spawn("temporal prompt", session_id="old-sid"))
+    emitter = UnifiedEmitter(
+        task_id="t",
+        trace_id=None,
+        parent_span_id=None,
+        tracer=False,
+        streaming=fake_streaming,
+    )
+    result = await emitter.auto_send_turn(turn)
+
+    assert called == [("temporal prompt", "old-sid")]
+    assert "Hello from Temporal Claude Code" in result.final_text
diff --git a/src/agentex/lib/adk/__init__.py b/src/agentex/lib/adk/__init__.py
index a08131260..c2b343b72 100644
--- a/src/agentex/lib/adk/__init__.py
+++ b/src/agentex/lib/adk/__init__.py
@@ -13,6 +13,11 @@
 from agentex.lib.adk._modules._pydantic_ai_async import stream_pydantic_ai_events
 from agentex.lib.adk._modules._pydantic_ai_sync import convert_pydantic_ai_to_agentex_events
 from agentex.lib.adk._modules._pydantic_ai_tracing import create_pydantic_ai_tracing_handler
+from agentex.lib.adk._modules._claude_code_sync import convert_claude_code_to_agentex_events
+from agentex.lib.adk._modules._claude_code_turn import (
+    ClaudeCodeTurn,
+    claude_code_usage_to_turn_usage,
+)
 from agentex.lib.adk._modules.events import EventsModule
 from agentex.lib.adk._modules.messages import MessagesModule
 from agentex.lib.adk._modules.state import StateModule
@@ -54,6 +59,10 @@
     "stream_pydantic_ai_events",
     "convert_pydantic_ai_to_agentex_events",
     "create_pydantic_ai_tracing_handler",
+    # Claude Code
+    "convert_claude_code_to_agentex_events",
+    "ClaudeCodeTurn",
+    "claude_code_usage_to_turn_usage",
     # Providers
     "providers",
     # Utils
diff --git a/src/agentex/lib/adk/_modules/_claude_code_sync.py b/src/agentex/lib/adk/_modules/_claude_code_sync.py
new file mode 100644
index 000000000..4e25503cf
--- /dev/null
+++ b/src/agentex/lib/adk/_modules/_claude_code_sync.py
@@ -0,0 +1,378 @@
+"""Claude Code stream-json parser tap for the unified harness surface.
+
+Converts the newline-delimited JSON envelopes emitted by
+``claude -p --output-format stream-json`` into the canonical
+``StreamTaskMessage*`` stream consumed by the Agentex harness.
+
+Envelope → canonical mapping
+-----------------------------
+system/init
+    Ignored at this layer (session_id tracking is a provider concern).
+
+assistant / user  (content blocks)
+    text block           → Start(TextContent) + Delta(TextDelta)* + Done
+    thinking block       → Start(ReasoningContent) + Delta(ReasoningContentDelta)* + Done
+    tool_use block       → Start(ToolRequestContent) + Done   (Full args in Start content)
+    tool_result block    → Full(ToolResponseContent)
+
+stream_event / content_block_start
+    type=text            → Start(TextContent, empty)
+    type=thinking        → Start(ReasoningContent, empty)
+
+stream_event / content_block_delta
+    type=text_delta      → Delta(TextDelta)
+    type=thinking_delta  → Delta(ReasoningContentDelta)
+
+stream_event / content_block_stop
+    (text open)          → Done
+    (thinking open)      → Done  (full text known here; update Full via Full event first)
+
+result
+    Fires ``on_result`` with the raw envelope so the caller can capture
+    usage and cost. No StreamTaskMessage is emitted for the result itself.
+
+Out of scope
+------------
+No deployable test agent is provided. claude-code requires the golden
+agent's sandbox/subprocess/secret/MCP orchestration to produce the stream.
+Live coverage is the golden agent, which will adopt this tap. Do NOT add an
+examples/ agent or CI live-matrix row for claude-code.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any, Callable, Awaitable, AsyncIterator
+
+from agentex.lib.utils.logging import make_logger
+from agentex.types.text_content import TextContent
+from agentex.types.reasoning_content import ReasoningContent
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.types.reasoning_content_delta import ReasoningContentDelta
+
+logger = make_logger(__name__)
+
+_MAX_RESULT_LENGTH = 4000
+
+
+def _truncate(text: str) -> str:
+    return str(text)[:_MAX_RESULT_LENGTH]
+
+
+def _extract_summary(text: str, max_len: int = 300) -> str:
+    return text.strip().split("\n", 1)[0][:max_len]
+
+
+async def convert_claude_code_to_agentex_events(
+    lines: AsyncIterator[str | dict[str, Any]],
+    on_result: Callable[[dict[str, Any]], Awaitable[None]] | None = None,
+) -> AsyncIterator[StreamTaskMessageStart | StreamTaskMessageDelta | StreamTaskMessageFull | StreamTaskMessageDone]:
+    """Convert a claude-code ``stream-json`` line stream into Agentex ``StreamTaskMessage*`` events.
+
+    Each item in ``lines`` is either a raw JSON string (as read from the CLI's
+    stdout) or an already-parsed dict. Empty strings are skipped; unparseable
+    JSON is logged and skipped.
+
+    ``on_result`` is called with the ``result`` envelope when it arrives so the
+    caller can capture usage and cost. It is awaited before the generator
+    continues. When ``None``, the result envelope is silently dropped.
+
+    Envelope → canonical mapping is documented in this module's docstring.
+    """
+    next_index = 0
+    tool_call_count = 0
+
+    # Streaming state for content_block_start / content_block_delta /
+    # content_block_stop triples.
+    _thinking_open = False
+    _thinking_buf = ""
+    _thinking_index: int | None = None
+    _text_open = False
+    _text_buf = ""
+    _text_index: int | None = None
+    # Track which assistant-message block indices were already streamed via
+    # stream_event triples. Those blocks must not be re-emitted when the full
+    # assistant message arrives. Reset at each message boundary (see below) so a
+    # later turn's block indices don't collide with an earlier turn's.
+    _streamed_block_indexes: set[int] = set()
+    # Once-guard so a thinking block's pending index is claimed on its first
+    # thinking_delta only. Reset per turn alongside _streamed_block_indexes.
+    _saw_thinking_stream = False
+    # For deferred ReasoningStarted: if a content_block_start(thinking) arrives
+    # but no thinking_delta ever follows, the final assistant block's thinking
+    # field fills the reasoning content instead.
+    _pending_thinking_block_index: int | None = None
+
+    async for raw in lines:
+        if not raw:
+            continue
+
+        if isinstance(raw, dict):
+            evt = raw
+        else:
+            line = raw.strip()
+            if not line:
+                continue
+            try:
+                evt = json.loads(line)
+            except json.JSONDecodeError:
+                logger.debug("claude-code: skipping non-JSON line: %r", line[:120])
+                continue
+
+        evt_type = evt.get("type", "")
+
+        # -----------------------------------------------------------------------
+        # assistant / user — materialised content blocks
+        # -----------------------------------------------------------------------
+        if evt_type in ("assistant", "user"):
+            msg = evt.get("message", {})
+            blocks = msg.get("content", [])
+            if not isinstance(blocks, list):
+                blocks = [blocks]
+
+            for idx, block in enumerate(blocks):
+                if not isinstance(block, dict):
+                    continue
+                block_type = block.get("type", "")
+
+                if block_type == "text":
+                    # Skip only the specific blocks already delivered via
+                    # stream_event deltas (per-block, not a turn-wide latch).
+                    if idx in _streamed_block_indexes:
+                        continue
+                    text = block.get("text", "")
+                    if text:
+                        msg_index = next_index
+                        next_index += 1
+                        yield StreamTaskMessageStart(
+                            type="start",
+                            index=msg_index,
+                            content=TextContent(
+                                type="text",
+                                author="agent",
+                                content="",
+                            ),
+                        )
+                        yield StreamTaskMessageDelta(
+                            type="delta",
+                            index=msg_index,
+                            delta=TextDelta(type="text", text_delta=text),
+                        )
+                        yield StreamTaskMessageDone(type="done", index=msg_index)
+
+                elif block_type == "thinking":
+                    # Skip only the specific blocks already delivered via
+                    # stream_event deltas (per-block, not a turn-wide latch).
+                    if idx in _streamed_block_indexes:
+                        continue
+                    thinking_text = block.get("thinking", "")
+                    if thinking_text:
+                        summary = _extract_summary(thinking_text)
+                        msg_index = next_index
+                        next_index += 1
+                        yield StreamTaskMessageStart(
+                            type="start",
+                            index=msg_index,
+                            content=ReasoningContent(
+                                type="reasoning",
+                                author="agent",
+                                summary=[summary],
+                                content=[],
+                                style="active",
+                            ),
+                        )
+                        yield StreamTaskMessageDelta(
+                            type="delta",
+                            index=msg_index,
+                            delta=ReasoningContentDelta(
+                                type="reasoning_content",
+                                content_index=0,
+                                content_delta=thinking_text,
+                            ),
+                        )
+                        yield StreamTaskMessageDone(type="done", index=msg_index)
+
+                elif block_type == "tool_use":
+                    tool_call_count += 1
+                    tool_id = block.get("id", f"tool_{tool_call_count}")
+                    name = block.get("name", "unknown")
+                    arguments = block.get("input", {})
+                    if not isinstance(arguments, dict):
+                        arguments = {}
+                    msg_index = next_index
+                    next_index += 1
+                    yield StreamTaskMessageStart(
+                        type="start",
+                        index=msg_index,
+                        content=ToolRequestContent(
+                            type="tool_request",
+                            author="agent",
+                            tool_call_id=tool_id,
+                            name=name,
+                            arguments=arguments,
+                        ),
+                    )
+                    yield StreamTaskMessageDone(type="done", index=msg_index)
+
+                elif block_type == "tool_result":
+                    tool_id = block.get("tool_use_id", "")
+                    content = block.get("content", "")
+                    is_error = block.get("is_error", False)
+                    if isinstance(content, list):
+                        content = "\n".join(b.get("text", str(b)) if isinstance(b, dict) else str(b) for b in content)
+                    result_str = _truncate(str(content))
+                    msg_index = next_index
+                    next_index += 1
+                    yield StreamTaskMessageFull(
+                        type="full",
+                        index=msg_index,
+                        content=ToolResponseContent(
+                            type="tool_response",
+                            author="agent",
+                            tool_call_id=tool_id,
+                            name="",
+                            content={"result": result_str, **({"is_error": True} if is_error else {})},
+                        ),
+                    )
+
+            # End of a materialised message: reset per-turn streaming dedup state
+            # so the next turn's stream_event indices start clean. Without this,
+            # a block index streamed in an earlier turn would linger in the set
+            # and silently drop a later turn's non-streamed block at that index.
+            _streamed_block_indexes = set()
+            _saw_thinking_stream = False
+
+        # -----------------------------------------------------------------------
+        # stream_event — incremental streaming deltas
+        # -----------------------------------------------------------------------
+        elif evt_type == "stream_event":
+            se = evt.get("event") or {}
+            se_type = se.get("type", "")
+            block_index = se.get("index")
+
+            if se_type == "content_block_start":
+                block = se.get("content_block") or {}
+                btype = block.get("type")
+
+                if btype == "thinking":
+                    _thinking_open = True
+                    _thinking_buf = ""
+                    # Defer marking the block as streamed until we actually
+                    # receive a thinking_delta. Some configurations emit a
+                    # thinking block_start but no deltas — in that case we want
+                    # the final assistant-message handler to fill the text.
+                    _pending_thinking_block_index = block_index if isinstance(block_index, int) else None
+                    msg_index = next_index
+                    next_index += 1
+                    _thinking_index = msg_index
+                    yield StreamTaskMessageStart(
+                        type="start",
+                        index=msg_index,
+                        content=ReasoningContent(
+                            type="reasoning",
+                            author="agent",
+                            summary=[],
+                            content=[],
+                            style="active",
+                        ),
+                    )
+
+                elif btype == "text":
+                    _text_open = True
+                    _text_buf = ""
+                    if isinstance(block_index, int):
+                        _streamed_block_indexes.add(block_index)
+                    msg_index = next_index
+                    next_index += 1
+                    _text_index = msg_index
+                    yield StreamTaskMessageStart(
+                        type="start",
+                        index=msg_index,
+                        content=TextContent(
+                            type="text",
+                            author="agent",
+                            content="",
+                        ),
+                    )
+
+            elif se_type == "content_block_delta":
+                delta = se.get("delta") or {}
+                dtype = delta.get("type")
+
+                if dtype == "thinking_delta":
+                    chunk = delta.get("thinking", "")
+                    if chunk and _thinking_open:
+                        if not _saw_thinking_stream:
+                            _saw_thinking_stream = True
+                            # Now mark the block as claimed so the assistant
+                            # message handler won't re-emit it.
+                            if _pending_thinking_block_index is not None:
+                                _streamed_block_indexes.add(_pending_thinking_block_index)
+                        _thinking_buf += chunk
+                        if _thinking_index is not None:
+                            yield StreamTaskMessageDelta(
+                                type="delta",
+                                index=_thinking_index,
+                                delta=ReasoningContentDelta(
+                                    type="reasoning_content",
+                                    content_index=0,
+                                    content_delta=chunk,
+                                ),
+                            )
+
+                elif dtype == "text_delta":
+                    chunk = delta.get("text", "")
+                    if chunk and _text_open:
+                        _text_buf += chunk
+                        if _text_index is not None:
+                            yield StreamTaskMessageDelta(
+                                type="delta",
+                                index=_text_index,
+                                delta=TextDelta(type="text", text_delta=chunk),
+                            )
+
+            elif se_type == "content_block_stop":
+                if _thinking_open:
+                    _thinking_open = False
+                    _thinking_buf = ""
+                    _pending_thinking_block_index = None
+                    # Reset the once-guard per thinking block: a turn can stream a
+                    # second thinking block, and without this the guard stays True,
+                    # the second block's index is never claimed, and the final
+                    # assistant envelope re-emits it (duplicate Start/Delta/Done).
+                    _saw_thinking_stream = False
+                    if _thinking_index is not None:
+                        yield StreamTaskMessageDone(type="done", index=_thinking_index)
+                    _thinking_index = None
+                elif _text_open:
+                    _text_open = False
+                    _text_buf = ""
+                    if _text_index is not None:
+                        yield StreamTaskMessageDone(type="done", index=_text_index)
+                    _text_index = None
+
+        # -----------------------------------------------------------------------
+        # system / init — session metadata (ignored at this layer)
+        # -----------------------------------------------------------------------
+        elif evt_type == "system":
+            # Session ID tracking and MCP status logging are provider concerns.
+            # This pure parser layer intentionally emits nothing for system events.
+            pass
+
+        # -----------------------------------------------------------------------
+        # result — carries usage + cost; fired to on_result, not emitted as msgs
+        # -----------------------------------------------------------------------
+        elif evt_type == "result":
+            if on_result is not None:
+                await on_result(evt)
+
+        else:
+            logger.debug("claude-code: unhandled envelope type %r", evt_type)
diff --git a/src/agentex/lib/adk/_modules/_claude_code_turn.py b/src/agentex/lib/adk/_modules/_claude_code_turn.py
new file mode 100644
index 000000000..6c052976a
--- /dev/null
+++ b/src/agentex/lib/adk/_modules/_claude_code_turn.py
@@ -0,0 +1,161 @@
+"""ClaudeCodeTurn — HarnessTurn implementation for the claude-code tap.
+
+Wraps ``convert_claude_code_to_agentex_events`` to implement the
+``HarnessTurn`` protocol: exposes ``events`` (the canonical
+``StreamTaskMessage*`` stream) and ``usage()`` (the normalised
+``TurnUsage``, populated after the stream is exhausted).
+
+Usage normalization
+-------------------
+Claude Code's ``result`` envelope carries usage under several key shapes
+depending on the CLI version. We defensive-map all known shapes:
+
+    result.usage.input_tokens        -> input_tokens
+    result.usage.output_tokens       -> output_tokens
+    result.usage.cache_read_input_tokens
+    result.usage.cache_creation_input_tokens  -> cached_input_tokens (sum)
+    result.cost_usd / result.total_cost_usd   -> cost_usd
+    result.duration_ms               -> duration_ms
+    result.num_turns                 -> num_llm_calls
+
+Real zeros are preserved; missing keys default to ``None`` (not zero) so
+downstream consumers can distinguish "not reported" from "zero".
+
+Out of scope: no deployable test agent is provided — see module docstring
+in ``_claude_code_sync.py``.
+"""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+
+from agentex.lib.core.harness.types import TurnUsage, HarnessTurn, StreamTaskMessage
+from agentex.lib.adk._modules._claude_code_sync import convert_claude_code_to_agentex_events
+
+
+def claude_code_usage_to_turn_usage(result_envelope: dict[str, Any]) -> TurnUsage:
+    """Map a claude-code ``result`` envelope to a canonical ``TurnUsage``.
+
+    Defensively handles missing / None values. Real zeros are preserved.
+    ``cost_usd`` checks both ``cost_usd`` and ``total_cost_usd`` keys (the
+    CLI has used both across versions).
+    ``cached_input_tokens`` accumulates cache_read and cache_creation counts
+    since both represent tokens served from the prompt cache.
+    """
+    usage_raw: dict[str, Any] = result_envelope.get("usage") or {}
+
+    def _int(d: dict[str, Any], key: str) -> int | None:
+        v = d.get(key)
+        if v is None:
+            return None
+        try:
+            return int(v)
+        except (TypeError, ValueError):
+            return None
+
+    def _float(d: dict[str, Any], *keys: str) -> float | None:
+        for key in keys:
+            v = d.get(key)
+            if v is not None:
+                try:
+                    return float(v)
+                except (TypeError, ValueError):
+                    continue
+        return None
+
+    input_tokens = _int(usage_raw, "input_tokens")
+    output_tokens = _int(usage_raw, "output_tokens")
+
+    # Aggregate both cache_read and cache_creation into cached_input_tokens
+    cache_read = _int(usage_raw, "cache_read_input_tokens")
+    cache_creation = _int(usage_raw, "cache_creation_input_tokens")
+    if cache_read is not None or cache_creation is not None:
+        cached_input_tokens = (cache_read or 0) + (cache_creation or 0)
+    else:
+        cached_input_tokens = None
+
+    total_tokens: int | None = None
+    if input_tokens is not None and output_tokens is not None:
+        total_tokens = input_tokens + output_tokens
+
+    cost_usd = _float(result_envelope, "cost_usd", "total_cost_usd")
+    duration_ms = _int(result_envelope, "duration_ms")
+
+    # num_llm_calls is provider-reported (from num_turns): default None ("not
+    # reported") rather than 0 so callers can distinguish it from a real zero,
+    # matching the None convention used for the token fields above.
+    num_turns = result_envelope.get("num_turns")
+    num_llm_calls: int | None = None
+    if num_turns is not None:
+        try:
+            num_llm_calls = int(num_turns)
+        except (TypeError, ValueError):
+            pass
+
+    return TurnUsage(
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        cached_input_tokens=cached_input_tokens,
+        total_tokens=total_tokens,
+        cost_usd=cost_usd,
+        duration_ms=duration_ms,
+        num_llm_calls=num_llm_calls,
+    )
+
+
+class ClaudeCodeTurn:
+    """HarnessTurn for a claude-code ``stream-json`` line stream.
+
+    Satisfies the ``HarnessTurn`` protocol:
+    - ``events`` yields the canonical ``StreamTaskMessage*`` stream.
+    - ``usage()`` returns the normalised ``TurnUsage`` (only valid after
+      ``events`` is fully consumed).
+
+    ``lines`` is an async iterator of raw JSON strings or pre-parsed dicts, as
+    produced by reading the claude-code CLI's stdout line by line.
+    """
+
+    def __init__(self, lines: AsyncIterator[str | dict[str, Any]]) -> None:
+        self._lines = lines
+        self._result_envelope: dict[str, Any] | None = None
+        self._events_stream: AsyncIterator[StreamTaskMessage] | None = None
+
+    async def _on_result(self, envelope: dict[str, Any]) -> None:
+        self._result_envelope = envelope
+
+    @property
+    def events(self) -> AsyncIterator[StreamTaskMessage]:
+        if self._events_stream is None:
+            self._events_stream = convert_claude_code_to_agentex_events(
+                self._lines,
+                on_result=self._on_result,
+            )
+        return self._events_stream
+
+    @property
+    def session_id(self) -> str | None:
+        """The Claude Code session id, for resuming a multi-turn session.
+
+        Valid only after ``events`` has been fully consumed (populated by the
+        ``result`` envelope). Returns ``None`` if the stream was truncated or
+        Claude Code reported no session id.
+        """
+        if not self._result_envelope:
+            return None
+        return self._result_envelope.get("session_id")
+
+    def usage(self) -> TurnUsage:
+        """Return normalised usage for this turn.
+
+        Call only after ``events`` is exhausted. Returns an empty ``TurnUsage``
+        if the ``result`` envelope was not received (e.g. stream was truncated).
+        """
+        if self._result_envelope is None:
+            return TurnUsage()
+        return claude_code_usage_to_turn_usage(self._result_envelope)
+
+
+# Runtime assert that ClaudeCodeTurn satisfies HarnessTurn protocol
+assert isinstance(ClaudeCodeTurn.__new__(ClaudeCodeTurn), HarnessTurn), (
+    "ClaudeCodeTurn must satisfy the HarnessTurn protocol"
+)
diff --git a/src/agentex/lib/core/harness/types.py b/src/agentex/lib/core/harness/types.py
index b37dc1e51..74e0dc314 100644
--- a/src/agentex/lib/core/harness/types.py
+++ b/src/agentex/lib/core/harness/types.py
@@ -64,7 +64,10 @@ class TurnUsage(BaseModel):
     total_tokens: int | None = None
     cost_usd: float | None = None
     duration_ms: int | None = None
-    num_llm_calls: int = 0
+    # num_llm_calls is provider-reported and may be absent (None = "not
+    # reported"). num_tool_calls / num_reasoning_blocks are counted locally from
+    # the observed stream, so 0 is always a real count.
+    num_llm_calls: int | None = None
     num_tool_calls: int = 0
     num_reasoning_blocks: int = 0
 
diff --git a/tests/lib/adk/test_claude_code_sync.py b/tests/lib/adk/test_claude_code_sync.py
new file mode 100644
index 000000000..6dd36d973
--- /dev/null
+++ b/tests/lib/adk/test_claude_code_sync.py
@@ -0,0 +1,637 @@
+"""Tests for the claude-code stream-json -> Agentex StreamTaskMessage* converter."""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+
+import pytest
+
+from agentex.types.text_content import TextContent
+from agentex.types.reasoning_content import ReasoningContent
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.types.reasoning_content_delta import ReasoningContentDelta
+from agentex.lib.adk._modules._claude_code_sync import convert_claude_code_to_agentex_events
+
+
+async def _aiter(events: list[Any]) -> AsyncIterator[Any]:
+    for e in events:
+        yield e
+
+
+async def _collect(stream: AsyncIterator[Any]) -> list[Any]:
+    return [e async for e in stream]
+
+
+# ---------------------------------------------------------------------------
+# Text content
+# ---------------------------------------------------------------------------
+
+
+class TestTextContent:
+    async def test_text_block_in_assistant_message_emits_start_delta_done(self):
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "text", "text": "Hello world"}]},
+            }
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+
+        assert len(out) == 3
+        assert isinstance(out[0], StreamTaskMessageStart)
+        assert isinstance(out[0].content, TextContent)
+        assert out[0].content.content == ""
+        assert isinstance(out[1], StreamTaskMessageDelta)
+        assert isinstance(out[1].delta, TextDelta)
+        assert out[1].delta.text_delta == "Hello world"
+        assert isinstance(out[2], StreamTaskMessageDone)
+        assert out[0].index == out[1].index == out[2].index
+
+    async def test_empty_text_block_is_skipped(self):
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "text", "text": ""}]},
+            }
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        assert out == []
+
+    async def test_streamed_text_via_stream_event_emits_start_deltas_done(self):
+        envelopes = [
+            {
+                "type": "stream_event",
+                "event": {"type": "content_block_start", "index": 0, "content_block": {"type": "text"}},
+            },
+            {
+                "type": "stream_event",
+                "event": {
+                    "type": "content_block_delta",
+                    "index": 0,
+                    "delta": {"type": "text_delta", "text": "Hello"},
+                },
+            },
+            {
+                "type": "stream_event",
+                "event": {
+                    "type": "content_block_delta",
+                    "index": 0,
+                    "delta": {"type": "text_delta", "text": " world"},
+                },
+            },
+            {
+                "type": "stream_event",
+                "event": {"type": "content_block_stop", "index": 0},
+            },
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+
+        starts = [e for e in out if isinstance(e, StreamTaskMessageStart)]
+        deltas = [e for e in out if isinstance(e, StreamTaskMessageDelta)]
+        dones = [e for e in out if isinstance(e, StreamTaskMessageDone)]
+
+        assert len(starts) == 1
+        assert isinstance(starts[0].content, TextContent)
+        assert len(deltas) == 2
+        assert isinstance(deltas[0].delta, TextDelta)
+        assert deltas[0].delta.text_delta == "Hello"
+        assert isinstance(deltas[1].delta, TextDelta)
+        assert deltas[1].delta.text_delta == " world"
+        assert len(dones) == 1
+
+    async def test_streamed_text_not_re_emitted_by_assistant_block(self):
+        """After stream_event triple, the final assistant block must not re-emit the text."""
+        envelopes = [
+            {
+                "type": "stream_event",
+                "event": {
+                    "type": "content_block_start",
+                    "index": 0,
+                    "content_block": {"type": "text"},
+                },
+            },
+            {
+                "type": "stream_event",
+                "event": {
+                    "type": "content_block_delta",
+                    "index": 0,
+                    "delta": {"type": "text_delta", "text": "streamed"},
+                },
+            },
+            {
+                "type": "stream_event",
+                "event": {"type": "content_block_stop", "index": 0},
+            },
+            # Final assistant message with same text — must NOT be re-emitted
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "text", "text": "streamed"}]},
+            },
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        text_starts = [e for e in out if isinstance(e, StreamTaskMessageStart) and isinstance(e.content, TextContent)]
+        assert len(text_starts) == 1, "Text block must not be emitted twice"
+
+    async def test_later_turn_non_streamed_text_not_dropped(self):
+        """A non-streamed text block in a later turn must not be dropped because an
+        earlier turn streamed a block at the same index."""
+        envelopes = [
+            # Turn 1: streamed text at index 0 (dedup'd against the materialised msg).
+            {
+                "type": "stream_event",
+                "event": {"type": "content_block_start", "index": 0, "content_block": {"type": "text"}},
+            },
+            {
+                "type": "stream_event",
+                "event": {"type": "content_block_delta", "index": 0, "delta": {"type": "text_delta", "text": "first"}},
+            },
+            {"type": "stream_event", "event": {"type": "content_block_stop", "index": 0}},
+            {"type": "assistant", "message": {"content": [{"type": "text", "text": "first"}]}},
+            # Turn 2: a NON-streamed text block, also at index 0.
+            {"type": "assistant", "message": {"content": [{"type": "text", "text": "second"}]}},
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        deltas = [
+            e.delta.text_delta for e in out if isinstance(e, StreamTaskMessageDelta) and isinstance(e.delta, TextDelta)
+        ]
+        assert deltas == ["first", "second"], "Later turn's non-streamed text must still be delivered"
+
+
+# ---------------------------------------------------------------------------
+# Thinking / reasoning content
+# ---------------------------------------------------------------------------
+
+
+class TestThinkingContent:
+    async def test_thinking_block_emits_reasoning_start_delta_done(self):
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "thinking", "thinking": "Let me reason..."}]},
+            }
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+
+        assert len(out) == 3
+        assert isinstance(out[0], StreamTaskMessageStart)
+        assert isinstance(out[0].content, ReasoningContent)
+        # Summary must be populated from the thinking text
+        assert out[0].content.summary == ["Let me reason..."]
+        assert isinstance(out[1], StreamTaskMessageDelta)
+        assert isinstance(out[1].delta, ReasoningContentDelta)
+        assert out[1].delta.content_delta == "Let me reason..."
+        assert out[1].delta.content_index == 0
+        assert isinstance(out[2], StreamTaskMessageDone)
+
+    async def test_empty_thinking_block_is_skipped(self):
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "thinking", "thinking": ""}]},
+            }
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        assert out == []
+
+    async def test_streamed_thinking_emits_reasoning_start_deltas_done(self):
+        envelopes = [
+            {
+                "type": "stream_event",
+                "event": {
+                    "type": "content_block_start",
+                    "index": 0,
+                    "content_block": {"type": "thinking"},
+                },
+            },
+            {
+                "type": "stream_event",
+                "event": {
+                    "type": "content_block_delta",
+                    "index": 0,
+                    "delta": {"type": "thinking_delta", "thinking": "step one"},
+                },
+            },
+            {
+                "type": "stream_event",
+                "event": {
+                    "type": "content_block_delta",
+                    "index": 0,
+                    "delta": {"type": "thinking_delta", "thinking": " step two"},
+                },
+            },
+            {
+                "type": "stream_event",
+                "event": {"type": "content_block_stop", "index": 0},
+            },
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+
+        starts = [e for e in out if isinstance(e, StreamTaskMessageStart)]
+        deltas = [e for e in out if isinstance(e, StreamTaskMessageDelta)]
+        dones = [e for e in out if isinstance(e, StreamTaskMessageDone)]
+
+        assert len(starts) == 1
+        assert isinstance(starts[0].content, ReasoningContent)
+        assert len(deltas) == 2
+        assert isinstance(deltas[0].delta, ReasoningContentDelta)
+        assert deltas[0].delta.content_delta == "step one"
+        assert isinstance(deltas[1].delta, ReasoningContentDelta)
+        assert deltas[1].delta.content_delta == " step two"
+        assert len(dones) == 1
+
+    async def test_two_streamed_thinking_blocks_not_re_emitted(self):
+        """A turn that streams two thinking blocks must claim both indices, so the
+        final assistant envelope does not re-emit the second one."""
+
+        def _thinking_block(idx: int, text: str) -> list:
+            return [
+                {
+                    "type": "stream_event",
+                    "event": {"type": "content_block_start", "index": idx, "content_block": {"type": "thinking"}},
+                },
+                {
+                    "type": "stream_event",
+                    "event": {
+                        "type": "content_block_delta",
+                        "index": idx,
+                        "delta": {"type": "thinking_delta", "thinking": text},
+                    },
+                },
+                {"type": "stream_event", "event": {"type": "content_block_stop", "index": idx}},
+            ]
+
+        envelopes = [
+            *_thinking_block(0, "first thought"),
+            *_thinking_block(1, "second thought"),
+            # Final assistant envelope repeats both thinking blocks — neither should re-emit.
+            {
+                "type": "assistant",
+                "message": {
+                    "content": [
+                        {"type": "thinking", "thinking": "first thought"},
+                        {"type": "thinking", "thinking": "second thought"},
+                    ]
+                },
+            },
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        reasoning_starts = [
+            e for e in out if isinstance(e, StreamTaskMessageStart) and isinstance(e.content, ReasoningContent)
+        ]
+        assert len(reasoning_starts) == 2, "each streamed thinking block emitted exactly once (no duplicate)"
+
+    async def test_thinking_block_start_with_no_deltas_allows_assistant_to_fill(self):
+        """A thinking block_start without any deltas leaves the final assistant block
+        free to emit the thinking text (the block index is not claimed as streamed)."""
+        envelopes = [
+            {
+                "type": "stream_event",
+                "event": {
+                    "type": "content_block_start",
+                    "index": 0,
+                    "content_block": {"type": "thinking"},
+                },
+            },
+            # No thinking_delta — close block immediately
+            {
+                "type": "stream_event",
+                "event": {"type": "content_block_stop", "index": 0},
+            },
+            # Final assistant message has the thinking text
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "thinking", "thinking": "delayed thinking"}]},
+            },
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        # The assistant block should produce a full thinking message (Start+Delta+Done)
+        reasoning_starts = [
+            e for e in out if isinstance(e, StreamTaskMessageStart) and isinstance(e.content, ReasoningContent)
+        ]
+        # There will be the empty start from stream_event, plus the one from assistant block
+        reasoning_deltas = [
+            e for e in out if isinstance(e, StreamTaskMessageDelta) and isinstance(e.delta, ReasoningContentDelta)
+        ]
+        assert len(reasoning_deltas) >= 1
+        assert any(
+            isinstance(d.delta, ReasoningContentDelta) and d.delta.content_delta == "delayed thinking"
+            for d in reasoning_deltas
+        )
+
+
+# ---------------------------------------------------------------------------
+# Tool calls and results
+# ---------------------------------------------------------------------------
+
+
+class TestToolCallsAndResults:
+    async def test_tool_use_block_emits_start_done(self):
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {
+                    "content": [
+                        {
+                            "type": "tool_use",
+                            "id": "call_abc",
+                            "name": "Bash",
+                            "input": {"command": "ls /"},
+                        }
+                    ]
+                },
+            }
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+
+        assert len(out) == 2
+        assert isinstance(out[0], StreamTaskMessageStart)
+        assert isinstance(out[0].content, ToolRequestContent)
+        assert out[0].content.tool_call_id == "call_abc"
+        assert out[0].content.name == "Bash"
+        assert out[0].content.arguments == {"command": "ls /"}
+        assert isinstance(out[1], StreamTaskMessageDone)
+
+    async def test_tool_result_block_emits_full(self):
+        envelopes = [
+            {
+                "type": "user",
+                "message": {
+                    "content": [
+                        {
+                            "type": "tool_result",
+                            "tool_use_id": "call_abc",
+                            "content": "file1.txt\nfile2.txt",
+                        }
+                    ]
+                },
+            }
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+
+        assert len(out) == 1
+        assert isinstance(out[0], StreamTaskMessageFull)
+        assert isinstance(out[0].content, ToolResponseContent)
+        assert out[0].content.tool_call_id == "call_abc"
+        assert "file1.txt" in str(out[0].content.content)
+
+    async def test_tool_result_list_content_joined(self):
+        envelopes = [
+            {
+                "type": "user",
+                "message": {
+                    "content": [
+                        {
+                            "type": "tool_result",
+                            "tool_use_id": "tid",
+                            "content": [
+                                {"type": "text", "text": "line1"},
+                                {"type": "text", "text": "line2"},
+                            ],
+                        }
+                    ]
+                },
+            }
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        assert isinstance(out[0], StreamTaskMessageFull)
+        assert isinstance(out[0].content, ToolResponseContent)
+        payload = str(out[0].content.content)
+        assert "line1" in payload
+        assert "line2" in payload
+
+    async def test_tool_result_error_flag_passed_through(self):
+        envelopes = [
+            {
+                "type": "user",
+                "message": {
+                    "content": [
+                        {
+                            "type": "tool_result",
+                            "tool_use_id": "err_call",
+                            "content": "Permission denied",
+                            "is_error": True,
+                        }
+                    ]
+                },
+            }
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        assert isinstance(out[0], StreamTaskMessageFull)
+        assert isinstance(out[0].content, ToolResponseContent)
+        assert isinstance(out[0].content.content, dict)
+        assert out[0].content.content.get("is_error") is True
+
+    async def test_tool_result_truncation(self):
+        long_result = "x" * 5000
+        envelopes = [
+            {
+                "type": "user",
+                "message": {
+                    "content": [
+                        {
+                            "type": "tool_result",
+                            "tool_use_id": "t",
+                            "content": long_result,
+                        }
+                    ]
+                },
+            }
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        result_str = out[0].content.content.get("result", "")
+        assert len(result_str) <= 4000
+
+
+# ---------------------------------------------------------------------------
+# on_result callback
+# ---------------------------------------------------------------------------
+
+
+class TestOnResult:
+    async def test_on_result_called_with_result_envelope(self):
+        captured: list[dict] = []
+
+        async def capture(envelope):
+            captured.append(envelope)
+
+        envelopes = [
+            {
+                "type": "result",
+                "session_id": "sess123",
+                "cost_usd": 0.012,
+                "usage": {"input_tokens": 100, "output_tokens": 50},
+            }
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes), on_result=capture))
+
+        # result envelope does not emit any StreamTaskMessage
+        assert out == []
+        assert len(captured) == 1
+        assert captured[0]["session_id"] == "sess123"
+        assert captured[0]["cost_usd"] == pytest.approx(0.012)
+
+    async def test_on_result_not_called_when_no_result_envelope(self):
+        captured: list[dict] = []
+
+        async def capture(envelope):
+            captured.append(envelope)
+
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "text", "text": "Hi"}]},
+            }
+        ]
+        await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes), on_result=capture))
+        assert captured == []
+
+    async def test_no_on_result_does_not_raise(self):
+        envelopes = [
+            {
+                "type": "result",
+                "cost_usd": 0.001,
+                "usage": {"input_tokens": 10, "output_tokens": 5},
+            }
+        ]
+        # Should not raise even without a callback
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        assert out == []
+
+
+# ---------------------------------------------------------------------------
+# Message indexing
+# ---------------------------------------------------------------------------
+
+
+class TestMessageIndexing:
+    async def test_multiple_blocks_get_distinct_indices(self):
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {
+                    "content": [
+                        {"type": "text", "text": "First"},
+                        {
+                            "type": "tool_use",
+                            "id": "c1",
+                            "name": "Read",
+                            "input": {"path": "/tmp"},
+                        },
+                    ]
+                },
+            },
+            {
+                "type": "user",
+                "message": {
+                    "content": [
+                        {
+                            "type": "tool_result",
+                            "tool_use_id": "c1",
+                            "content": "some content",
+                        }
+                    ]
+                },
+            },
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "text", "text": "Done"}]},
+            },
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+
+        # Gather all Start/Full events and check indices are monotonically increasing
+        anchors = [e for e in out if isinstance(e, (StreamTaskMessageStart, StreamTaskMessageFull))]
+        indices = [e.index for e in anchors]
+        assert indices == sorted(indices), "Indices must be monotonically increasing"
+        assert len(set(indices)) == len(indices), "All indices must be distinct"
+
+    async def test_system_init_and_unknown_envelopes_produce_no_output(self):
+        envelopes = [
+            {"type": "system", "subtype": "init", "session_id": "sess"},
+            {"type": "unknown_future_type", "data": "whatever"},
+        ]
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter(envelopes)))
+        assert out == []
+
+    async def test_non_json_string_lines_are_skipped(self):
+        lines = [
+            "not json at all",
+            '{"type": "assistant", "message": {"content": [{"type": "text", "text": "hi"}]}}',
+        ]
+
+        async def _str_iter():
+            for line in lines:
+                yield line
+
+        out = await _collect(convert_claude_code_to_agentex_events(_str_iter()))
+        assert len(out) == 3  # Start + Delta + Done for the text block
+
+    async def test_empty_lines_are_skipped(self):
+        lines = ["", "  ", '{"type": "system", "subtype": "init"}']
+
+        async def _str_iter():
+            for line in lines:
+                yield line
+
+        out = await _collect(convert_claude_code_to_agentex_events(_str_iter()))
+        assert out == []
+
+
+# ---------------------------------------------------------------------------
+# Author
+# ---------------------------------------------------------------------------
+
+
+class TestContentAuthors:
+    @pytest.mark.parametrize(
+        "envelope",
+        [
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "text", "text": "hi"}]},
+            },
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "thinking", "thinking": "thoughts"}]},
+            },
+            {
+                "type": "assistant",
+                "message": {
+                    "content": [
+                        {
+                            "type": "tool_use",
+                            "id": "c",
+                            "name": "t",
+                            "input": {},
+                        }
+                    ]
+                },
+            },
+            {
+                "type": "user",
+                "message": {
+                    "content": [
+                        {
+                            "type": "tool_result",
+                            "tool_use_id": "c",
+                            "content": "ok",
+                        }
+                    ]
+                },
+            },
+        ],
+    )
+    async def test_all_content_authored_by_agent(self, envelope: dict):
+        out = await _collect(convert_claude_code_to_agentex_events(_aiter([envelope])))
+        for e in out:
+            content = getattr(e, "content", None)
+            if content is not None and hasattr(content, "author"):
+                assert content.author == "agent"
diff --git a/tests/lib/adk/test_claude_code_turn.py b/tests/lib/adk/test_claude_code_turn.py
new file mode 100644
index 000000000..4fbb2f913
--- /dev/null
+++ b/tests/lib/adk/test_claude_code_turn.py
@@ -0,0 +1,283 @@
+"""Tests for ClaudeCodeTurn and claude_code_usage_to_turn_usage."""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+
+import pytest
+
+from agentex.lib.core.harness.types import TurnUsage, HarnessTurn
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.adk._modules._claude_code_turn import (
+    ClaudeCodeTurn,
+    claude_code_usage_to_turn_usage,
+)
+
+
+async def _aiter(events: list[Any]) -> AsyncIterator[Any]:
+    for e in events:
+        yield e
+
+
+async def _drain(turn: ClaudeCodeTurn) -> list[Any]:
+    return [e async for e in turn.events]
+
+
+# ---------------------------------------------------------------------------
+# Usage normalization
+# ---------------------------------------------------------------------------
+
+
+class TestClaudeCodeUsageToTurnUsage:
+    def test_full_usage_fields(self):
+        result = {
+            "usage": {
+                "input_tokens": 100,
+                "output_tokens": 50,
+                "cache_read_input_tokens": 20,
+                "cache_creation_input_tokens": 5,
+            },
+            "cost_usd": 0.025,
+            "duration_ms": 3200,
+            "num_turns": 3,
+        }
+        usage = claude_code_usage_to_turn_usage(result)
+
+        assert usage.input_tokens == 100
+        assert usage.output_tokens == 50
+        assert usage.cached_input_tokens == 25  # 20 + 5
+        assert usage.total_tokens == 150
+        assert usage.cost_usd == pytest.approx(0.025)
+        assert usage.duration_ms == 3200
+        assert usage.num_llm_calls == 3
+
+    def test_total_cost_usd_fallback(self):
+        """total_cost_usd should be used when cost_usd is absent."""
+        result = {
+            "usage": {"input_tokens": 10, "output_tokens": 5},
+            "total_cost_usd": 0.001,
+        }
+        usage = claude_code_usage_to_turn_usage(result)
+        assert usage.cost_usd == pytest.approx(0.001)
+
+    def test_cost_usd_takes_precedence_over_total_cost_usd(self):
+        result = {
+            "usage": {"input_tokens": 10, "output_tokens": 5},
+            "cost_usd": 0.002,
+            "total_cost_usd": 0.999,
+        }
+        usage = claude_code_usage_to_turn_usage(result)
+        assert usage.cost_usd == pytest.approx(0.002)
+
+    def test_missing_usage_key_returns_nones(self):
+        result: dict[str, Any] = {}
+        usage = claude_code_usage_to_turn_usage(result)
+        assert usage.input_tokens is None
+        assert usage.output_tokens is None
+        assert usage.cached_input_tokens is None
+        assert usage.total_tokens is None
+        assert usage.cost_usd is None
+        assert usage.duration_ms is None
+        assert usage.num_llm_calls is None
+
+    def test_real_zeros_preserved(self):
+        result = {
+            "usage": {
+                "input_tokens": 0,
+                "output_tokens": 0,
+                "cache_read_input_tokens": 0,
+                "cache_creation_input_tokens": 0,
+            },
+            "cost_usd": 0.0,
+            "duration_ms": 0,
+            "num_turns": 0,
+        }
+        usage = claude_code_usage_to_turn_usage(result)
+        assert usage.input_tokens == 0
+        assert usage.output_tokens == 0
+        assert usage.cached_input_tokens == 0
+        assert usage.total_tokens == 0
+        assert usage.cost_usd == pytest.approx(0.0)
+        assert usage.duration_ms == 0
+        assert usage.num_llm_calls == 0
+
+    def test_only_cache_read_no_creation(self):
+        result = {
+            "usage": {
+                "input_tokens": 50,
+                "output_tokens": 25,
+                "cache_read_input_tokens": 15,
+            }
+        }
+        usage = claude_code_usage_to_turn_usage(result)
+        assert usage.cached_input_tokens == 15
+
+    def test_only_cache_creation_no_read(self):
+        result = {
+            "usage": {
+                "input_tokens": 50,
+                "output_tokens": 25,
+                "cache_creation_input_tokens": 10,
+            }
+        }
+        usage = claude_code_usage_to_turn_usage(result)
+        assert usage.cached_input_tokens == 10
+
+    def test_no_cache_fields_gives_none(self):
+        result = {"usage": {"input_tokens": 10, "output_tokens": 5}}
+        usage = claude_code_usage_to_turn_usage(result)
+        assert usage.cached_input_tokens is None
+
+    def test_total_tokens_computed_from_input_output(self):
+        result = {"usage": {"input_tokens": 70, "output_tokens": 30}}
+        usage = claude_code_usage_to_turn_usage(result)
+        assert usage.total_tokens == 100
+
+    def test_missing_output_tokens_leaves_total_none(self):
+        result = {"usage": {"input_tokens": 70}}
+        usage = claude_code_usage_to_turn_usage(result)
+        assert usage.total_tokens is None
+
+    def test_returns_turn_usage_instance(self):
+        result = {"usage": {"input_tokens": 1, "output_tokens": 1}}
+        usage = claude_code_usage_to_turn_usage(result)
+        assert isinstance(usage, TurnUsage)
+
+
+# ---------------------------------------------------------------------------
+# ClaudeCodeTurn protocol
+# ---------------------------------------------------------------------------
+
+
+class TestClaudeCodeTurnProtocol:
+    def test_satisfies_harness_turn_protocol(self):
+        """ClaudeCodeTurn must satisfy the HarnessTurn structural protocol."""
+        turn = ClaudeCodeTurn(_aiter([]))
+        assert isinstance(turn, HarnessTurn)
+
+    async def test_events_yields_stream_task_messages(self):
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "text", "text": "Hi there"}]},
+            }
+        ]
+        turn = ClaudeCodeTurn(_aiter(envelopes))
+        out = await _drain(turn)
+        assert len(out) == 3
+        assert isinstance(out[0], StreamTaskMessageStart)
+        assert isinstance(out[1], StreamTaskMessageDelta)
+        assert isinstance(out[2], StreamTaskMessageDone)
+
+    async def test_usage_before_drain_returns_empty(self):
+        envelopes = [
+            {
+                "type": "result",
+                "usage": {"input_tokens": 100, "output_tokens": 50},
+                "cost_usd": 0.01,
+            }
+        ]
+        turn = ClaudeCodeTurn(_aiter(envelopes))
+        # usage() called before events drained — no result envelope yet
+        usage = turn.usage()
+        assert isinstance(usage, TurnUsage)
+        assert usage.input_tokens is None
+
+    async def test_usage_after_drain_reflects_result(self):
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "text", "text": "response"}]},
+            },
+            {
+                "type": "result",
+                "usage": {"input_tokens": 200, "output_tokens": 80},
+                "cost_usd": 0.015,
+                "num_turns": 2,
+            },
+        ]
+        turn = ClaudeCodeTurn(_aiter(envelopes))
+        await _drain(turn)
+        usage = turn.usage()
+
+        assert usage.input_tokens == 200
+        assert usage.output_tokens == 80
+        assert usage.cost_usd == pytest.approx(0.015)
+        assert usage.num_llm_calls == 2
+
+    async def test_usage_empty_when_no_result_envelope(self):
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {"content": [{"type": "text", "text": "no result"}]},
+            }
+        ]
+        turn = ClaudeCodeTurn(_aiter(envelopes))
+        await _drain(turn)
+        usage = turn.usage()
+        assert usage.input_tokens is None
+        assert usage.cost_usd is None
+
+    async def test_tool_call_and_result_round_trip(self):
+        envelopes = [
+            {
+                "type": "assistant",
+                "message": {
+                    "content": [
+                        {
+                            "type": "tool_use",
+                            "id": "call_1",
+                            "name": "Read",
+                            "input": {"path": "/etc/hosts"},
+                        }
+                    ]
+                },
+            },
+            {
+                "type": "user",
+                "message": {
+                    "content": [
+                        {
+                            "type": "tool_result",
+                            "tool_use_id": "call_1",
+                            "content": "127.0.0.1 localhost",
+                        }
+                    ]
+                },
+            },
+            {
+                "type": "result",
+                "usage": {"input_tokens": 50, "output_tokens": 20},
+                "cost_usd": 0.005,
+            },
+        ]
+        turn = ClaudeCodeTurn(_aiter(envelopes))
+        out = await _drain(turn)
+        usage = turn.usage()
+
+        tool_starts = [
+            e for e in out if isinstance(e, StreamTaskMessageStart) and isinstance(e.content, ToolResponseContent)
+        ]
+        tool_fulls = [
+            e for e in out if isinstance(e, StreamTaskMessageFull) and isinstance(e.content, ToolResponseContent)
+        ]
+        assert len(tool_fulls) == 1
+        full_content = tool_fulls[0].content
+        assert isinstance(full_content, ToolResponseContent)
+        assert full_content.tool_call_id == "call_1"
+
+        assert usage.input_tokens == 50
+        assert usage.output_tokens == 20
+
+    async def test_events_property_returns_same_iterator(self):
+        """Accessing .events multiple times returns the same iterator (not a new one each call)."""
+        turn = ClaudeCodeTurn(_aiter([]))
+        it1 = turn.events
+        it2 = turn.events
+        assert it1 is it2
diff --git a/tests/lib/core/harness/conformance/test_claude_code_conformance.py b/tests/lib/core/harness/conformance/test_claude_code_conformance.py
new file mode 100644
index 000000000..88643a4cd
--- /dev/null
+++ b/tests/lib/core/harness/conformance/test_claude_code_conformance.py
@@ -0,0 +1,202 @@
+"""Cross-channel conformance tests for the claude-code parser tap.
+
+Each fixture is a representative sequence of claude-code stream-json
+envelopes, converted into canonical ``StreamTaskMessage*`` events via
+``ClaudeCodeTurn``, then registered into the shared conformance runner.
+
+The conformance runner asserts two guarantees per fixture:
+
+1. **Logical-delivery equivalence**: ``yield_events`` and ``auto_send``
+   produce the same logically-delivered message contents.
+
+2. **Span signal equivalence**: both channels emit the same ``SpanSignal``
+   sequence to their ``SpanTracer``.
+
+Fixtures
+--------
+text-only:       single ``assistant`` text block
+tool-call-result: ``tool_use`` block followed by ``tool_result``
+thinking-block:  ``thinking`` block with full text
+multi-step:      text + tool_use + tool_result + text (two model turns)
+
+Note
+----
+Relative imports are used throughout (runner.py and these fixtures live in the
+same package). The per-module ``_FIXTURES`` list is both registered globally
+(via ``register()``) and parametrized locally so this module's tests are
+self-contained regardless of global registry ordering (see runner.py docstring).
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+from agentex.lib.adk._modules._claude_code_sync import convert_claude_code_to_agentex_events
+
+from .runner import (
+    Fixture,
+    register,
+    run_cross_channel_conformance,
+)
+
+# ---------------------------------------------------------------------------
+# Convert claude-code envelopes to StreamTaskMessage* events
+# ---------------------------------------------------------------------------
+
+
+async def _envelopes_to_events(envelopes: list[dict]) -> list:
+    """Drive convert_claude_code_to_agentex_events and collect all events."""
+
+    async def _aiter(items):  # type: ignore[return]
+        for item in items:
+            yield item
+
+    return [e async for e in convert_claude_code_to_agentex_events(_aiter(envelopes))]
+
+
+# ---------------------------------------------------------------------------
+# Fixture definitions (raw claude-code envelope sequences)
+# ---------------------------------------------------------------------------
+
+_TEXT_ENVELOPES = [
+    {
+        "type": "assistant",
+        "message": {"content": [{"type": "text", "text": "The answer is 42."}]},
+    }
+]
+
+_TOOL_ENVELOPES = [
+    {
+        "type": "assistant",
+        "message": {
+            "content": [
+                {
+                    "type": "tool_use",
+                    "id": "call_read",
+                    "name": "Read",
+                    "input": {"path": "/workspace/README.md"},
+                }
+            ]
+        },
+    },
+    {
+        "type": "user",
+        "message": {
+            "content": [
+                {
+                    "type": "tool_result",
+                    "tool_use_id": "call_read",
+                    "content": "# My Project\n\nA great project.",
+                }
+            ]
+        },
+    },
+]
+
+_THINKING_ENVELOPES = [
+    {
+        "type": "assistant",
+        "message": {
+            "content": [
+                {"type": "thinking", "thinking": "Let me think about this carefully.\nStep 1: check the facts."},
+                {"type": "text", "text": "Here is my answer."},
+            ]
+        },
+    }
+]
+
+_MULTI_STEP_ENVELOPES = [
+    # Turn 1: text + tool call
+    {
+        "type": "assistant",
+        "message": {
+            "content": [
+                {"type": "text", "text": "Let me look that up."},
+                {
+                    "type": "tool_use",
+                    "id": "call_bash",
+                    "name": "Bash",
+                    "input": {"command": "cat /etc/hostname"},
+                },
+            ]
+        },
+    },
+    {
+        "type": "user",
+        "message": {
+            "content": [
+                {
+                    "type": "tool_result",
+                    "tool_use_id": "call_bash",
+                    "content": "myhost",
+                }
+            ]
+        },
+    },
+    # Turn 2: final text after tool result
+    {
+        "type": "assistant",
+        "message": {"content": [{"type": "text", "text": "The hostname is myhost."}]},
+    },
+]
+
+
+# ---------------------------------------------------------------------------
+# Build fixtures from envelopes at module load time
+# ---------------------------------------------------------------------------
+
+
+async def _build_fixture(name: str, envelopes: list[dict]) -> Fixture:
+    events = await _envelopes_to_events(envelopes)
+    return Fixture(name=name, events=events)
+
+
+# Fixtures must exist before pytest collects (they parametrize the test below),
+# so they are built at import time. The conversion only iterates in-memory
+# envelopes — it never suspends on a real future — so we drive the coroutine to
+# completion by hand instead of asyncio.run(). asyncio.run() at import raises
+# RuntimeError when an event loop is already running (programmatic pytest, a
+# Jupyter kernel, or session-scoped asyncio loops); the loop-free driver below
+# is unaffected by the ambient loop state.
+def _run_pure_async(coro: Any) -> Any:
+    try:
+        coro.send(None)
+    except StopIteration as stop:
+        return stop.value
+    coro.close()
+    raise RuntimeError("conformance fixture build unexpectedly suspended on real I/O")
+
+
+_FIXTURES: list[Fixture] = [
+    _run_pure_async(_build_fixture("claude-code-text-only", _TEXT_ENVELOPES)),
+    _run_pure_async(_build_fixture("claude-code-tool-call-result", _TOOL_ENVELOPES)),
+    _run_pure_async(_build_fixture("claude-code-thinking-block", _THINKING_ENVELOPES)),
+    _run_pure_async(_build_fixture("claude-code-multi-step", _MULTI_STEP_ENVELOPES)),
+]
+
+# Register into the shared registry so all_fixtures() can enumerate them
+for _f in _FIXTURES:
+    register(_f)
+
+
+# ---------------------------------------------------------------------------
+# Cross-channel conformance assertions
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("fixture", _FIXTURES, ids=lambda f: f.name)
+@pytest.mark.asyncio
+async def test_cross_channel_equivalence(fixture: Fixture) -> None:
+    """yield_events and auto_send must produce equivalent logical deliveries
+    and identical span signals for every claude-code fixture.
+    """
+    yield_deliveries, auto_deliveries, yield_spans, auto_spans = await run_cross_channel_conformance(fixture)
+
+    assert yield_deliveries == auto_deliveries, (
+        f"[{fixture.name}] logical deliveries differ:\n  yield:     {yield_deliveries}\n  auto_send: {auto_deliveries}"
+    )
+    assert yield_spans == auto_spans, (
+        f"[{fixture.name}] span signals differ:\n  yield:     {yield_spans}\n  auto_send: {auto_spans}"
+    )

From d34422845de4b80ed69d2dccfdb0c680ef2fbca3 Mon Sep 17 00:00:00 2001
From: Declan Brady <declan.brady@scale.com>
Date: Mon, 22 Jun 2026 18:45:21 -0400
Subject: [PATCH 07/10] feat(langgraph): migrate LangGraph harness onto unified
 surface (#417)

---
 .github/workflows/harness-integration.yml     |  16 +-
 .../00_sync/harness_langgraph/Dockerfile      |  50 +++
 .../00_sync/harness_langgraph/README.md       |  55 ++++
 .../00_sync/harness_langgraph/manifest.yaml   |  58 ++++
 .../harness_langgraph/project/__init__.py     |   0
 .../00_sync/harness_langgraph/project/acp.py  | 107 +++++++
 .../harness_langgraph/project/graph.py        |  67 ++++
 .../harness_langgraph/project/tools.py        |  24 ++
 .../00_sync/harness_langgraph/pyproject.toml  |  37 +++
 .../harness_langgraph/tests/test_agent.py     | 144 +++++++++
 .../00_base/harness_langgraph/Dockerfile      |  50 +++
 .../00_base/harness_langgraph/README.md       |  57 ++++
 .../00_base/harness_langgraph/manifest.yaml   |  58 ++++
 .../harness_langgraph/project/__init__.py     |   0
 .../00_base/harness_langgraph/project/acp.py  | 109 +++++++
 .../harness_langgraph/project/graph.py        |  67 ++++
 .../harness_langgraph/project/tools.py        |  24 ++
 .../00_base/harness_langgraph/pyproject.toml  |  37 +++
 .../harness_langgraph/tests/test_agent.py     | 100 ++++++
 .../10_temporal/harness_langgraph/Dockerfile  |  43 +++
 .../10_temporal/harness_langgraph/README.md   |  53 ++++
 .../harness_langgraph/manifest.yaml           |  51 +++
 .../harness_langgraph/project/__init__.py     |   0
 .../harness_langgraph/project/acp.py          |  34 ++
 .../harness_langgraph/project/graph.py        |  85 +++++
 .../harness_langgraph/project/run_worker.py   |  46 +++
 .../harness_langgraph/project/tools.py        |  37 +++
 .../harness_langgraph/project/workflow.py     |  80 +++++
 .../harness_langgraph/pyproject.toml          |  40 +++
 .../harness_langgraph/tests/test_agent.py     | 106 +++++++
 .../lib/adk/_modules/_langgraph_async.py      | 213 +++----------
 .../lib/adk/_modules/_langgraph_sync.py       |  49 ++-
 .../lib/adk/_modules/_langgraph_tracing.py    |  31 +-
 .../lib/adk/_modules/_langgraph_turn.py       | 152 +++++++++
 tests/lib/adk/providers/test_openai_turn.py   |   4 +-
 tests/lib/adk/test_langgraph_async.py         | 282 +++++++++++++++++
 tests/lib/adk/test_langgraph_sync.py          | 247 +++++++++++++++
 tests/lib/adk/test_langgraph_sync_unified.py  | 214 +++++++++++++
 tests/lib/adk/test_langgraph_turn.py          | 265 ++++++++++++++++
 tests/lib/adk/test_pydantic_ai_turn.py        |   4 +-
 .../conformance/test_langgraph_conformance.py | 229 ++++++++++++++
 .../harness/test_harness_langgraph_async.py   | 298 ++++++++++++++++++
 .../harness/test_harness_langgraph_sync.py    | 229 ++++++++++++++
 .../test_harness_langgraph_temporal.py        | 233 ++++++++++++++
 44 files changed, 3897 insertions(+), 188 deletions(-)
 create mode 100644 examples/tutorials/00_sync/harness_langgraph/Dockerfile
 create mode 100644 examples/tutorials/00_sync/harness_langgraph/README.md
 create mode 100644 examples/tutorials/00_sync/harness_langgraph/manifest.yaml
 create mode 100644 examples/tutorials/00_sync/harness_langgraph/project/__init__.py
 create mode 100644 examples/tutorials/00_sync/harness_langgraph/project/acp.py
 create mode 100644 examples/tutorials/00_sync/harness_langgraph/project/graph.py
 create mode 100644 examples/tutorials/00_sync/harness_langgraph/project/tools.py
 create mode 100644 examples/tutorials/00_sync/harness_langgraph/pyproject.toml
 create mode 100644 examples/tutorials/00_sync/harness_langgraph/tests/test_agent.py
 create mode 100644 examples/tutorials/10_async/00_base/harness_langgraph/Dockerfile
 create mode 100644 examples/tutorials/10_async/00_base/harness_langgraph/README.md
 create mode 100644 examples/tutorials/10_async/00_base/harness_langgraph/manifest.yaml
 create mode 100644 examples/tutorials/10_async/00_base/harness_langgraph/project/__init__.py
 create mode 100644 examples/tutorials/10_async/00_base/harness_langgraph/project/acp.py
 create mode 100644 examples/tutorials/10_async/00_base/harness_langgraph/project/graph.py
 create mode 100644 examples/tutorials/10_async/00_base/harness_langgraph/project/tools.py
 create mode 100644 examples/tutorials/10_async/00_base/harness_langgraph/pyproject.toml
 create mode 100644 examples/tutorials/10_async/00_base/harness_langgraph/tests/test_agent.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/Dockerfile
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/README.md
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/manifest.yaml
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/project/__init__.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/project/acp.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/project/graph.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/project/run_worker.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/project/tools.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/project/workflow.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/pyproject.toml
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/tests/test_agent.py
 create mode 100644 src/agentex/lib/adk/_modules/_langgraph_turn.py
 create mode 100644 tests/lib/adk/test_langgraph_async.py
 create mode 100644 tests/lib/adk/test_langgraph_sync.py
 create mode 100644 tests/lib/adk/test_langgraph_sync_unified.py
 create mode 100644 tests/lib/adk/test_langgraph_turn.py
 create mode 100644 tests/lib/core/harness/conformance/test_langgraph_conformance.py
 create mode 100644 tests/lib/core/harness/test_harness_langgraph_async.py
 create mode 100644 tests/lib/core/harness/test_harness_langgraph_sync.py
 create mode 100644 tests/lib/core/harness/test_harness_langgraph_temporal.py

diff --git a/.github/workflows/harness-integration.yml b/.github/workflows/harness-integration.yml
index 11b5239dc..075ee5cf3 100644
--- a/.github/workflows/harness-integration.yml
+++ b/.github/workflows/harness-integration.yml
@@ -8,6 +8,7 @@ on:
       - "src/agentex/lib/core/harness/**"
       - "src/agentex/lib/adk/_modules/**"
       - "tests/lib/core/harness/test_harness_pydantic_ai_*.py"
+      - "tests/lib/core/harness/test_harness_langgraph_*.py"
       - ".github/workflows/harness-integration.yml"
 
 jobs:
@@ -32,17 +33,18 @@ jobs:
       - name: Conformance suite
         run: ./scripts/test tests/lib/core/harness/ -v
 
-  # Offline pydantic-ai integration tests (sync / async / temporal channels).
-  # These use pydantic-ai TestModel + fake streaming/tracing and require no live
-  # infrastructure. Enabled here for PR 4 (pydantic-ai migration). Future harness
-  # migration PRs (5-8) should add their integration-test paths to this matrix.
+  # Offline harness integration tests (sync / async / temporal channels) for each
+  # migrated harness. These use fake streams / TestModel + fake streaming/tracing
+  # and require no live infrastructure. Future harness migration PRs (6-8) add
+  # their harness to the matrix below and their test paths to the triggers above.
   live-matrix:
     runs-on: ubuntu-latest
     strategy:
       matrix:
+        harness: [pydantic_ai, langgraph]
         channel: [sync, async, temporal]
       fail-fast: false
-    name: pydantic-ai-${{ matrix.channel }}
+    name: ${{ matrix.harness }}-${{ matrix.channel }}
     steps:
       - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
 
@@ -54,6 +56,6 @@ jobs:
       - name: Bootstrap
         run: ./scripts/bootstrap
 
-      - name: pydantic-ai ${{ matrix.channel }} integration tests (offline, TestModel)
+      - name: ${{ matrix.harness }} ${{ matrix.channel }} integration tests (offline)
         run: |
-          ./scripts/test tests/lib/core/harness/test_harness_pydantic_ai_${{ matrix.channel }}.py -v
+          ./scripts/test tests/lib/core/harness/test_harness_${{ matrix.harness }}_${{ matrix.channel }}.py -v
diff --git a/examples/tutorials/00_sync/harness_langgraph/Dockerfile b/examples/tutorials/00_sync/harness_langgraph/Dockerfile
new file mode 100644
index 000000000..9d492198f
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_langgraph/Dockerfile
@@ -0,0 +1,50 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+ENV UV_HTTP_TIMEOUT=1000
+
+# Copy pyproject.toml and README.md to install dependencies
+COPY 00_sync/harness_langgraph/pyproject.toml /app/harness_langgraph/pyproject.toml
+COPY 00_sync/harness_langgraph/README.md /app/harness_langgraph/README.md
+
+WORKDIR /app/harness_langgraph
+
+# Copy the project code
+COPY 00_sync/harness_langgraph/project /app/harness_langgraph/project
+
+# Copy the test files
+COPY 00_sync/harness_langgraph/tests /app/harness_langgraph/tests
+
+# Copy shared test utilities
+COPY test_utils /app/test_utils
+
+# Install the required Python packages with dev dependencies
+RUN uv pip install --system .[dev]
+
+# Set environment variables
+ENV PYTHONPATH=/app
+
+# Set test environment variables
+ENV AGENT_NAME=s-harness-langgraph
+
+# Run the agent using uvicorn
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/examples/tutorials/00_sync/harness_langgraph/README.md b/examples/tutorials/00_sync/harness_langgraph/README.md
new file mode 100644
index 000000000..86367f162
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_langgraph/README.md
@@ -0,0 +1,55 @@
+# Tutorial: Sync Harness LangGraph Agent
+
+This tutorial demonstrates how to build a **synchronous** LangGraph agent on AgentEx
+using the **unified harness surface**:
+
+```python
+turn = LangGraphTurn(stream, model=None)
+emitter = UnifiedEmitter(task_id=task_id, trace_id=task_id, ...)
+async for event in emitter.yield_turn(turn):
+    yield event
+```
+
+Compare with ``030_langgraph``, which uses the bespoke
+``convert_langgraph_to_agentex_events`` helper directly.
+
+## Key Concepts
+
+### Unified Harness
+
+`LangGraphTurn` implements the `HarnessTurn` protocol: it wraps the raw
+LangGraph `astream()` generator and exposes `events` (an async generator of
+`TaskMessageUpdate`) and `usage()` (token counts captured from the final
+`AIMessage`).
+
+`UnifiedEmitter.yield_turn(turn)` iterates the turn's events and yields them
+to the sync ACP handler unchanged. The same `LangGraphTurn` object can also be
+passed to `UnifiedEmitter.auto_send_turn` in the async/temporal channels.
+
+### AGX1-377 Note
+
+LangGraph emits tool requests as `StreamTaskMessageFull` events (from "updates"
+node outputs). The `SpanDeriver` does not open tool spans from Full events
+today; that gap is tracked in AGX1-373.
+
+## Files
+
+| File | Description |
+|------|-------------|
+| `project/acp.py` | ACP server using unified harness (LangGraphTurn + yield_turn) |
+| `project/graph.py` | LangGraph state graph (identical to 030_langgraph) |
+| `project/tools.py` | Tool definitions (weather example) |
+| `tests/test_agent.py` | Integration tests |
+| `manifest.yaml` | Agent configuration (name: s-harness-langgraph) |
+
+## Running Locally
+
+```bash
+agentex agents run
+```
+
+## Running Tests
+
+```bash
+pytest tests/test_agent.py -v
+```
diff --git a/examples/tutorials/00_sync/harness_langgraph/manifest.yaml b/examples/tutorials/00_sync/harness_langgraph/manifest.yaml
new file mode 100644
index 000000000..1f57678f2
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_langgraph/manifest.yaml
@@ -0,0 +1,58 @@
+build:
+  context:
+    root: ../../
+    include_paths:
+      - 00_sync/harness_langgraph
+      - test_utils
+    dockerfile: 00_sync/harness_langgraph/Dockerfile
+    dockerignore: 00_sync/harness_langgraph/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+
+agent:
+  acp_type: sync
+  name: s-harness-langgraph
+  description: A sync LangGraph agent using the unified harness surface (LangGraphTurn + UnifiedEmitter.yield_turn)
+
+  temporal:
+    enabled: false
+
+  credentials:
+    - env_var_name: OPENAI_API_KEY
+      secret_name: openai-api-key
+      secret_key: api-key
+    - env_var_name: REDIS_URL
+      secret_name: redis-url-secret
+      secret_key: url
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "s-harness-langgraph"
+      description: "A sync LangGraph agent using the unified harness surface"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/00_sync/harness_langgraph/project/__init__.py b/examples/tutorials/00_sync/harness_langgraph/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/00_sync/harness_langgraph/project/acp.py b/examples/tutorials/00_sync/harness_langgraph/project/acp.py
new file mode 100644
index 000000000..f609f1682
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_langgraph/project/acp.py
@@ -0,0 +1,107 @@
+"""ACP handler for sync harness LangGraph agent.
+
+Uses the unified harness surface: ``LangGraphTurn`` wraps the LangGraph
+``astream()`` generator, and ``UnifiedEmitter.yield_turn`` converts it into
+the AgentEx ``TaskMessageUpdate`` event stream expected by the sync ACP.
+
+Differences from ``030_langgraph`` (bespoke path):
+- No ``create_langgraph_tracing_handler`` boilerplate.
+- No manual text-delta accumulation for the span output.
+- Tool calls are emitted as ``StreamTaskMessageFull`` (not Start+Delta+Done)
+  via the same code path as the async/temporal channels.
+- Usage data (token counts) is captured on the ``LangGraphTurn`` object and
+  can be read after the turn completes.
+
+AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull``
+events (from "updates"). The ``SpanDeriver`` does not open tool spans from
+Full events today; that gap is tracked in AGX1-373.
+"""
+
+from __future__ import annotations
+
+import os
+from typing import AsyncGenerator
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import agentex.lib.adk as adk
+from project.graph import create_graph
+from agentex.lib.types.acp import SendMessageParams
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import TaskMessageUpdate
+from agentex.types.task_message_content import TaskMessageContent
+from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+logger = make_logger(__name__)
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+acp = FastACP.create(acp_type="sync")
+
+_graph = None
+
+
+async def get_graph():
+    """Get or create the compiled graph instance."""
+    global _graph
+    if _graph is None:
+        _graph = await create_graph()
+    return _graph
+
+
+@acp.on_message_send
+async def handle_message_send(
+    params: SendMessageParams,
+) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]:
+    """Handle incoming messages, streaming tokens and tool calls via unified harness."""
+    graph = await get_graph()
+
+    task_id = params.task.id
+    user_message = params.content.content
+
+    logger.info(f"Processing message for task {task_id}")
+
+    async with adk.tracing.span(
+        trace_id=task_id,
+        task_id=task_id,
+        name="message",
+        input={"message": user_message},
+        data={"__span_type__": "AGENT_WORKFLOW"},
+    ) as turn_span:
+        stream = graph.astream(
+            {"messages": [{"role": "user", "content": user_message}]},
+            config={"configurable": {"thread_id": task_id}},
+            stream_mode=["messages", "updates"],
+        )
+
+        turn = LangGraphTurn(stream, model=None)
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+
+        final_text = ""
+        async for event in emitter.yield_turn(turn):
+            # Accumulate text deltas so the span's final_output is the assistant
+            # text (matching the async tutorial), not the usage metrics.
+            delta = getattr(event, "delta", None)
+            if isinstance(delta, TextDelta) and delta.text_delta:
+                final_text += delta.text_delta
+            yield event
+
+        if turn_span:
+            turn_span.output = {"final_output": final_text, "usage": turn.usage().model_dump()}
diff --git a/examples/tutorials/00_sync/harness_langgraph/project/graph.py b/examples/tutorials/00_sync/harness_langgraph/project/graph.py
new file mode 100644
index 000000000..4516087d2
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_langgraph/project/graph.py
@@ -0,0 +1,67 @@
+"""LangGraph graph definition for the harness_langgraph sync agent.
+
+Identical to ``030_langgraph/project/graph.py`` — the graph definition is not
+affected by the harness migration. Only ``acp.py`` changes.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Annotated
+from datetime import datetime
+from typing_extensions import TypedDict
+
+from langgraph.graph import START, StateGraph
+from langchain_openai import ChatOpenAI
+from langgraph.prebuilt import ToolNode, tools_condition
+from langchain_core.messages import SystemMessage
+from langgraph.graph.message import add_messages
+
+from project.tools import TOOLS
+from agentex.lib.adk import create_checkpointer
+
+MODEL_NAME = "gpt-5"
+SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools.
+
+Current date and time: {timestamp}
+
+Guidelines:
+- Be concise and helpful
+- Use tools when they would help answer the user's question
+- If you're unsure, ask clarifying questions
+- Always provide accurate information
+"""
+
+
+class AgentState(TypedDict):
+    """State schema for the agent graph."""
+
+    messages: Annotated[list[Any], add_messages]
+
+
+async def create_graph():
+    """Create and compile the agent graph with checkpointer."""
+    llm = ChatOpenAI(
+        model=MODEL_NAME,
+        reasoning={"effort": "high", "summary": "auto"},
+    )
+    llm_with_tools = llm.bind_tools(TOOLS)
+
+    checkpointer = await create_checkpointer()
+
+    def agent_node(state: AgentState) -> dict[str, Any]:
+        """Process the current state and generate a response."""
+        messages = state["messages"]
+        if not messages or not isinstance(messages[0], SystemMessage):
+            system_content = SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
+            messages = [SystemMessage(content=system_content)] + messages
+        response = llm_with_tools.invoke(messages)
+        return {"messages": [response]}
+
+    builder = StateGraph(AgentState)
+    builder.add_node("agent", agent_node)
+    builder.add_node("tools", ToolNode(tools=TOOLS))
+    builder.add_edge(START, "agent")
+    builder.add_conditional_edges("agent", tools_condition, "tools")
+    builder.add_edge("tools", "agent")
+
+    return builder.compile(checkpointer=checkpointer)
diff --git a/examples/tutorials/00_sync/harness_langgraph/project/tools.py b/examples/tutorials/00_sync/harness_langgraph/project/tools.py
new file mode 100644
index 000000000..f02587430
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_langgraph/project/tools.py
@@ -0,0 +1,24 @@
+"""Tool definitions for the harness_langgraph sync agent."""
+
+from langchain_core.tools import Tool
+
+
+def get_weather(city: str) -> str:
+    """Get the current weather for a city.
+
+    Args:
+        city: The name of the city to get weather for.
+
+    Returns:
+        A string describing the weather conditions.
+    """
+    return f"The weather in {city} is sunny and 72°F"
+
+
+weather_tool = Tool(
+    name="get_weather",
+    func=get_weather,
+    description="Get the current weather for a city. Input should be a city name.",
+)
+
+TOOLS = [weather_tool]
diff --git a/examples/tutorials/00_sync/harness_langgraph/pyproject.toml b/examples/tutorials/00_sync/harness_langgraph/pyproject.toml
new file mode 100644
index 000000000..deecd08b3
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_langgraph/pyproject.toml
@@ -0,0 +1,37 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "s-harness-langgraph"
+version = "0.1.0"
+description = "A sync LangGraph agent using the unified harness surface"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+    "langgraph",
+    "langchain-openai",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+    "black",
+    "isort",
+    "flake8",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
+
+[tool.black]
+line-length = 88
+target-version = ['py312']
+
+[tool.isort]
+profile = "black"
+line_length = 88
diff --git a/examples/tutorials/00_sync/harness_langgraph/tests/test_agent.py b/examples/tutorials/00_sync/harness_langgraph/tests/test_agent.py
new file mode 100644
index 000000000..2eb561cec
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_langgraph/tests/test_agent.py
@@ -0,0 +1,144 @@
+"""
+Tests for the sync harness LangGraph agent.
+
+Validates the unified harness surface (LangGraphTurn + UnifiedEmitter.yield_turn)
+end-to-end against a live AgentEx server.
+
+Configuration:
+- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003)
+- AGENT_NAME: Name of the agent to test (default: s-harness-langgraph)
+"""
+
+import os
+
+import pytest
+from test_utils.sync import validate_text_in_string, collect_streaming_response
+
+from agentex import Agentex
+from agentex.types import TextContent, TextContentParam
+from agentex.types.agent_rpc_params import ParamsCreateTaskRequest, ParamsSendMessageRequest
+from agentex.lib.sdk.fastacp.base.base_acp_server import uuid
+
+AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
+AGENT_NAME = os.environ.get("AGENT_NAME", "s-harness-langgraph")
+
+
+@pytest.fixture
+def client():
+    return Agentex(base_url=AGENTEX_API_BASE_URL)
+
+
+@pytest.fixture
+def agent_name():
+    return AGENT_NAME
+
+
+@pytest.fixture
+def agent_id(client, agent_name):
+    agents = client.agents.list()
+    for agent in agents:
+        if agent.name == agent_name:
+            return agent.id
+    raise ValueError(f"Agent with name {agent_name} not found.")
+
+
+class TestNonStreamingMessages:
+    def test_send_simple_message(self, client: Agentex, agent_name: str):
+        response = client.agents.send_message(
+            agent_name=agent_name,
+            params=ParamsSendMessageRequest(
+                content=TextContentParam(
+                    author="user",
+                    content="Hello! What can you help me with?",
+                    type="text",
+                )
+            ),
+        )
+        result = response.result
+        assert result is not None
+        assert len(result) >= 1
+
+    def test_tool_calling(self, client: Agentex, agent_name: str):
+        response = client.agents.send_message(
+            agent_name=agent_name,
+            params=ParamsSendMessageRequest(
+                content=TextContentParam(
+                    author="user",
+                    content="What's the weather in San Francisco?",
+                    type="text",
+                )
+            ),
+        )
+        result = response.result
+        assert result is not None
+        assert len(result) >= 1
+
+    def test_multiturn_conversation(self, client: Agentex, agent_name: str, agent_id: str):
+        task_response = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
+        task = task_response.result
+        assert task is not None
+
+        response1 = client.agents.send_message(
+            agent_name=agent_name,
+            params=ParamsSendMessageRequest(
+                content=TextContentParam(
+                    author="user",
+                    content="My name is Alice. Remember that.",
+                    type="text",
+                ),
+                task_id=task.id,
+            ),
+        )
+        assert response1.result is not None
+
+        response2 = client.agents.send_message(
+            agent_name=agent_name,
+            params=ParamsSendMessageRequest(
+                content=TextContentParam(
+                    author="user",
+                    content="What is my name?",
+                    type="text",
+                ),
+                task_id=task.id,
+            ),
+        )
+        assert response2.result is not None
+        for message in response2.result:
+            if isinstance(message.content, TextContent):
+                validate_text_in_string("alice", message.content.content.lower())
+
+
+class TestStreamingMessages:
+    def test_stream_simple_message(self, client: Agentex, agent_name: str):
+        stream = client.agents.send_message_stream(
+            agent_name=agent_name,
+            params=ParamsSendMessageRequest(
+                content=TextContentParam(
+                    author="user",
+                    content="Tell me a short joke.",
+                    type="text",
+                )
+            ),
+        )
+        aggregated_content, chunks = collect_streaming_response(stream)
+        assert aggregated_content is not None
+        assert len(chunks) > 1, "No chunks received in streaming response."
+
+    def test_stream_tool_calling(self, client: Agentex, agent_name: str):
+        stream = client.agents.send_message_stream(
+            agent_name=agent_name,
+            params=ParamsSendMessageRequest(
+                content=TextContentParam(
+                    author="user",
+                    content="What's the weather in New York?",
+                    type="text",
+                )
+            ),
+        )
+        aggregated_content, chunks = collect_streaming_response(stream)
+        assert aggregated_content is not None
+        assert len(chunks) > 0, "No chunks received in streaming response."
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/Dockerfile b/examples/tutorials/10_async/00_base/harness_langgraph/Dockerfile
new file mode 100644
index 000000000..3e0bd696a
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_langgraph/Dockerfile
@@ -0,0 +1,50 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+ENV UV_HTTP_TIMEOUT=1000
+
+# Copy pyproject.toml and README.md to install dependencies
+COPY 10_async/00_base/harness_langgraph/pyproject.toml /app/harness_langgraph/pyproject.toml
+COPY 10_async/00_base/harness_langgraph/README.md /app/harness_langgraph/README.md
+
+WORKDIR /app/harness_langgraph
+
+# Copy the project code
+COPY 10_async/00_base/harness_langgraph/project /app/harness_langgraph/project
+
+# Copy the test files
+COPY 10_async/00_base/harness_langgraph/tests /app/harness_langgraph/tests
+
+# Copy shared test utilities
+COPY test_utils /app/test_utils
+
+# Install the required Python packages with dev dependencies
+RUN uv pip install --system .[dev] pytest-asyncio httpx
+
+# Set environment variables
+ENV PYTHONPATH=/app
+
+# Set test environment variables
+ENV AGENT_NAME=a-harness-langgraph
+
+# Run the agent using uvicorn
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/README.md b/examples/tutorials/10_async/00_base/harness_langgraph/README.md
new file mode 100644
index 000000000..7efe28207
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_langgraph/README.md
@@ -0,0 +1,57 @@
+# Tutorial: Async Harness LangGraph Agent
+
+This tutorial demonstrates how to build an **async** LangGraph agent on AgentEx
+using the **unified harness surface**:
+
+```python
+turn = LangGraphTurn(stream, model=None)
+emitter = UnifiedEmitter(task_id=task_id, trace_id=task_id, ...)
+result = await emitter.auto_send_turn(turn)
+```
+
+Compare with ``100_langgraph``, which uses the bespoke
+``stream_langgraph_events`` helper directly.
+
+## Key Concepts
+
+### Unified Harness
+
+`LangGraphTurn` implements the `HarnessTurn` protocol: it wraps the raw
+LangGraph `astream()` generator and exposes `events` (an async generator of
+`TaskMessageUpdate`) and `usage()` (token counts captured from the final
+`AIMessage`).
+
+`UnifiedEmitter.auto_send_turn(turn)` pushes each event to Redis via
+`streaming_task_message_context`, accumulates the final text, and returns a
+`TurnResult(final_text=..., usage=...)`.
+
+The same `LangGraphTurn` object can also be passed to
+`UnifiedEmitter.yield_turn` in the sync channel.
+
+### AGX1-377 Note
+
+LangGraph emits tool requests as `StreamTaskMessageFull` events (from "updates"
+node outputs). The `SpanDeriver` does not open tool spans from Full events
+today; that gap is tracked in AGX1-373.
+
+## Files
+
+| File | Description |
+|------|-------------|
+| `project/acp.py` | ACP server using unified harness (LangGraphTurn + auto_send_turn) |
+| `project/graph.py` | LangGraph state graph (identical to 100_langgraph) |
+| `project/tools.py` | Tool definitions (weather example) |
+| `tests/test_agent.py` | Integration tests |
+| `manifest.yaml` | Agent configuration (name: a-harness-langgraph) |
+
+## Running Locally
+
+```bash
+agentex agents run
+```
+
+## Running Tests
+
+```bash
+pytest tests/test_agent.py -v
+```
diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/manifest.yaml b/examples/tutorials/10_async/00_base/harness_langgraph/manifest.yaml
new file mode 100644
index 000000000..bb19e25b3
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_langgraph/manifest.yaml
@@ -0,0 +1,58 @@
+build:
+  context:
+    root: ../../../
+    include_paths:
+      - 10_async/00_base/harness_langgraph
+      - test_utils
+    dockerfile: 10_async/00_base/harness_langgraph/Dockerfile
+    dockerignore: 10_async/00_base/harness_langgraph/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+
+agent:
+  acp_type: async
+  name: a-harness-langgraph
+  description: An async LangGraph agent using the unified harness surface (LangGraphTurn + UnifiedEmitter.auto_send_turn)
+
+  temporal:
+    enabled: false
+
+  credentials:
+    - env_var_name: OPENAI_API_KEY
+      secret_name: openai-api-key
+      secret_key: api-key
+    - env_var_name: REDIS_URL
+      secret_name: redis-url-secret
+      secret_key: url
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "a-harness-langgraph"
+      description: "An async LangGraph agent using the unified harness surface"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/project/__init__.py b/examples/tutorials/10_async/00_base/harness_langgraph/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/project/acp.py b/examples/tutorials/10_async/00_base/harness_langgraph/project/acp.py
new file mode 100644
index 000000000..a99395424
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_langgraph/project/acp.py
@@ -0,0 +1,109 @@
+"""ACP handler for async harness LangGraph agent.
+
+Uses the unified harness surface: ``LangGraphTurn`` wraps the LangGraph
+``astream()`` generator, and ``UnifiedEmitter.auto_send_turn`` streams events
+to Redis and returns a ``TurnResult`` with the accumulated final text.
+
+Differences from ``100_langgraph`` (bespoke path):
+- No ``create_langgraph_tracing_handler`` boilerplate.
+- ``stream_langgraph_events`` is replaced by
+  ``UnifiedEmitter.auto_send_turn(LangGraphTurn(stream))``.
+- Tool calls/responses go through ``streaming_task_message_context``
+  (same code path as text deltas), making the event stream channel-agnostic.
+- Usage data (token counts) is captured on ``LangGraphTurn.usage()`` after
+  ``auto_send_turn`` returns.
+
+AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull``
+events (from "updates"). The ``SpanDeriver`` does not open tool spans from
+Full events today; that gap is tracked in AGX1-373.
+"""
+
+from __future__ import annotations
+
+import os
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import agentex.lib.adk as adk
+from project.graph import create_graph
+from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams
+from agentex.lib.types.fastacp import AsyncACPConfig
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+logger = make_logger(__name__)
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+acp = FastACP.create(
+    acp_type="async",
+    config=AsyncACPConfig(type="base"),
+)
+
+_graph = None
+
+
+async def get_graph():
+    global _graph
+    if _graph is None:
+        _graph = await create_graph()
+    return _graph
+
+
+@acp.on_task_event_send
+async def handle_task_event_send(params: SendEventParams):
+    """Handle incoming events, streaming tokens and tool calls via unified harness."""
+    graph = await get_graph()
+    task_id = params.task.id
+    user_message = params.event.content.content
+
+    logger.info(f"Processing message for thread {task_id}")
+
+    await adk.messages.create(task_id=task_id, content=params.event.content)
+
+    async with adk.tracing.span(
+        trace_id=task_id,
+        task_id=task_id,
+        name="message",
+        input={"message": user_message},
+        data={"__span_type__": "AGENT_WORKFLOW"},
+    ) as turn_span:
+        stream = graph.astream(
+            {"messages": [{"role": "user", "content": user_message}]},
+            config={"configurable": {"thread_id": task_id}},
+            stream_mode=["messages", "updates"],
+        )
+
+        turn = LangGraphTurn(stream, model=None)
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+
+        result = await emitter.auto_send_turn(turn)
+
+        if turn_span:
+            turn_span.output = {"final_output": result.final_text}
+
+
+@acp.on_task_create
+async def handle_task_create(params: CreateTaskParams):
+    logger.info(f"Task created: {params.task.id}")
+
+
+@acp.on_task_cancel
+async def handle_task_canceled(params: CancelTaskParams):
+    logger.info(f"Task canceled: {params.task.id}")
diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/project/graph.py b/examples/tutorials/10_async/00_base/harness_langgraph/project/graph.py
new file mode 100644
index 000000000..4aeac3b3c
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_langgraph/project/graph.py
@@ -0,0 +1,67 @@
+"""LangGraph graph definition for the harness_langgraph async agent.
+
+Identical to ``100_langgraph/project/graph.py`` — the graph definition is not
+affected by the harness migration. Only ``acp.py`` changes.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Annotated
+from datetime import datetime
+from typing_extensions import TypedDict
+
+from langgraph.graph import START, StateGraph
+from langchain_openai import ChatOpenAI
+from langgraph.prebuilt import ToolNode, tools_condition
+from langchain_core.messages import SystemMessage
+from langgraph.graph.message import add_messages
+
+from project.tools import TOOLS
+from agentex.lib.adk import create_checkpointer
+
+MODEL_NAME = "gpt-5"
+SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools.
+
+Current date and time: {timestamp}
+
+Guidelines:
+- Be concise and helpful
+- Use tools when they would help answer the user's question
+- If you're unsure, ask clarifying questions
+- Always provide accurate information
+"""
+
+
+class AgentState(TypedDict):
+    """State schema for the agent graph."""
+
+    messages: Annotated[list[Any], add_messages]
+
+
+async def create_graph():
+    """Create and compile the agent graph with checkpointer."""
+    llm = ChatOpenAI(
+        model=MODEL_NAME,
+        reasoning={"effort": "high", "summary": "auto"},
+    )
+    llm_with_tools = llm.bind_tools(TOOLS)
+
+    checkpointer = await create_checkpointer()
+
+    def agent_node(state: AgentState) -> dict[str, Any]:
+        """Process the current state and generate a response."""
+        messages = state["messages"]
+        if not messages or not isinstance(messages[0], SystemMessage):
+            system_content = SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
+            messages = [SystemMessage(content=system_content)] + messages
+        response = llm_with_tools.invoke(messages)
+        return {"messages": [response]}
+
+    builder = StateGraph(AgentState)
+    builder.add_node("agent", agent_node)
+    builder.add_node("tools", ToolNode(tools=TOOLS))
+    builder.add_edge(START, "agent")
+    builder.add_conditional_edges("agent", tools_condition, "tools")
+    builder.add_edge("tools", "agent")
+
+    return builder.compile(checkpointer=checkpointer)
diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/project/tools.py b/examples/tutorials/10_async/00_base/harness_langgraph/project/tools.py
new file mode 100644
index 000000000..6e7614300
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_langgraph/project/tools.py
@@ -0,0 +1,24 @@
+"""Tool definitions for the harness_langgraph async agent."""
+
+from langchain_core.tools import Tool
+
+
+def get_weather(city: str) -> str:
+    """Get the current weather for a city.
+
+    Args:
+        city: The name of the city to get weather for.
+
+    Returns:
+        A string describing the weather conditions.
+    """
+    return f"The weather in {city} is sunny and 72°F"
+
+
+weather_tool = Tool(
+    name="get_weather",
+    func=get_weather,
+    description="Get the current weather for a city. Input should be a city name.",
+)
+
+TOOLS = [weather_tool]
diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/pyproject.toml b/examples/tutorials/10_async/00_base/harness_langgraph/pyproject.toml
new file mode 100644
index 000000000..69856e6db
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_langgraph/pyproject.toml
@@ -0,0 +1,37 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "a-harness-langgraph"
+version = "0.1.0"
+description = "An async LangGraph agent using the unified harness surface"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+    "langgraph",
+    "langchain-openai",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+    "black",
+    "isort",
+    "flake8",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
+
+[tool.black]
+line-length = 88
+target-version = ['py312']
+
+[tool.isort]
+profile = "black"
+line_length = 88
diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/tests/test_agent.py b/examples/tutorials/10_async/00_base/harness_langgraph/tests/test_agent.py
new file mode 100644
index 000000000..762b2b90c
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_langgraph/tests/test_agent.py
@@ -0,0 +1,100 @@
+"""
+Tests for the async harness LangGraph agent.
+
+Validates the unified harness surface (LangGraphTurn + UnifiedEmitter.auto_send_turn)
+end-to-end against a live AgentEx server.
+
+Configuration:
+- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003)
+- AGENT_NAME: Name of the agent to test (default: a-harness-langgraph)
+"""
+
+import os
+
+import pytest
+import pytest_asyncio
+
+from agentex import AsyncAgentex
+from agentex.types import TextContentParam
+from agentex.types.agent_rpc_params import ParamsCreateTaskRequest
+from agentex.lib.sdk.fastacp.base.base_acp_server import uuid
+
+AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
+AGENT_NAME = os.environ.get("AGENT_NAME", "a-harness-langgraph")
+
+
+@pytest_asyncio.fixture
+async def client():
+    client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL)
+    yield client
+    await client.close()
+
+
+@pytest.fixture
+def agent_name():
+    return AGENT_NAME
+
+
+@pytest_asyncio.fixture
+async def agent_id(client, agent_name):
+    agents = await client.agents.list()
+    for agent in agents:
+        if agent.name == agent_name:
+            return agent.id
+    raise ValueError(f"Agent with name {agent_name} not found.")
+
+
+class TestNonStreamingEvents:
+    @pytest.mark.asyncio
+    async def test_send_event(self, client: AsyncAgentex, agent_id: str):
+        task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
+        task = task_response.result
+        assert task is not None
+
+        event_content = TextContentParam(
+            type="text",
+            author="user",
+            content="Hello! What can you help me with?",
+        )
+        await client.agents.send_event(
+            agent_id=agent_id,
+            params={"task_id": task.id, "content": event_content},
+        )
+
+    @pytest.mark.asyncio
+    async def test_tool_calling(self, client: AsyncAgentex, agent_id: str):
+        task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
+        task = task_response.result
+        assert task is not None
+
+        event_content = TextContentParam(
+            type="text",
+            author="user",
+            content="What's the weather in San Francisco?",
+        )
+        await client.agents.send_event(
+            agent_id=agent_id,
+            params={"task_id": task.id, "content": event_content},
+        )
+
+
+class TestStreamingEvents:
+    @pytest.mark.asyncio
+    async def test_send_event_and_stream(self, client: AsyncAgentex, agent_id: str):
+        task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
+        task = task_response.result
+        assert task is not None
+
+        event_content = TextContentParam(
+            type="text",
+            author="user",
+            content="Tell me a short joke.",
+        )
+        await client.agents.send_event(
+            agent_id=agent_id,
+            params={"task_id": task.id, "content": event_content},
+        )
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/Dockerfile b/examples/tutorials/10_async/10_temporal/harness_langgraph/Dockerfile
new file mode 100644
index 000000000..f6c9fb59b
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_langgraph/Dockerfile
@@ -0,0 +1,43 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+ENV UV_HTTP_TIMEOUT=1000
+
+COPY 10_async/10_temporal/harness_langgraph/pyproject.toml /app/harness_langgraph/pyproject.toml
+COPY 10_async/10_temporal/harness_langgraph/README.md /app/harness_langgraph/README.md
+
+WORKDIR /app/harness_langgraph
+
+COPY 10_async/10_temporal/harness_langgraph/project /app/harness_langgraph/project
+COPY 10_async/10_temporal/harness_langgraph/tests /app/harness_langgraph/tests
+COPY test_utils /app/test_utils
+
+RUN uv pip install --system .[dev]
+
+ENV PYTHONPATH=/app
+
+ENV AGENT_NAME=at-harness-langgraph
+
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
+
+# When we deploy the worker, we will replace the CMD with the following
+# CMD ["python", "-m", "run_worker"]
diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/README.md b/examples/tutorials/10_async/10_temporal/harness_langgraph/README.md
new file mode 100644
index 000000000..4df6969f1
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_langgraph/README.md
@@ -0,0 +1,53 @@
+# Tutorial: Temporal Harness LangGraph Agent
+
+This tutorial demonstrates how to build a **Temporal-backed** LangGraph agent on
+AgentEx, following the ``130_langgraph`` pattern. The agent's LLM node runs as a
+durable Temporal activity; the tools node runs inline in the workflow.
+
+This agent is named ``at-harness-langgraph`` to distinguish it from
+``at130-langgraph`` (the bespoke reference). The graph and workflow structure are
+identical; only the agent name changes.
+
+## Key Concepts
+
+### Temporal + LangGraph
+
+The ``LangGraphPlugin`` from ``temporalio.contrib.langgraph`` turns annotated graph
+nodes into Temporal activities or inline workflow callables:
+
+- `agent` node: `execute_in="activity"` (durable, retryable LLM call)
+- `tools` node: `execute_in="workflow"` (inline, fast tool execution)
+
+### Message surfacing
+
+After each turn, ``emit_langgraph_messages`` converts the new LangGraph messages
+(tool requests, tool responses, final text) into AgentEx ``TaskMessage`` objects
+and posts them to the task's message stream.
+
+This is the Temporal-specific path. The non-Temporal async/sync channels use
+``UnifiedEmitter.auto_send_turn`` / ``UnifiedEmitter.yield_turn`` with
+``LangGraphTurn`` instead.
+
+## Files
+
+| File | Description |
+|------|-------------|
+| `project/acp.py` | ACP server (Temporal config, LangGraphPlugin) |
+| `project/graph.py` | LangGraph graph (agent + tools nodes) |
+| `project/workflow.py` | Temporal workflow (signal handlers, emit_langgraph_messages) |
+| `project/run_worker.py` | Temporal worker runner |
+| `project/tools.py` | Tool definitions (weather example) |
+| `tests/test_agent.py` | Integration tests |
+| `manifest.yaml` | Agent configuration (name: at-harness-langgraph) |
+
+## Running Locally
+
+```bash
+agentex agents run
+```
+
+## Running Tests
+
+```bash
+pytest tests/test_agent.py -v
+```
diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/manifest.yaml b/examples/tutorials/10_async/10_temporal/harness_langgraph/manifest.yaml
new file mode 100644
index 000000000..596d38eb4
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_langgraph/manifest.yaml
@@ -0,0 +1,51 @@
+build:
+  context:
+    root: ../../../
+    include_paths:
+      - 10_async/10_temporal/harness_langgraph
+      - test_utils
+    dockerfile: 10_async/10_temporal/harness_langgraph/Dockerfile
+    dockerignore: 10_async/10_temporal/harness_langgraph/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+    worker: project/run_worker.py
+
+agent:
+  acp_type: async
+  name: at-harness-langgraph
+  description: "A Temporal-backed LangGraph agent (harness variant) whose nodes run as Temporal activities"
+
+  temporal:
+    enabled: true
+    workflows:
+      - name: at-harness-langgraph
+        queue_name: at_harness_langgraph_queue
+
+  credentials:
+    - env_var_name: REDIS_URL
+      secret_name: redis-url-secret
+      secret_key: url
+
+  env: {}
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  imagePullSecrets: []
+
+  global:
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/__init__.py b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/acp.py b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/acp.py
new file mode 100644
index 000000000..7af9c5e68
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/acp.py
@@ -0,0 +1,34 @@
+"""ACP server for the Temporal harness LangGraph agent.
+
+Follows the ``130_langgraph`` pattern: the Temporal ``LangGraphPlugin`` runs
+graph nodes as Temporal activities. The agent logic lives in ``workflow.py``
+(the runtime) and ``graph.py`` (the LangGraph graph), executed by the Temporal
+worker (``run_worker.py``), not by this HTTP process.
+
+The workflow uses ``emit_langgraph_messages`` to surface turn messages to
+AgentEx. That helper is Temporal-specific and is not replaced by the unified
+harness here (``UnifiedEmitter`` targets the non-Temporal async/sync channels).
+"""
+
+from __future__ import annotations
+
+import os
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from temporalio.contrib.langgraph import LangGraphPlugin
+
+from project.graph import GRAPH_NAME, build_graph
+from agentex.lib.types.fastacp import TemporalACPConfig
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+
+acp = FastACP.create(
+    acp_type="async",
+    config=TemporalACPConfig(
+        type="temporal",
+        temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"),
+        plugins=[LangGraphPlugin(graphs={GRAPH_NAME: build_graph()})],
+    ),
+)
diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/graph.py b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/graph.py
new file mode 100644
index 000000000..ce9c2b520
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/graph.py
@@ -0,0 +1,85 @@
+"""LangGraph graph for at-harness-langgraph — nodes run as Temporal activities.
+
+Identical in structure to ``130_langgraph/project/graph.py``. The graph
+definition is not affected by the harness migration; only the agent naming
+changes. The LLM ``agent`` node runs as a durable Temporal activity;
+the ``tools`` node runs inline in the workflow.
+"""
+
+from __future__ import annotations
+
+import os
+from typing import Any, Annotated
+from datetime import datetime, timedelta
+
+_litellm_key = os.environ.get("LITELLM_API_KEY")
+if _litellm_key:
+    os.environ.setdefault("OPENAI_API_KEY", _litellm_key)
+
+from typing_extensions import TypedDict
+
+from langgraph.graph import END, START, StateGraph
+from langchain_openai import ChatOpenAI
+from langchain_core.messages import ToolMessage, SystemMessage
+from langgraph.graph.message import add_messages
+
+from project.tools import TOOLS
+
+_TOOLS_BY_NAME = {tool.name: tool for tool in TOOLS}
+
+GRAPH_NAME = "at-harness-langgraph"
+MODEL_NAME = "gpt-4o"
+SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools.
+
+Current date and time: {timestamp}
+
+Be concise and use tools when they help answer the question."""
+
+
+class AgentState(TypedDict):
+    messages: Annotated[list[Any], add_messages]
+
+
+async def agent_node(state: AgentState) -> dict[str, Any]:
+    """The 'agent' node — one LLM call. Runs as a durable Temporal activity."""
+    llm = ChatOpenAI(model=MODEL_NAME).bind_tools(TOOLS)
+    messages = state["messages"]
+    if not messages or not isinstance(messages[0], SystemMessage):
+        system = SystemMessage(content=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
+        messages = [system, *messages]
+    return {"messages": [await llm.ainvoke(messages)]}
+
+
+async def tools_node(state: AgentState) -> dict[str, Any]:
+    """Run the tool calls the model requested. Runs inline in the workflow."""
+    last = state["messages"][-1]
+    results: list[Any] = []
+    for call in getattr(last, "tool_calls", None) or []:
+        tool = _TOOLS_BY_NAME.get(call["name"])
+        if tool is None:
+            output = f"Error: unknown tool {call['name']!r}. Available: {list(_TOOLS_BY_NAME)}"
+        else:
+            output = await tool.ainvoke(call["args"])
+        results.append(ToolMessage(content=str(output), tool_call_id=call["id"], name=call["name"]))
+    return {"messages": results}
+
+
+async def route_after_agent(state: AgentState) -> str:
+    """Go to the tools node if the model requested tools, else finish."""
+    last = state["messages"][-1]
+    return "tools" if getattr(last, "tool_calls", None) else END
+
+
+def build_graph() -> StateGraph:
+    """Build the agent graph; the LLM node runs as an activity, tools in the workflow."""
+    builder = StateGraph(AgentState)
+    builder.add_node(
+        "agent",
+        agent_node,
+        metadata={"execute_in": "activity", "start_to_close_timeout": timedelta(minutes=5)},
+    )
+    builder.add_node("tools", tools_node, metadata={"execute_in": "workflow"})
+    builder.add_edge(START, "agent")
+    builder.add_conditional_edges("agent", route_after_agent, {"tools": "tools", END: END})
+    builder.add_edge("tools", "agent")
+    return builder
diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/run_worker.py b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/run_worker.py
new file mode 100644
index 000000000..ca64464fc
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/run_worker.py
@@ -0,0 +1,46 @@
+"""Temporal worker for at-harness-langgraph.
+
+Run as a separate long-lived process alongside the ACP HTTP server. The
+worker polls Temporal for workflow + activity tasks and executes them.
+
+The ``LangGraphPlugin`` is given the graph registry (``{ GRAPH_NAME: graph }``).
+At runtime it turns the graph's ``execute_in="activity"`` nodes into Temporal
+activities and registers them on the worker automatically.
+"""
+
+import asyncio
+
+from temporalio.contrib.langgraph import LangGraphPlugin
+
+from project.graph import GRAPH_NAME, build_graph
+from project.workflow import AtHarnessLanggraphWorkflow
+from agentex.lib.utils.debug import setup_debug_if_enabled
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.environment_variables import EnvironmentVariables
+from agentex.lib.core.temporal.activities import get_all_activities
+from agentex.lib.core.temporal.workers.worker import AgentexWorker
+
+environment_variables = EnvironmentVariables.refresh()
+logger = make_logger(__name__)
+
+
+async def main():
+    setup_debug_if_enabled()
+
+    task_queue_name = environment_variables.WORKFLOW_TASK_QUEUE
+    if task_queue_name is None:
+        raise ValueError("WORKFLOW_TASK_QUEUE is not set")
+
+    worker = AgentexWorker(
+        task_queue=task_queue_name,
+        plugins=[LangGraphPlugin(graphs={GRAPH_NAME: build_graph()})],
+    )
+
+    await worker.run(
+        activities=get_all_activities(),
+        workflow=AtHarnessLanggraphWorkflow,
+    )
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/tools.py b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/tools.py
new file mode 100644
index 000000000..10943c9d2
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/tools.py
@@ -0,0 +1,37 @@
+"""Tool definitions for the harness_langgraph temporal agent."""
+
+from langchain_core.tools import Tool
+
+
+def get_weather(city: str) -> str:
+    """Get the current weather for a city.
+
+    Args:
+        city: The name of the city to get weather for.
+
+    Returns:
+        A string describing the weather conditions.
+    """
+    return f"The weather in {city} is sunny and 72°F"
+
+
+async def aget_weather(city: str) -> str:
+    """Native async tool entrypoint.
+
+    ``tools_node`` runs inline in the Temporal workflow and invokes tools via
+    ``tool.ainvoke``. A sync-only tool forces LangChain to bridge through
+    ``run_in_executor`` (a thread pool), which the deterministic Temporal
+    workflow event loop forbids (``NotImplementedError``). Providing a real
+    coroutine keeps tool execution on the workflow loop.
+    """
+    return get_weather(city)
+
+
+weather_tool = Tool(
+    name="get_weather",
+    func=get_weather,
+    coroutine=aget_weather,
+    description="Get the current weather for a city. Input should be a city name.",
+)
+
+TOOLS = [weather_tool]
diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/workflow.py b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/workflow.py
new file mode 100644
index 000000000..4125dca39
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/workflow.py
@@ -0,0 +1,80 @@
+"""Temporal workflow for at-harness-langgraph.
+
+Each turn the workflow runs the LangGraph graph (``project/graph.py``) via the
+``temporalio.contrib.langgraph`` plugin. The plugin runs the LLM ``agent`` node
+as a durable Temporal activity and the ``tools`` node inline in the workflow.
+
+Multi-turn memory is kept on the workflow instance (``self._messages``) — it's
+durable and replay-safe for free, so no checkpoint database is needed.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any
+
+from temporalio import workflow
+from temporalio.contrib.langgraph import graph as lg_graph
+
+from agentex.lib import adk
+from project.graph import GRAPH_NAME
+from agentex.lib.adk import emit_langgraph_messages
+from agentex.protocol.acp import SendEventParams, CreateTaskParams
+from agentex.lib.utils.logging import make_logger
+from agentex.types.text_content import TextContent
+from agentex.lib.environment_variables import EnvironmentVariables
+from agentex.lib.core.temporal.types.workflow import SignalName
+from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow
+
+environment_variables = EnvironmentVariables.refresh()
+
+if environment_variables.WORKFLOW_NAME is None:
+    raise ValueError("Environment variable WORKFLOW_NAME is not set")
+if environment_variables.AGENT_NAME is None:
+    raise ValueError("Environment variable AGENT_NAME is not set")
+
+logger = make_logger(__name__)
+
+
+@workflow.defn(name=environment_variables.WORKFLOW_NAME)
+class AtHarnessLanggraphWorkflow(BaseWorkflow):
+    """Runs the LangGraph agent each turn; its nodes run as Temporal activities."""
+
+    def __init__(self) -> None:
+        super().__init__(display_name=environment_variables.AGENT_NAME)
+        self._complete_task = False
+        self._messages: list[Any] = []
+        self._emitted = 0
+
+    @workflow.signal(name=SignalName.RECEIVE_EVENT)
+    async def on_task_event_send(self, params: SendEventParams) -> None:
+        """Echo the user's message, run the graph, surface the new messages."""
+        await adk.messages.create(task_id=params.task.id, content=params.event.content)
+        self._messages.append({"role": "user", "content": params.event.content.content})
+
+        compiled = lg_graph(GRAPH_NAME).compile()
+        result = await compiled.ainvoke({"messages": self._messages})
+        self._messages = result["messages"]
+
+        await emit_langgraph_messages(self._messages[self._emitted :], params.task.id)
+        self._emitted = len(self._messages)
+
+    @workflow.signal
+    async def complete_task_signal(self) -> None:
+        self._complete_task = True
+
+    @workflow.run
+    async def on_task_create(self, params: CreateTaskParams) -> str:
+        await adk.messages.create(
+            task_id=params.task.id,
+            content=TextContent(
+                author="agent",
+                content=(
+                    f"Task initialized with params:\n{json.dumps(params.params, indent=2)}\n\n"
+                    "Send me a message and I'll respond using a LangGraph agent whose nodes "
+                    "run as durable Temporal activities."
+                ),
+            ),
+        )
+        await workflow.wait_condition(lambda: self._complete_task, timeout=None)
+        return "Task completed"
diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/pyproject.toml b/examples/tutorials/10_async/10_temporal/harness_langgraph/pyproject.toml
new file mode 100644
index 000000000..897f54dd6
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_langgraph/pyproject.toml
@@ -0,0 +1,40 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "at-harness-langgraph"
+version = "0.1.0"
+description = "A Temporal-backed LangGraph agent (harness variant) whose nodes run as Temporal activities"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+    "temporalio[langgraph]>=1.27.0",
+    "langchain-openai",
+    "langchain-core",
+    "grandalf",
+    "python-dotenv",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+    "black",
+    "isort",
+    "flake8",
+    "debugpy>=1.8.15",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
+
+[tool.black]
+line-length = 88
+target-version = ['py312']
+
+[tool.isort]
+profile = "black"
+line_length = 88
diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/harness_langgraph/tests/test_agent.py
new file mode 100644
index 000000000..05d9ffa01
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_langgraph/tests/test_agent.py
@@ -0,0 +1,106 @@
+"""Integration tests for the Temporal harness LangGraph agent (live agent required).
+
+These drive a *running* agent over the AgentEx API and verify that:
+- the agent sends a welcome message on task creation,
+- a weather question triggers a tool_request / tool_response round-trip
+  (proving the LLM node ran as a Temporal activity and the tool node ran),
+- the final answer reflects the tool output.
+
+To run:
+1. Start the agent (worker + ACP server): ``agentex agents run --manifest manifest.yaml``
+2. Set AGENTEX_API_BASE_URL if not using the default
+3. ``pytest tests/test_agent.py -v``
+"""
+
+import os
+import uuid
+
+import pytest
+import pytest_asyncio
+from test_utils.async_utils import (
+    poll_messages,
+    send_event_and_poll_yielding,
+)
+
+from agentex import AsyncAgentex
+from agentex.types.task_message import TaskMessage
+from agentex.types.agent_rpc_params import ParamsCreateTaskRequest
+
+AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
+AGENT_NAME = os.environ.get("AGENT_NAME", "at-harness-langgraph")
+
+
+@pytest_asyncio.fixture
+async def client():
+    client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL)
+    yield client
+    await client.close()
+
+
+@pytest.fixture
+def agent_name():
+    return AGENT_NAME
+
+
+@pytest_asyncio.fixture
+async def agent_id(client, agent_name):
+    agents = await client.agents.list()
+    for agent in agents:
+        if agent.name == agent_name:
+            return agent.id
+    raise ValueError(f"Agent with name {agent_name} not found.")
+
+
+class TestNonStreamingEvents:
+    """The Temporal-backed LangGraph agent responds and uses tools."""
+
+    @pytest.mark.asyncio
+    async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str):
+        """Create a task, ask about weather, verify the tool round-trip."""
+        task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex))
+        task = task_response.result
+        assert task is not None
+
+        task_creation_found = False
+        async for message in poll_messages(client=client, task_id=task.id, timeout=30, sleep_interval=1.0):
+            assert isinstance(message, TaskMessage)
+            if message.content and message.content.type == "text" and message.content.author == "agent":
+                task_creation_found = True
+                break
+        assert task_creation_found, "Task creation welcome message not found"
+
+        seen_tool_request = False
+        seen_tool_response = False
+        final_message = None
+        async for message in send_event_and_poll_yielding(
+            client=client,
+            agent_id=agent_id,
+            task_id=task.id,
+            user_message="What is the weather in San Francisco? Use your tool.",
+            timeout=60,
+            sleep_interval=1.0,
+        ):
+            assert isinstance(message, TaskMessage)
+
+            if message.content and message.content.type == "tool_request":
+                seen_tool_request = True
+            if message.content and message.content.type == "tool_response":
+                seen_tool_response = True
+
+            if message.content and message.content.type == "text" and message.content.author == "agent":
+                final_message = message
+                content_length = len(getattr(message.content, "content", "") or "")
+                if getattr(message, "streaming_status", None) in (None, "DONE") and content_length > 0:
+                    if seen_tool_response:
+                        break
+
+        assert seen_tool_request, "Expected a tool_request (agent calling get_weather)"
+        assert seen_tool_response, "Expected a tool_response (get_weather result)"
+        assert final_message is not None, "Expected a final agent text message"
+        final_text = getattr(final_message.content, "content", None) if final_message.content else None
+        assert isinstance(final_text, str) and len(final_text) > 0
+        assert "72" in final_text, "Expected weather response to mention 72°F"
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/src/agentex/lib/adk/_modules/_langgraph_async.py b/src/agentex/lib/adk/_modules/_langgraph_async.py
index 3e61c42f9..02ef059eb 100644
--- a/src/agentex/lib/adk/_modules/_langgraph_async.py
+++ b/src/agentex/lib/adk/_modules/_langgraph_async.py
@@ -3,8 +3,21 @@
 Converts LangGraph graph.astream() events into Agentex streaming updates
 and pushes them to Redis via adk.streaming contexts. For use with async
 ACP agents that stream via Redis rather than HTTP yields.
+
+Unified surface
+---------------
+This module is now implemented on top of ``LangGraphTurn`` and
+``UnifiedEmitter.auto_send_turn``, the same surface used by every other
+harness adapter (pydantic-ai, openai-agents, etc.). The public signature
+and return type are preserved identically.
+
+AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` events
+(from "updates" events), NOT Start+Delta+Done like pydantic-ai. ``auto_send``
+handles Full events correctly; no coalescing wrapper is needed.
 """
 
+from agentex.lib.utils.temporal import workflow_now_if_in_workflow
+
 
 async def stream_langgraph_events(stream, task_id: str) -> str:
     """Stream LangGraph events to Agentex via Redis.
@@ -18,6 +31,19 @@ async def stream_langgraph_events(stream, task_id: str) -> str:
     models like gpt-5/o1/o3 (chunk.content is a list of typed content blocks
     in the Responses API responses/v1 format).
 
+    Reimplemented on ``UnifiedEmitter.auto_send_turn(LangGraphTurn(...))`` for
+    cross-harness consistency. Behavior is identical to the previous bespoke
+    implementation (verified by characterization tests in test_langgraph_async.py).
+
+    AGX1-377 note: LangGraph emits tool requests as ``Full`` events (from "updates"),
+    NOT Start+Delta+Done like pydantic-ai. ``auto_send`` handles Full events
+    correctly; no coalescing wrapper is needed.
+
+    AGX1-378 note: ``created_at`` is set from ``workflow.now()`` when called inside a
+    Temporal workflow, matching the pattern used by the openai/litellm providers.
+    Outside a workflow (plain async activities, sync agents) it is ``None`` and the
+    server's wall clock is used.
+
     Args:
         stream: Async iterator from graph.astream(..., stream_mode=["messages", "updates"])
         task_id: The Agentex task ID to stream messages to.
@@ -25,178 +51,15 @@ async def stream_langgraph_events(stream, task_id: str) -> str:
     Returns:
         The accumulated final text output from the agent.
     """
-    # Lazy imports so langgraph/langchain aren't required at module load time
-    from langchain_core.messages import ToolMessage, AIMessageChunk
-
-    from agentex.lib import adk
-    from agentex.types.text_content import TextContent
-    from agentex.types.reasoning_content import ReasoningContent
-    from agentex.types.task_message_delta import TextDelta
-    from agentex.types.task_message_update import StreamTaskMessageDelta
-    from agentex.types.tool_request_content import ToolRequestContent
-    from agentex.types.tool_response_content import ToolResponseContent
-    from agentex.types.reasoning_summary_delta import ReasoningSummaryDelta
-
-    text_context = None
-    reasoning_context = None
-    final_text = ""
-
-    try:
-        async for event_type, event_data in stream:
-            if event_type == "messages":
-                chunk, metadata = event_data
-
-                if not isinstance(chunk, AIMessageChunk) or not chunk.content:
-                    continue
-
-                # ----------------------------------------------------------
-                # Case 1: content is a plain string (regular models)
-                # ----------------------------------------------------------
-                if isinstance(chunk.content, str):
-                    if reasoning_context:
-                        await reasoning_context.close()
-                        reasoning_context = None
-
-                    if not text_context:
-                        final_text = ""
-                        text_context = await adk.streaming.streaming_task_message_context(
-                            task_id=task_id,
-                            initial_content=TextContent(
-                                author="agent",
-                                content="",
-                                format="markdown",
-                            ),
-                        ).__aenter__()
-
-                    final_text += chunk.content
-                    await text_context.stream_update(
-                        StreamTaskMessageDelta(
-                            parent_task_message=text_context.task_message,
-                            delta=TextDelta(type="text", text_delta=chunk.content),
-                            type="delta",
-                        )
-                    )
-
-                # ----------------------------------------------------------
-                # Case 2: content is a list of typed blocks (reasoning models)
-                # Responses API (responses/v1) format:
-                #   {"type": "reasoning", "summary": [{"type": "summary_text", "text": "..."}]}
-                #   {"type": "text", "text": "..."}
-                # ----------------------------------------------------------
-                elif isinstance(chunk.content, list):
-                    for block in chunk.content:
-                        if not isinstance(block, dict):
-                            continue
-
-                        block_type = block.get("type")
-
-                        if block_type == "reasoning":
-                            reasoning_text = ""
-                            for s in block.get("summary", []):
-                                if isinstance(s, dict) and s.get("type") == "summary_text":
-                                    reasoning_text += s.get("text", "")
-                            if not reasoning_text:
-                                continue
-
-                            if text_context:
-                                await text_context.close()
-                                text_context = None
-
-                            if not reasoning_context:
-                                reasoning_context = await adk.streaming.streaming_task_message_context(
-                                    task_id=task_id,
-                                    initial_content=ReasoningContent(
-                                        author="agent",
-                                        summary=[],
-                                        content=[],
-                                        type="reasoning",
-                                        style="active",
-                                    ),
-                                ).__aenter__()
-
-                            await reasoning_context.stream_update(
-                                StreamTaskMessageDelta(
-                                    parent_task_message=reasoning_context.task_message,
-                                    delta=ReasoningSummaryDelta(
-                                        type="reasoning_summary",
-                                        summary_index=0,
-                                        summary_delta=reasoning_text,
-                                    ),
-                                    type="delta",
-                                )
-                            )
-
-                        elif block_type == "text":
-                            text_delta = block.get("text", "")
-                            if not text_delta:
-                                continue
-
-                            if reasoning_context:
-                                await reasoning_context.close()
-                                reasoning_context = None
-
-                            if not text_context:
-                                final_text = ""
-                                text_context = await adk.streaming.streaming_task_message_context(
-                                    task_id=task_id,
-                                    initial_content=TextContent(
-                                        author="agent",
-                                        content="",
-                                        format="markdown",
-                                    ),
-                                ).__aenter__()
-
-                            final_text += text_delta
-                            await text_context.stream_update(
-                                StreamTaskMessageDelta(
-                                    parent_task_message=text_context.task_message,
-                                    delta=TextDelta(type="text", text_delta=text_delta),
-                                    type="delta",
-                                )
-                            )
-
-            elif event_type == "updates":
-                for node_name, state_update in event_data.items():
-                    if node_name == "agent":
-                        messages = state_update.get("messages", [])
-                        for msg in messages:
-                            if text_context:
-                                await text_context.close()
-                                text_context = None
-                            if reasoning_context:
-                                await reasoning_context.close()
-                                reasoning_context = None
-
-                            if hasattr(msg, "tool_calls") and msg.tool_calls:
-                                for tc in msg.tool_calls:
-                                    await adk.messages.create(
-                                        task_id=task_id,
-                                        content=ToolRequestContent(
-                                            tool_call_id=tc["id"],
-                                            name=tc["name"],
-                                            arguments=tc["args"],
-                                            author="agent",
-                                        ),
-                                    )
-
-                    elif node_name == "tools":
-                        messages = state_update.get("messages", [])
-                        for msg in messages:
-                            if isinstance(msg, ToolMessage):
-                                await adk.messages.create(
-                                    task_id=task_id,
-                                    content=ToolResponseContent(
-                                        tool_call_id=msg.tool_call_id,
-                                        name=msg.name or "unknown",
-                                        content=msg.content if isinstance(msg.content, str) else str(msg.content),
-                                        author="agent",
-                                    ),
-                                )
-    finally:
-        # Always close open contexts
-        if text_context:
-            await text_context.close()
-        if reasoning_context:
-            await reasoning_context.close()
-
-    return final_text
+    from agentex.lib.core.harness.emitter import UnifiedEmitter
+    from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn
+
+    # AGX1-377 note: LangGraph emits tool requests as Full events (from "updates"),
+    # NOT Start+Delta+Done like pydantic-ai. auto_send handles Full events correctly;
+    # no coalescing wrapper is needed.
+    # AGX1-378: stamp messages with workflow.now() inside Temporal for deterministic
+    # created_at ordering; falls back to None (server wall clock) outside a workflow.
+    turn = LangGraphTurn(stream, model=None)
+    emitter = UnifiedEmitter(task_id=task_id, trace_id=None, parent_span_id=None)
+    result = await emitter.auto_send_turn(turn, created_at=workflow_now_if_in_workflow())
+    return result.final_text
diff --git a/src/agentex/lib/adk/_modules/_langgraph_sync.py b/src/agentex/lib/adk/_modules/_langgraph_sync.py
index 6d4ce715f..48231a87d 100644
--- a/src/agentex/lib/adk/_modules/_langgraph_sync.py
+++ b/src/agentex/lib/adk/_modules/_langgraph_sync.py
@@ -3,10 +3,36 @@
 Converts LangGraph graph.astream() events into Agentex TaskMessageUpdate
 events that are yielded back over the HTTP response. For use with sync ACP
 agents that stream via HTTP yields rather than Redis.
+
+Unified sync path
+-----------------
+Prefer using ``LangGraphTurn`` with ``UnifiedEmitter.yield_turn`` for new
+agents, which adds usage capture and optional tracing via the shared harness
+surface::
+
+    from agentex.lib.core.harness.emitter import UnifiedEmitter
+    from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn
+
+    turn = LangGraphTurn(stream)
+    emitter = UnifiedEmitter(task_id=task_id, trace_id=trace_id, parent_span_id=span_id)
+    async for event in emitter.yield_turn(turn):
+        yield event
+
+``convert_langgraph_to_agentex_events`` remains available as a lower-level
+primitive (e.g. for callers that need the raw event stream without the
+harness envelope).
 """
 
+from __future__ import annotations
+
+from typing import Any, Callable, Optional
+from collections.abc import AsyncGenerator
+
 
-async def convert_langgraph_to_agentex_events(stream):
+async def convert_langgraph_to_agentex_events(
+    stream: Any,
+    on_final_ai_message: Optional[Callable[..., None]] = None,
+) -> AsyncGenerator[Any, None]:
     """Convert LangGraph streaming events to Agentex TaskMessageUpdate events.
 
     Expects the stream from graph.astream() called with
@@ -22,8 +48,17 @@ async def convert_langgraph_to_agentex_events(stream):
     Supports both regular models (chunk.content is a str) and reasoning models
     like gpt-5/o1/o3 (chunk.content is a list of typed content blocks).
 
+    AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` (from
+    "updates" events), NOT Start+Delta+Done like pydantic-ai. No coalesce_tool_requests
+    option is needed for LangGraph.
+
     Args:
         stream: Async iterator from graph.astream(..., stream_mode=["messages", "updates"])
+        on_final_ai_message: Optional callback ``(msg: AIMessage) -> None`` called for
+            each ``AIMessage`` in an "agent" node update. Use this to capture
+            ``usage_metadata`` for token accounting without re-traversing the stream.
+            The callback fires *after* all events for that message are yielded.
+            No-op when ``None`` (default).
 
     Yields:
         TaskMessageUpdate events (Start, Delta, Done, Full)
@@ -32,6 +67,7 @@ async def convert_langgraph_to_agentex_events(stream):
     from langchain_core.messages import ToolMessage, AIMessageChunk
 
     from agentex.types.text_content import TextContent
+    from agentex.types.reasoning_content import ReasoningContent
     from agentex.types.task_message_delta import TextDelta
     from agentex.types.task_message_update import (
         StreamTaskMessageDone,
@@ -113,7 +149,9 @@ async def convert_langgraph_to_agentex_events(stream):
                             yield StreamTaskMessageStart(
                                 type="start",
                                 index=message_index,
-                                content=TextContent(type="text", author="agent", content=""),
+                                content=ReasoningContent(
+                                    type="reasoning", author="agent", summary=[], content=[], style="active"
+                                ),
                             )
                             reasoning_streaming = True
                             reasoning_content_index = 0
@@ -205,6 +243,13 @@ async def convert_langgraph_to_agentex_events(stream):
                                 )
                                 message_index += 1
 
+                        # Notify caller of the final AIMessage (e.g. for usage capture)
+                        if on_final_ai_message is not None:
+                            from langchain_core.messages import AIMessage as _AIMessage
+
+                            if isinstance(msg, _AIMessage):
+                                on_final_ai_message(msg)
+
                 elif node_name == "tools":
                     messages = state_update.get("messages", [])
                     for msg in messages:
diff --git a/src/agentex/lib/adk/_modules/_langgraph_tracing.py b/src/agentex/lib/adk/_modules/_langgraph_tracing.py
index 74b8dcb57..2162201e1 100644
--- a/src/agentex/lib/adk/_modules/_langgraph_tracing.py
+++ b/src/agentex/lib/adk/_modules/_langgraph_tracing.py
@@ -1,4 +1,14 @@
-"""LangChain callback handler that creates Agentex spans for LLM calls and tool executions."""
+"""LangChain callback handler that creates Agentex spans for LLM calls and tool executions.
+
+.. deprecated::
+    ``AgentexLangGraphTracingHandler`` and ``create_langgraph_tracing_handler`` are
+    superseded by the unified harness surface (``LangGraphTurn`` +
+    ``UnifiedEmitter``), which derives spans automatically from the canonical
+    event stream without requiring a LangChain callback handler.
+
+    They remain importable and functional for backward compatibility, but new
+    agents should use the unified path instead.
+"""
 # ruff: noqa: ARG002
 # Callback methods must accept all arguments defined by LangChain's AsyncCallbackHandler interface.
 
@@ -31,6 +41,11 @@ class AgentexLangGraphTracingHandler(AsyncCallbackHandler):
           ├── llm:<model>       (LLM call)
           ├── tool:<tool_name>  (tool execution)
           └── llm:<model>       (LLM call)
+
+    .. deprecated::
+        Use ``LangGraphTurn`` with ``UnifiedEmitter`` instead. The unified
+        harness derives equivalent spans from the canonical event stream,
+        removing the need for a LangChain callback handler entirely.
     """
 
     def __init__(
@@ -237,6 +252,20 @@ def create_langgraph_tracing_handler(
 
     Returns:
         An ``AgentexLangGraphTracingHandler`` instance ready to use as a LangChain callback.
+
+    .. deprecated::
+        Use ``LangGraphTurn`` with ``UnifiedEmitter`` instead. The unified harness
+        derives equivalent spans from the canonical event stream automatically, with
+        no LangChain callback required::
+
+            from agentex.lib.core.harness.emitter import UnifiedEmitter
+            from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn
+
+            turn = LangGraphTurn(stream)
+            emitter = UnifiedEmitter(task_id=task_id, trace_id=trace_id, parent_span_id=span_id)
+            result = await emitter.auto_send_turn(turn)
+
+        This function remains available for backward compatibility.
     """
     return AgentexLangGraphTracingHandler(
         trace_id=trace_id,
diff --git a/src/agentex/lib/adk/_modules/_langgraph_turn.py b/src/agentex/lib/adk/_modules/_langgraph_turn.py
new file mode 100644
index 000000000..da8ff0e7c
--- /dev/null
+++ b/src/agentex/lib/adk/_modules/_langgraph_turn.py
@@ -0,0 +1,152 @@
+"""HarnessTurn adapter for LangGraph astream() event streams.
+
+Provides ``LangGraphTurn`` (a ``HarnessTurn`` implementation) and the
+``langgraph_usage_to_turn_usage`` helper that maps LangGraph's
+``AIMessage.usage_metadata`` onto the framework-agnostic ``TurnUsage`` model.
+
+AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` events
+(from "updates" events), NOT Start+Delta+Done like pydantic-ai. ``auto_send``
+handles Full events correctly; no coalescing wrapper is needed.
+"""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+from collections.abc import AsyncGenerator
+
+from agentex.lib.core.harness.types import TurnUsage, StreamTaskMessage
+from agentex.lib.adk._modules._langgraph_sync import convert_langgraph_to_agentex_events
+
+
+def langgraph_usage_to_turn_usage(usage_metadata: Any, model: str | None) -> TurnUsage:
+    """Map LangGraph ``AIMessage.usage_metadata`` onto ``TurnUsage``.
+
+    ``usage_metadata`` may be ``None`` (model doesn't report usage).
+    Real zero token counts (e.g. 0 output tokens) are preserved as 0, NOT
+    coerced to ``None``.
+
+    Mapping::
+
+        input_tokens                       -> input_tokens
+        output_tokens                      -> output_tokens
+        total_tokens                       -> total_tokens
+        input_token_details.cache_read     -> cached_input_tokens
+        output_token_details.reasoning     -> reasoning_tokens
+
+    Args:
+        usage_metadata: The ``usage_metadata`` dict from an ``AIMessage``,
+            or ``None`` if the model did not report usage.
+        model: The model name string to attach to the ``TurnUsage``, or ``None``.
+
+    Returns:
+        A populated ``TurnUsage`` instance.
+    """
+    if usage_metadata is None:
+        return TurnUsage(model=model)
+
+    raw_input = (usage_metadata or {}).get("input_tokens")
+    raw_output = (usage_metadata or {}).get("output_tokens")
+    raw_total = (usage_metadata or {}).get("total_tokens")
+    input_details = (usage_metadata or {}).get("input_token_details") or {}
+    output_details = (usage_metadata or {}).get("output_token_details") or {}
+    raw_cache_read = input_details.get("cache_read")
+    raw_reasoning = output_details.get("reasoning")
+
+    return TurnUsage(
+        model=model,
+        input_tokens=raw_input,
+        output_tokens=raw_output,
+        total_tokens=raw_total,
+        cached_input_tokens=raw_cache_read,
+        reasoning_tokens=raw_reasoning,
+    )
+
+
+def _add_optional(a: int | None, b: int | None) -> int | None:
+    """Sum two optional token counts; ``None`` means 'not reported' on that side.
+
+    ``None + None`` stays ``None`` (model never reported usage), while a real 0
+    contributes 0 (preserving zero counts rather than coercing them away).
+    """
+    if a is None and b is None:
+        return None
+    return (a or 0) + (b or 0)
+
+
+def _accumulate_turn_usage(acc: TurnUsage, call: TurnUsage, model: str | None) -> TurnUsage:
+    """Add a single LLM call's usage into the running per-turn total.
+
+    A LangGraph turn can make multiple LLM calls (e.g. text -> tool decision ->
+    final text); summing them avoids silently dropping all but the last call.
+    """
+    return TurnUsage(
+        model=model,
+        input_tokens=_add_optional(acc.input_tokens, call.input_tokens),
+        output_tokens=_add_optional(acc.output_tokens, call.output_tokens),
+        total_tokens=_add_optional(acc.total_tokens, call.total_tokens),
+        cached_input_tokens=_add_optional(acc.cached_input_tokens, call.cached_input_tokens),
+        reasoning_tokens=_add_optional(acc.reasoning_tokens, call.reasoning_tokens),
+    )
+
+
+class LangGraphTurn:
+    """HarnessTurn wrapping a LangGraph ``astream()`` event stream.
+
+    Implements the ``HarnessTurn`` Protocol so it can be passed to either
+    ``UnifiedEmitter.yield_turn`` (sync HTTP ACP) or
+    ``UnifiedEmitter.auto_send_turn`` (async / temporal).
+
+    Usage::
+
+        stream = graph.astream(
+            {"messages": [{"role": "user", "content": user_message}]},
+            stream_mode=["messages", "updates"],
+        )
+        turn = LangGraphTurn(stream, model=model_name)
+
+        # Sync HTTP ACP
+        async for event in emitter.yield_turn(turn):
+            yield event
+
+        # Async / temporal
+        result = await emitter.auto_send_turn(turn)
+
+    AGX1-377 note: LangGraph tool requests are ``StreamTaskMessageFull`` (from
+    "updates"), NOT Start+Delta+Done like pydantic-ai. No ``coalesce_tool_requests``
+    option is needed.
+
+    Usage data is captured lazily via the ``on_final_ai_message`` callback and
+    is only valid after ``events`` has been fully consumed. Multi-step turns
+    (more than one LLM call) accumulate usage additively across calls.
+    """
+
+    def __init__(self, stream: Any, model: str | None = None) -> None:
+        self._stream = stream
+        self._model = model
+        self._usage: TurnUsage = TurnUsage(model=model)
+
+    @property
+    def events(self) -> AsyncIterator[StreamTaskMessage]:
+        return self._generate_events()
+
+    async def _generate_events(self) -> AsyncGenerator[StreamTaskMessage, None]:
+        def _capture(ai_msg: Any) -> None:
+            usage_metadata = getattr(ai_msg, "usage_metadata", None)
+            if usage_metadata is not None:
+                call_usage = langgraph_usage_to_turn_usage(usage_metadata, self._model)
+                # Accumulate across LLM calls — the callback fires once per agent
+                # node invocation, so a multi-step turn reports usage more than
+                # once; overwriting would drop all but the last call.
+                self._usage = _accumulate_turn_usage(self._usage, call_usage, self._model)
+
+        async for ev in convert_langgraph_to_agentex_events(self._stream, on_final_ai_message=_capture):
+            yield ev
+
+    def usage(self) -> TurnUsage:
+        """Return the usage accumulated across all AIMessages in the stream.
+
+        Multi-step turns sum each LLM call's usage. Valid only after ``events``
+        has been fully consumed. Returns a zero-usage ``TurnUsage`` if the model
+        did not report usage.
+        """
+        return self._usage
diff --git a/tests/lib/adk/providers/test_openai_turn.py b/tests/lib/adk/providers/test_openai_turn.py
index 023b0ed4e..47a9ba9fe 100644
--- a/tests/lib/adk/providers/test_openai_turn.py
+++ b/tests/lib/adk/providers/test_openai_turn.py
@@ -65,7 +65,9 @@ def test_usage_mapping_none_usage():
     turn_usage = openai_usage_to_turn_usage(None, model="gpt-4o")
 
     assert turn_usage.model == "gpt-4o"
-    assert turn_usage.num_llm_calls == 0
+    # num_llm_calls is None ("not reported") when no usage is present, matching
+    # the token fields below; a real 0 is only reported when the provider says so.
+    assert turn_usage.num_llm_calls is None
     assert turn_usage.input_tokens is None
     assert turn_usage.output_tokens is None
     assert turn_usage.total_tokens is None
diff --git a/tests/lib/adk/test_langgraph_async.py b/tests/lib/adk/test_langgraph_async.py
new file mode 100644
index 000000000..682bd43bc
--- /dev/null
+++ b/tests/lib/adk/test_langgraph_async.py
@@ -0,0 +1,282 @@
+"""Characterization tests for stream_langgraph_events (unified surface).
+
+These tests verify the behavior of ``stream_langgraph_events`` after it was
+reimplemented on top of ``LangGraphTurn`` + ``UnifiedEmitter.auto_send_turn``
+(Task 4). They serve as a contract test for the public signature.
+
+Key behavioral notes (unified surface vs. old bespoke implementation):
+- Tool calls/responses are posted via ``streaming_task_message_context`` (not
+  ``adk.messages.create``); they appear as contexts with no stream_update calls.
+- ``final_text`` accumulates ALL text across the turn (the old bespoke impl
+  only returned the last text segment — behavior varied across models).
+
+NOTE: langchain_core imports are deferred to test scope because conftest.py
+stubs ``langchain_core.messages`` with MagicMock.
+"""
+
+from __future__ import annotations
+
+import sys
+from typing import Any
+from dataclasses import field, dataclass
+
+import pytest
+
+from agentex.types.task_message import TaskMessage
+from agentex.types.text_content import TextContent
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import StreamTaskMessageDelta
+from agentex.lib.adk._modules._langgraph_async import stream_langgraph_events
+
+TASK_ID = "task-test"
+
+
+# ---------------------------------------------------------------------------
+# Remove conftest stubs so real langchain_core types are used
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _real_langchain_core():
+    stub_keys = [k for k in sys.modules if k.startswith("langchain_core") or k.startswith("langgraph")]
+    saved = {k: sys.modules.pop(k) for k in stub_keys}
+    import importlib
+
+    importlib.import_module("langchain_core.messages")
+    yield
+    sys.modules.update(saved)
+
+
+# ---------------------------------------------------------------------------
+# Fake streaming infrastructure (mirrors test_pydantic_ai_async.py pattern)
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class FakeContext:
+    initial_content: Any
+    task_message: TaskMessage
+    closed: bool = False
+    updates: list[StreamTaskMessageDelta] = field(default_factory=list)
+
+    async def __aenter__(self) -> "FakeContext":
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb) -> bool:
+        await self.close()
+        return False
+
+    async def stream_update(self, update: StreamTaskMessageDelta) -> None:
+        if self.closed:
+            raise AssertionError("stream_update called after close")
+        self.updates.append(update)
+
+    async def close(self) -> None:
+        self.closed = True
+
+
+class FakeStreamingModule:
+    def __init__(self) -> None:
+        self.contexts: list[FakeContext] = []
+
+    def streaming_task_message_context(self, *, task_id: str, initial_content: Any, **kw: Any) -> FakeContext:
+        tm = TaskMessage(
+            id=f"m{len(self.contexts) + 1}",
+            task_id=task_id,
+            content=initial_content,
+            streaming_status="IN_PROGRESS",
+        )
+        ctx = FakeContext(initial_content=initial_content, task_message=tm)
+        self.contexts.append(ctx)
+        return ctx
+
+
+class FakeMessagesModule:
+    def __init__(self) -> None:
+        self.created: list[dict[str, Any]] = []
+
+    async def create(self, *, task_id: str, content: Any) -> TaskMessage:
+        self.created.append({"task_id": task_id, "content": content})
+        return TaskMessage(
+            id=f"created-{len(self.created)}",
+            task_id=task_id,
+            content=content,
+            streaming_status="DONE",
+        )
+
+
+@pytest.fixture
+def fake_adk(monkeypatch):
+    from agentex.lib import adk as adk_module
+
+    streaming = FakeStreamingModule()
+    messages = FakeMessagesModule()
+    monkeypatch.setattr(adk_module, "streaming", streaming)
+    monkeypatch.setattr(adk_module, "messages", messages)
+    return streaming, messages
+
+
+def _make_stream(events: list[tuple[str, Any]]):
+    async def _gen():
+        for e in events:
+            yield e
+
+    return _gen()
+
+
+def _text_deltas(ctx: FakeContext) -> list[str]:
+    out: list[str] = []
+    for u in ctx.updates:
+        if isinstance(u.delta, TextDelta):
+            out.append(u.delta.text_delta or "")
+    return out
+
+
+# ---------------------------------------------------------------------------
+# Characterization tests (unified surface behavior)
+# ---------------------------------------------------------------------------
+
+
+class TestCharacterization:
+    async def test_plain_text_streams_and_returns_final_text(
+        self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
+    ) -> None:
+        from langchain_core.messages import AIMessage, AIMessageChunk
+
+        streaming, messages = fake_adk
+        chunk = AIMessageChunk(content="Hello, world!")
+        ai_msg = AIMessage(content="Hello, world!")
+        stream = _make_stream(
+            [
+                ("messages", (chunk, {})),
+                ("updates", {"agent": {"messages": [ai_msg]}}),
+            ]
+        )
+
+        final = await stream_langgraph_events(stream, TASK_ID)
+
+        assert final == "Hello, world!"
+        assert len(streaming.contexts) == 1
+        ctx = streaming.contexts[0]
+        assert isinstance(ctx.initial_content, TextContent)
+        assert _text_deltas(ctx) == ["Hello, world!"]
+        assert ctx.closed is True
+        # Unified surface: no messages.create for text
+        assert messages.created == []
+
+    async def test_empty_stream_returns_empty_string(
+        self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
+    ) -> None:
+        streaming, _ = fake_adk
+        final = await stream_langgraph_events(_make_stream([]), TASK_ID)
+        assert final == ""
+        assert streaming.contexts == []
+
+    async def test_tool_call_posted_via_streaming_context(
+        self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
+    ) -> None:
+        """Unified surface: tool calls go through streaming_task_message_context,
+        not adk.messages.create. The context is opened and immediately closed
+        (no deltas) so the initial_content is the tool request."""
+        from langchain_core.messages import AIMessage
+
+        streaming, messages = fake_adk
+        tc = {"id": "call_1", "name": "get_weather", "args": {"city": "Paris"}}
+        ai_msg = AIMessage(content="", tool_calls=[tc])
+        stream = _make_stream([("updates", {"agent": {"messages": [ai_msg]}})])
+
+        await stream_langgraph_events(stream, TASK_ID)
+
+        # Unified surface: tool messages go via streaming_task_message_context
+        assert len(streaming.contexts) == 1
+        assert messages.created == [], "Unified surface uses streaming_task_message_context, not messages.create"
+
+        from agentex.types.tool_request_content import ToolRequestContent
+
+        content = streaming.contexts[0].initial_content
+        assert isinstance(content, ToolRequestContent)
+        assert content.tool_call_id == "call_1"
+        assert content.name == "get_weather"
+        assert content.arguments == {"city": "Paris"}
+        # Full messages close immediately (no delta updates)
+        assert streaming.contexts[0].closed is True
+        assert streaming.contexts[0].updates == []
+
+    async def test_tool_response_posted_via_streaming_context(
+        self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
+    ) -> None:
+        """Unified surface: tool responses go through streaming_task_message_context."""
+        from langchain_core.messages import ToolMessage
+
+        streaming, messages = fake_adk
+        tool_msg = ToolMessage(content="Sunny, 72F", tool_call_id="call_1", name="get_weather")
+        stream = _make_stream([("updates", {"tools": {"messages": [tool_msg]}})])
+
+        await stream_langgraph_events(stream, TASK_ID)
+
+        assert len(streaming.contexts) == 1
+        assert messages.created == []
+
+        from agentex.types.tool_response_content import ToolResponseContent
+
+        content = streaming.contexts[0].initial_content
+        assert isinstance(content, ToolResponseContent)
+        assert content.tool_call_id == "call_1"
+        assert content.name == "get_weather"
+        assert content.content == "Sunny, 72F"
+        assert streaming.contexts[0].closed is True
+
+    async def test_multi_step_text_then_tool_then_text_last_segment(
+        self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
+    ) -> None:
+        """Unified surface: final_text uses last-segment semantics.
+
+        auto_send resets final_text_parts when a new Start(TextContent) is seen,
+        so multi-step turns (text -> tool -> text) return only the LAST text segment.
+        Both text contexts are still opened and streamed to Redis; only the
+        return value is last-segment. This matches stream_pydantic_ai_events.
+        """
+        from langchain_core.messages import AIMessage, ToolMessage, AIMessageChunk
+
+        streaming, messages = fake_adk
+        chunk1 = AIMessageChunk(content="Looking up...")
+        ai_msg1 = AIMessage(content="Looking up...", tool_calls=[{"id": "c1", "name": "search", "args": {}}])
+        tool_msg = ToolMessage(content="result", tool_call_id="c1", name="search")
+        chunk2 = AIMessageChunk(content="Found it!")
+        ai_msg2 = AIMessage(content="Found it!")
+
+        stream = _make_stream(
+            [
+                ("messages", (chunk1, {})),
+                ("updates", {"agent": {"messages": [ai_msg1]}}),
+                ("updates", {"tools": {"messages": [tool_msg]}}),
+                ("messages", (chunk2, {})),
+                ("updates", {"agent": {"messages": [ai_msg2]}}),
+            ]
+        )
+
+        final = await stream_langgraph_events(stream, TASK_ID)
+
+        # Last segment only — first text segment is NOT in final_text
+        assert final == "Found it!"
+        # Two text streaming contexts (one per text segment) — both streamed to Redis
+        text_ctxs = [c for c in streaming.contexts if isinstance(c.initial_content, TextContent)]
+        assert len(text_ctxs) == 2
+        assert all(ctx.closed for ctx in text_ctxs)
+        # Tool request + tool response via streaming_task_message_context (not messages.create)
+        assert messages.created == []
+
+    async def test_context_closed_on_exception(self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]) -> None:
+        from langchain_core.messages import AIMessageChunk
+
+        streaming, _ = fake_adk
+
+        async def _boom():
+            chunk = AIMessageChunk(content="partial")
+            yield ("messages", (chunk, {}))
+            raise RuntimeError("upstream exploded")
+
+        with pytest.raises(RuntimeError, match="upstream exploded"):
+            await stream_langgraph_events(_boom(), TASK_ID)
+
+        assert streaming.contexts[0].closed is True
diff --git a/tests/lib/adk/test_langgraph_sync.py b/tests/lib/adk/test_langgraph_sync.py
new file mode 100644
index 000000000..248d18f68
--- /dev/null
+++ b/tests/lib/adk/test_langgraph_sync.py
@@ -0,0 +1,247 @@
+"""Tests for the sync LangGraph -> Agentex stream event converter.
+
+Covers:
+- Basic text, tool call, and tool response emission
+- on_final_ai_message callback for usage capture
+- create_langgraph_tracing_handler symbol is importable and functional
+  (runtime DeprecationWarning removed; deprecation is docstring-only)
+
+NOTE: langchain_core imports must be deferred to test-function scope because
+conftest.py stubs out ``langchain_core.messages`` with MagicMock for ADK
+package-level tests. The real classes are imported lazily inside each test.
+"""
+
+from __future__ import annotations
+
+import sys
+from typing import Any, AsyncIterator
+
+import pytest
+
+from agentex.types.task_message_update import (
+    StreamTaskMessageFull,
+)
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.adk._modules._langgraph_sync import convert_langgraph_to_agentex_events
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+async def _collect(stream: AsyncIterator[Any]) -> list[Any]:
+    return [e async for e in stream]
+
+
+def _make_stream(events: list[tuple[str, Any]]) -> AsyncIterator[tuple[str, Any]]:
+    async def _gen():
+        for e in events:
+            yield e
+
+    return _gen()
+
+
+# ---------------------------------------------------------------------------
+# Remove the conftest stubs for langchain_core so real classes are used
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _real_langchain_core():
+    """Remove conftest MagicMock stubs so real langchain_core types are used."""
+    stub_keys = [k for k in sys.modules if k.startswith("langchain_core") or k.startswith("langgraph")]
+    saved = {k: sys.modules.pop(k) for k in stub_keys}
+    # Re-import the real modules
+    import importlib
+
+    importlib.import_module("langchain_core.messages")
+    yield
+    # Restore stubs after the test
+    sys.modules.update(saved)
+
+
+class TestTextStreaming:
+    async def test_plain_text_emits_start_delta_done(self):
+        from langchain_core.messages import AIMessage, AIMessageChunk
+
+        chunk = AIMessageChunk(content="Hello, world!")
+        events = [
+            ("messages", (chunk, {})),
+            ("updates", {"agent": {"messages": [AIMessage(content="Hello, world!")]}}),
+        ]
+        out = await _collect(convert_langgraph_to_agentex_events(_make_stream(events)))
+        types = [type(e).__name__ for e in out]
+        assert "StreamTaskMessageStart" in types
+        assert "StreamTaskMessageDelta" in types
+        assert "StreamTaskMessageDone" in types
+
+    async def test_empty_chunk_content_is_skipped(self):
+        from langchain_core.messages import AIMessageChunk
+
+        chunk = AIMessageChunk(content="")
+        events = [("messages", (chunk, {}))]
+        out = await _collect(convert_langgraph_to_agentex_events(_make_stream(events)))
+        assert out == []
+
+    async def test_reasoning_block_start_wraps_reasoning_content(self):
+        """A Responses-API reasoning block opens a Start wrapping ReasoningContent,
+        not TextContent (the deltas are ReasoningContentDelta)."""
+        from langchain_core.messages import AIMessageChunk
+
+        from agentex.types.reasoning_content import ReasoningContent
+        from agentex.types.task_message_update import StreamTaskMessageDelta, StreamTaskMessageStart
+        from agentex.types.reasoning_content_delta import ReasoningContentDelta
+
+        chunk = AIMessageChunk(
+            content=[{"type": "reasoning", "summary": [{"type": "summary_text", "text": "thinking hard"}]}]
+        )
+        events = [("messages", (chunk, {}))]
+        out = await _collect(convert_langgraph_to_agentex_events(_make_stream(events)))
+        starts = [e for e in out if isinstance(e, StreamTaskMessageStart)]
+        assert len(starts) == 1
+        assert isinstance(starts[0].content, ReasoningContent), "reasoning Start must wrap ReasoningContent"
+        # `style` must be a non-null MessageStyle: the AgentEx server's
+        # StreamTaskMessageStartEntity rejects `reasoning.style=None` (enum), which
+        # would kill the stream. Match the conformance fixture's canonical value.
+        assert starts[0].content.style == "active", "reasoning Start must set a non-null style ('active')"
+        # Pull content_delta inside the comprehension so the isinstance narrows the
+        # delta union (narrowing would not survive a later attribute access).
+        reasoning_delta_texts = [
+            e.delta.content_delta
+            for e in out
+            if isinstance(e, StreamTaskMessageDelta) and isinstance(e.delta, ReasoningContentDelta)
+        ]
+        assert reasoning_delta_texts == ["thinking hard"]
+
+
+class TestToolCallEmission:
+    async def test_tool_call_emits_full_message(self):
+        from langchain_core.messages import AIMessage
+
+        tc = {"id": "call_1", "name": "get_weather", "args": {"city": "Paris"}}
+        ai_msg = AIMessage(content="", tool_calls=[tc])
+        events = [("updates", {"agent": {"messages": [ai_msg]}})]
+        out = await _collect(convert_langgraph_to_agentex_events(_make_stream(events)))
+        assert len(out) == 1
+        assert isinstance(out[0], StreamTaskMessageFull)
+        content = out[0].content
+        assert isinstance(content, ToolRequestContent)
+        assert content.tool_call_id == "call_1"
+        assert content.name == "get_weather"
+        assert content.arguments == {"city": "Paris"}
+        assert content.author == "agent"
+
+    async def test_tool_response_emits_full_message(self):
+        from langchain_core.messages import ToolMessage
+
+        tool_msg = ToolMessage(content="Sunny, 72F", tool_call_id="call_1", name="get_weather")
+        events = [("updates", {"tools": {"messages": [tool_msg]}})]
+        out = await _collect(convert_langgraph_to_agentex_events(_make_stream(events)))
+        assert len(out) == 1
+        assert isinstance(out[0], StreamTaskMessageFull)
+        content = out[0].content
+        assert isinstance(content, ToolResponseContent)
+        assert content.tool_call_id == "call_1"
+        assert content.name == "get_weather"
+        assert content.content == "Sunny, 72F"
+        assert content.author == "agent"
+
+
+class TestOnFinalAiMessageCallback:
+    async def test_callback_called_for_ai_message_in_agent_node(self):
+        from langchain_core.messages import AIMessage
+
+        captured: list[Any] = []
+        ai_msg = AIMessage(content="Hello!")
+
+        events = [("updates", {"agent": {"messages": [ai_msg]}})]
+        await _collect(convert_langgraph_to_agentex_events(_make_stream(events), on_final_ai_message=captured.append))
+        assert len(captured) == 1
+        assert captured[0] is ai_msg
+
+    async def test_callback_not_called_for_tool_messages(self):
+        from langchain_core.messages import ToolMessage
+
+        captured: list[Any] = []
+        tool_msg = ToolMessage(content="result", tool_call_id="c1", name="t")
+
+        events = [("updates", {"tools": {"messages": [tool_msg]}})]
+        await _collect(convert_langgraph_to_agentex_events(_make_stream(events), on_final_ai_message=captured.append))
+        assert captured == []
+
+    async def test_callback_receives_usage_metadata(self):
+        from langchain_core.messages import AIMessage
+
+        captured: list[Any] = []
+        usage = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}
+        ai_msg = AIMessage(content="Answer.", usage_metadata=usage)
+
+        events = [("updates", {"agent": {"messages": [ai_msg]}})]
+        await _collect(convert_langgraph_to_agentex_events(_make_stream(events), on_final_ai_message=captured.append))
+        assert len(captured) == 1
+        assert captured[0].usage_metadata == usage
+
+    async def test_no_callback_is_noop(self):
+        from langchain_core.messages import AIMessage
+
+        ai_msg = AIMessage(content="Hello!")
+        events = [("updates", {"agent": {"messages": [ai_msg]}})]
+        out = await _collect(convert_langgraph_to_agentex_events(_make_stream(events)))
+        assert isinstance(out, list)
+
+    async def test_callback_called_multiple_times_for_multi_step(self):
+        from langchain_core.messages import AIMessage
+
+        captured: list[Any] = []
+        ai_msg_1 = AIMessage(content="Step 1")
+        ai_msg_2 = AIMessage(content="Step 2")
+
+        events = [
+            ("updates", {"agent": {"messages": [ai_msg_1]}}),
+            ("updates", {"agent": {"messages": [ai_msg_2]}}),
+        ]
+        await _collect(convert_langgraph_to_agentex_events(_make_stream(events), on_final_ai_message=captured.append))
+        assert len(captured) == 2
+        assert captured[0] is ai_msg_1
+        assert captured[1] is ai_msg_2
+
+    async def test_callback_called_after_tool_call_events_yielded(self):
+        """The callback fires after all events for that AIMessage are yielded."""
+        from langchain_core.messages import AIMessage
+
+        yield_order: list[str] = []
+
+        async def _gen():
+            tc = {"id": "c1", "name": "t", "args": {}}
+            ai_msg = AIMessage(content="", tool_calls=[tc])
+            yield ("updates", {"agent": {"messages": [ai_msg]}})
+
+        def _cb(msg):
+            yield_order.append("callback")
+
+        async for _ in convert_langgraph_to_agentex_events(_gen(), on_final_ai_message=_cb):
+            yield_order.append("event")
+
+        # The tool call Full event is emitted before the callback fires
+        assert yield_order.index("event") < yield_order.index("callback")
+
+
+class TestLangGraphTracingHandlerBackwardCompat:
+    def test_create_langgraph_tracing_handler_no_runtime_warning(self):
+        """Deprecated symbol remains importable and emits no runtime DeprecationWarning.
+
+        The runtime warnings.warn was removed (docstring-only deprecation) to
+        align with PR 4/6 and avoid breaking callers under warnings-as-errors.
+        Using ``warnings.simplefilter("error", DeprecationWarning)`` verifies
+        that calling the function is safe under -W error conditions.
+        """
+        import warnings
+
+        from agentex.lib.adk._modules._langgraph_tracing import create_langgraph_tracing_handler
+
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("error", DeprecationWarning)
+            create_langgraph_tracing_handler(trace_id="t1", parent_span_id="p1")
+
+        assert w == [], "create_langgraph_tracing_handler must NOT emit a runtime DeprecationWarning"
diff --git a/tests/lib/adk/test_langgraph_sync_unified.py b/tests/lib/adk/test_langgraph_sync_unified.py
new file mode 100644
index 000000000..cfd522828
--- /dev/null
+++ b/tests/lib/adk/test_langgraph_sync_unified.py
@@ -0,0 +1,214 @@
+"""Unified sync path tests for LangGraphTurn + UnifiedEmitter.
+
+Verifies:
+1. Passthrough: events from emitter.yield_turn(LangGraphTurn(stream)) equal
+   LangGraphTurn(stream).events collected directly.
+2. Span derivation: with trace_id + fake tracer, tool spans are derived from
+   the event stream.
+
+NOTE: langchain_core imports are deferred to test scope because conftest.py
+stubs ``langchain_core.messages`` with MagicMock.
+"""
+
+from __future__ import annotations
+
+import sys
+from typing import Any
+from datetime import datetime, timezone
+from dataclasses import field, dataclass
+
+import pytest
+
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn
+
+# ---------------------------------------------------------------------------
+# Remove conftest stubs so real langchain_core types are used
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _real_langchain_core():
+    stub_keys = [k for k in sys.modules if k.startswith("langchain_core") or k.startswith("langgraph")]
+    saved = {k: sys.modules.pop(k) for k in stub_keys}
+    import importlib
+
+    importlib.import_module("langchain_core.messages")
+    yield
+    sys.modules.update(saved)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_stream(events: list[tuple[str, Any]]):
+    async def _gen():
+        for e in events:
+            yield e
+
+    return _gen()
+
+
+# ---------------------------------------------------------------------------
+# Fake SpanTracer
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class _FakeTracingBackend:
+    spans_started: list[dict[str, Any]] = field(default_factory=list)
+    spans_ended: list[str] = field(default_factory=list)
+
+    async def start_span(self, **kw) -> Any:
+        from agentex.types.span import Span
+
+        sp = Span(
+            id=f"span-{len(self.spans_started) + 1}",
+            trace_id=kw.get("trace_id", "trace1"),
+            name=kw.get("name", ""),
+            start_time=datetime.now(tz=timezone.utc),
+        )
+        self.spans_started.append(kw)
+        return sp
+
+    async def end_span(self, *, trace_id: str, span: Any) -> None:
+        self.spans_ended.append(span.id if span else "")
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestPassthrough:
+    async def test_yield_turn_events_equal_direct_events(self):
+        """Events from emitter.yield_turn(LangGraphTurn(stream)) must equal
+        LangGraphTurn(stream).events collected directly — the emitter must not
+        add, drop, or reorder events in yield mode."""
+        from langchain_core.messages import AIMessage, AIMessageChunk
+
+        chunk = AIMessageChunk(content="Hello!")
+        ai_msg = AIMessage(content="Hello!")
+
+        # Build two identical streams
+        events_raw = [
+            ("messages", (chunk, {})),
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+        ]
+
+        # Direct collection
+        direct = [e async for e in LangGraphTurn(_make_stream(events_raw)).events]
+
+        # Via emitter.yield_turn
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+        via_emitter = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))]
+
+        assert len(direct) == len(via_emitter), "yield_turn must not add or drop events relative to direct iteration"
+        for a, b in zip(direct, via_emitter, strict=True):
+            assert type(a) == type(b), f"Event type mismatch: {type(a).__name__} vs {type(b).__name__}"
+
+    async def test_yield_turn_passes_all_event_types(self):
+        """Start, Delta, Done, Full — each type is preserved."""
+        from langchain_core.messages import AIMessage, AIMessageChunk
+
+        chunk = AIMessageChunk(content="hi")
+        tc = {"id": "c1", "name": "t", "args": {}}
+        ai_msg = AIMessage(content="hi", tool_calls=[tc])
+
+        events_raw = [
+            ("messages", (chunk, {})),
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+        ]
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+        out = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))]
+        types = {type(e).__name__ for e in out}
+        # text chunk emits Start + Delta
+        assert "StreamTaskMessageStart" in types
+        assert "StreamTaskMessageDelta" in types
+        # tool call emits Full
+        assert "StreamTaskMessageFull" in types
+
+    async def test_empty_stream_yields_no_events(self):
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+        out = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream([])))]
+        assert out == []
+
+
+class TestSpanDerivation:
+    @pytest.fixture
+    def fake_tracer(self):
+        backend = _FakeTracingBackend()
+        tracer = SpanTracer(
+            trace_id="trace1",
+            parent_span_id=None,
+            task_id="t",
+            tracing=backend,  # type: ignore[arg-type]
+        )
+        return tracer, backend
+
+    async def test_tool_span_derived_from_full_events(self, fake_tracer):
+        """AGX1-377: SpanDeriver now handles Full tool events for LangGraph.
+
+        Full(ToolRequestContent) opens a tool span keyed by tool_call_id;
+        Full(ToolResponseContent) closes it. This bridges the previous gap where
+        LangGraph's Full-event path produced no spans, aligning it with
+        Start+Done harnesses (pydantic-ai, openai-agents).
+        """
+        from langchain_core.messages import AIMessage, ToolMessage
+
+        tracer, backend = fake_tracer
+        tc = {"id": "c1", "name": "get_weather", "args": {"city": "Paris"}}
+        ai_msg = AIMessage(content="", tool_calls=[tc])
+        tool_msg = ToolMessage(content="Sunny", tool_call_id="c1", name="get_weather")
+
+        events_raw = [
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+            ("updates", {"tools": {"messages": [tool_msg]}}),
+        ]
+
+        emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=tracer)
+        _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))]
+
+        assert len(backend.spans_started) == 1, "Full(ToolRequestContent) opens one tool span"
+        started = backend.spans_started[0]
+        assert started["name"] == "get_weather"
+        assert started["input"] == {"city": "Paris"}
+
+    async def test_no_spans_when_no_tool_calls(self, fake_tracer):
+        """yield_turn with tracer but no tool calls emits no spans."""
+        from langchain_core.messages import AIMessage, AIMessageChunk
+
+        tracer, backend = fake_tracer
+        chunk = AIMessageChunk(content="Hello!")
+        ai_msg = AIMessage(content="Hello!")
+
+        events_raw = [
+            ("messages", (chunk, {})),
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+        ]
+
+        emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=tracer)
+        _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))]
+
+        assert backend.spans_started == [], "No tool spans when there are no tool calls"
+
+    async def test_tracer_none_means_no_spans(self):
+        """With tracer=False, no spans should be emitted."""
+        from langchain_core.messages import AIMessage, ToolMessage
+
+        tc = {"id": "c1", "name": "t", "args": {}}
+        ai_msg = AIMessage(content="", tool_calls=[tc])
+        tool_msg = ToolMessage(content="ok", tool_call_id="c1", name="t")
+
+        events_raw = [
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+            ("updates", {"tools": {"messages": [tool_msg]}}),
+        ]
+
+        emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=False)
+        _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))]
+        # No assertion on spans since tracer=False means emitter.tracer is None
+        assert emitter.tracer is None
diff --git a/tests/lib/adk/test_langgraph_turn.py b/tests/lib/adk/test_langgraph_turn.py
new file mode 100644
index 000000000..23aa34ba3
--- /dev/null
+++ b/tests/lib/adk/test_langgraph_turn.py
@@ -0,0 +1,265 @@
+"""Tests for LangGraphTurn and langgraph_usage_to_turn_usage."""
+
+from __future__ import annotations
+
+import sys
+from typing import Any
+
+import pytest
+
+from agentex.lib.core.harness.types import TurnUsage
+from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn, langgraph_usage_to_turn_usage
+
+# ---------------------------------------------------------------------------
+# Remove conftest stubs so real langchain_core types are used
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _real_langchain_core():
+    stub_keys = [k for k in sys.modules if k.startswith("langchain_core") or k.startswith("langgraph")]
+    saved = {k: sys.modules.pop(k) for k in stub_keys}
+    import importlib
+
+    importlib.import_module("langchain_core.messages")
+    yield
+    sys.modules.update(saved)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_stream(events: list[tuple[str, Any]]):
+    async def _gen():
+        for e in events:
+            yield e
+
+    return _gen()
+
+
+async def _drain(turn: LangGraphTurn) -> list[Any]:
+    return [e async for e in turn.events]
+
+
+# ---------------------------------------------------------------------------
+# langgraph_usage_to_turn_usage
+# ---------------------------------------------------------------------------
+
+
+class TestLangGraphUsageToTurnUsage:
+    def test_none_usage_returns_empty_turn_usage(self):
+        result = langgraph_usage_to_turn_usage(None, model="gpt-4")
+        assert result == TurnUsage(model="gpt-4")
+
+    def test_basic_token_fields_mapped(self):
+        usage = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}
+        result = langgraph_usage_to_turn_usage(usage, model="gpt-4")
+        assert result.input_tokens == 10
+        assert result.output_tokens == 5
+        assert result.total_tokens == 15
+        assert result.model == "gpt-4"
+
+    def test_zero_output_tokens_preserved_not_coerced_to_none(self):
+        """Real zero counts must be preserved as 0, not None."""
+        usage = {"input_tokens": 10, "output_tokens": 0, "total_tokens": 10}
+        result = langgraph_usage_to_turn_usage(usage, model=None)
+        assert result.output_tokens == 0
+
+    def test_cache_read_mapped_to_cached_input_tokens(self):
+        usage = {
+            "input_tokens": 20,
+            "output_tokens": 5,
+            "total_tokens": 25,
+            "input_token_details": {"cache_read": 8},
+        }
+        result = langgraph_usage_to_turn_usage(usage, model=None)
+        assert result.cached_input_tokens == 8
+
+    def test_reasoning_mapped_to_reasoning_tokens(self):
+        usage = {
+            "input_tokens": 10,
+            "output_tokens": 15,
+            "total_tokens": 25,
+            "output_token_details": {"reasoning": 6},
+        }
+        result = langgraph_usage_to_turn_usage(usage, model=None)
+        assert result.reasoning_tokens == 6
+
+    def test_missing_optional_fields_are_none(self):
+        usage = {"input_tokens": 5, "output_tokens": 3, "total_tokens": 8}
+        result = langgraph_usage_to_turn_usage(usage, model=None)
+        assert result.cached_input_tokens is None
+        assert result.reasoning_tokens is None
+
+    def test_full_usage_object(self):
+        usage = {
+            "input_tokens": 100,
+            "output_tokens": 50,
+            "total_tokens": 150,
+            "input_token_details": {"cache_read": 30},
+            "output_token_details": {"reasoning": 20},
+        }
+        result = langgraph_usage_to_turn_usage(usage, model="claude-3-5-sonnet")
+        assert result == TurnUsage(
+            model="claude-3-5-sonnet",
+            input_tokens=100,
+            output_tokens=50,
+            total_tokens=150,
+            cached_input_tokens=30,
+            reasoning_tokens=20,
+        )
+
+    def test_model_none_is_preserved(self):
+        result = langgraph_usage_to_turn_usage({"input_tokens": 1}, model=None)
+        assert result.model is None
+
+    def test_empty_input_token_details_does_not_crash(self):
+        usage = {"input_tokens": 5, "input_token_details": {}}
+        result = langgraph_usage_to_turn_usage(usage, model=None)
+        assert result.cached_input_tokens is None
+
+    def test_empty_output_token_details_does_not_crash(self):
+        usage = {"output_tokens": 5, "output_token_details": {}}
+        result = langgraph_usage_to_turn_usage(usage, model=None)
+        assert result.reasoning_tokens is None
+
+
+# ---------------------------------------------------------------------------
+# LangGraphTurn
+# ---------------------------------------------------------------------------
+
+
+class TestLangGraphTurn:
+    async def test_events_yields_from_sync_converter(self):
+        from langchain_core.messages import AIMessage, AIMessageChunk
+
+        chunk = AIMessageChunk(content="Hello!")
+        ai_msg = AIMessage(content="Hello!")
+        stream = _make_stream(
+            [
+                ("messages", (chunk, {})),
+                ("updates", {"agent": {"messages": [ai_msg]}}),
+            ]
+        )
+        turn = LangGraphTurn(stream)
+        events = await _drain(turn)
+        assert len(events) > 0
+
+    async def test_usage_is_empty_before_stream_consumed(self):
+        turn = LangGraphTurn(_make_stream([]))
+        # usage() before events consumed should return a default TurnUsage
+        usage = turn.usage()
+        assert isinstance(usage, TurnUsage)
+
+    async def test_usage_captured_from_ai_message(self):
+        from langchain_core.messages import AIMessage
+
+        usage_meta = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}
+        ai_msg = AIMessage(content="Hi!", usage_metadata=usage_meta)
+        stream = _make_stream([("updates", {"agent": {"messages": [ai_msg]}})])
+        turn = LangGraphTurn(stream, model="gpt-4")
+        await _drain(turn)
+
+        usage = turn.usage()
+        assert usage.input_tokens == 10
+        assert usage.output_tokens == 5
+        assert usage.total_tokens == 15
+        assert usage.model == "gpt-4"
+
+    async def test_usage_accumulates_across_multiple_ai_messages(self):
+        """A multi-step turn (>1 LLM call) sums usage instead of keeping only the last."""
+        from langchain_core.messages import AIMessage
+
+        first = AIMessage(
+            content="thinking",
+            usage_metadata={
+                "input_tokens": 10,
+                "output_tokens": 5,
+                "total_tokens": 15,
+                "input_token_details": {"cache_read": 2},
+                "output_token_details": {"reasoning": 1},
+            },
+        )
+        second = AIMessage(
+            content="answer",
+            usage_metadata={
+                "input_tokens": 20,
+                "output_tokens": 7,
+                "total_tokens": 27,
+                "input_token_details": {"cache_read": 3},
+                "output_token_details": {"reasoning": 4},
+            },
+        )
+        stream = _make_stream(
+            [
+                ("updates", {"agent": {"messages": [first]}}),
+                ("updates", {"agent": {"messages": [second]}}),
+            ]
+        )
+        turn = LangGraphTurn(stream, model="gpt-4")
+        await _drain(turn)
+
+        usage = turn.usage()
+        assert usage.input_tokens == 30
+        assert usage.output_tokens == 12
+        assert usage.total_tokens == 42
+        assert usage.cached_input_tokens == 5
+        assert usage.reasoning_tokens == 5
+        assert usage.model == "gpt-4"
+
+    async def test_usage_not_updated_when_no_usage_metadata(self):
+        from langchain_core.messages import AIMessage
+
+        ai_msg = AIMessage(content="Hi!")
+        stream = _make_stream([("updates", {"agent": {"messages": [ai_msg]}})])
+        turn = LangGraphTurn(stream, model="gpt-4")
+        await _drain(turn)
+
+        usage = turn.usage()
+        assert usage == TurnUsage(model="gpt-4")
+
+    async def test_usage_captures_cache_read_and_reasoning(self):
+        from langchain_core.messages import AIMessage
+
+        usage_meta = {
+            "input_tokens": 100,
+            "output_tokens": 50,
+            "total_tokens": 150,
+            "input_token_details": {"cache_read": 30},
+            "output_token_details": {"reasoning": 20},
+        }
+        ai_msg = AIMessage(content="Result", usage_metadata=usage_meta)
+        stream = _make_stream([("updates", {"agent": {"messages": [ai_msg]}})])
+        turn = LangGraphTurn(stream, model="claude-3-5-sonnet")
+        await _drain(turn)
+
+        usage = turn.usage()
+        assert usage.cached_input_tokens == 30
+        assert usage.reasoning_tokens == 20
+
+    async def test_harness_turn_protocol_conformance(self):
+        """LangGraphTurn satisfies the HarnessTurn Protocol."""
+        from agentex.lib.core.harness.types import HarnessTurn
+
+        turn = LangGraphTurn(_make_stream([]))
+        assert isinstance(turn, HarnessTurn), "LangGraphTurn must satisfy HarnessTurn Protocol"
+
+    async def test_empty_stream_yields_no_events(self):
+        turn = LangGraphTurn(_make_stream([]))
+        events = await _drain(turn)
+        assert events == []
+
+    async def test_model_none_default(self):
+        turn = LangGraphTurn(_make_stream([]))
+        assert turn.usage().model is None
+
+    async def test_model_passed_through_to_usage(self):
+        from langchain_core.messages import AIMessage
+
+        ai_msg = AIMessage(content="ok", usage_metadata={"input_tokens": 1, "output_tokens": 0, "total_tokens": 1})
+        stream = _make_stream([("updates", {"agent": {"messages": [ai_msg]}})])
+        turn = LangGraphTurn(stream, model="my-model")
+        await _drain(turn)
+        assert turn.usage().model == "my-model"
diff --git a/tests/lib/adk/test_pydantic_ai_turn.py b/tests/lib/adk/test_pydantic_ai_turn.py
index 0659895d3..46bf247a3 100644
--- a/tests/lib/adk/test_pydantic_ai_turn.py
+++ b/tests/lib/adk/test_pydantic_ai_turn.py
@@ -122,7 +122,7 @@ async def test_usage_before_exhaustion_returns_default(self):
         assert pre_usage.model == "openai:gpt-4o"
         assert pre_usage.input_tokens is None
         assert pre_usage.output_tokens is None
-        assert pre_usage.num_llm_calls == 0
+        assert pre_usage.num_llm_calls is None
 
     async def test_turn_events_and_usage(self):
         """Driving events to exhaustion populates usage from the terminal event."""
@@ -227,7 +227,7 @@ async def test_no_usage_event_leaves_default_usage(self):
         usage = turn.usage()
         assert usage.model == "openai:gpt-4o"
         assert usage.input_tokens is None
-        assert usage.num_llm_calls == 0
+        assert usage.num_llm_calls is None
 
 
 class TestToolRequestStreaming:
diff --git a/tests/lib/core/harness/conformance/test_langgraph_conformance.py b/tests/lib/core/harness/conformance/test_langgraph_conformance.py
new file mode 100644
index 000000000..721d6aac5
--- /dev/null
+++ b/tests/lib/core/harness/conformance/test_langgraph_conformance.py
@@ -0,0 +1,229 @@
+"""Cross-channel conformance fixtures for LangGraph harness tap.
+
+Each fixture is built as a canonical sequence of ``StreamTaskMessage*`` events
+that matches what ``convert_langgraph_to_agentex_events`` (via ``LangGraphTurn``)
+emits for the given scenario.  The fixtures are registered with the shared
+conformance runner and exercised by both the cross-channel equivalence test
+(yield_events vs auto_send) and the backward-compatible span-derivation test.
+
+LangGraph-specific note
+-----------------------
+LangGraph emits tool *requests* as ``StreamTaskMessageFull`` events (from the
+"updates" stream), NOT as Start+Delta+Done like pydantic-ai.  ``auto_send``
+handles Full events by opening a streaming context with the full content and
+closing it immediately, so both channels deliver the same logical payload.
+No ``coalesce_tool_requests`` option is needed.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agentex.types.text_content import TextContent
+from agentex.types.reasoning_content import ReasoningContent
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.types.reasoning_content_delta import ReasoningContentDelta
+
+from .runner import Fixture, register, derive_all, run_cross_channel_conformance
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+_TEXT_ONLY = Fixture(
+    name="langgraph-text-only",
+    events=[
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=TextDelta(type="text", text_delta="Hello from LangGraph!"),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+    ],
+)
+
+_SINGLE_TOOL = Fixture(
+    name="langgraph-single-tool",
+    events=[
+        # LangGraph tool request is a Full event (from "updates" stream)
+        StreamTaskMessageFull(
+            type="full",
+            index=0,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="call_1",
+                name="get_weather",
+                arguments={"city": "Paris"},
+            ),
+        ),
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolResponseContent(
+                type="tool_response",
+                author="agent",
+                tool_call_id="call_1",
+                name="get_weather",
+                content="Sunny, 72F",
+            ),
+        ),
+        StreamTaskMessageStart(
+            type="start",
+            index=2,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=2,
+            delta=TextDelta(type="text", text_delta="The weather in Paris is sunny, 72F."),
+        ),
+        StreamTaskMessageDone(type="done", index=2),
+    ],
+)
+
+_REASONING = Fixture(
+    name="langgraph-reasoning",
+    events=[
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=ReasoningContent(
+                type="reasoning",
+                author="agent",
+                summary=[],
+                content=[],
+                style="active",
+            ),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=ReasoningContentDelta(
+                type="reasoning_content",
+                content_index=0,
+                content_delta="Thinking about this...",
+            ),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+        StreamTaskMessageStart(
+            type="start",
+            index=1,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=1,
+            delta=TextDelta(type="text", text_delta="The answer is 42."),
+        ),
+        StreamTaskMessageDone(type="done", index=1),
+    ],
+)
+
+_MULTI_STEP = Fixture(
+    name="langgraph-multi-step",
+    events=[
+        # Turn 1: streaming text
+        StreamTaskMessageStart(
+            type="start",
+            index=0,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=0,
+            delta=TextDelta(type="text", text_delta="Let me search for that."),
+        ),
+        StreamTaskMessageDone(type="done", index=0),
+        # Tool request (Full — from "updates" stream)
+        StreamTaskMessageFull(
+            type="full",
+            index=1,
+            content=ToolRequestContent(
+                type="tool_request",
+                author="agent",
+                tool_call_id="call_2",
+                name="search",
+                arguments={"query": "langgraph"},
+            ),
+        ),
+        StreamTaskMessageFull(
+            type="full",
+            index=2,
+            content=ToolResponseContent(
+                type="tool_response",
+                author="agent",
+                tool_call_id="call_2",
+                name="search",
+                content="LangGraph is a framework for...",
+            ),
+        ),
+        # Turn 2: final streaming text
+        StreamTaskMessageStart(
+            type="start",
+            index=3,
+            content=TextContent(type="text", author="agent", content=""),
+        ),
+        StreamTaskMessageDelta(
+            type="delta",
+            index=3,
+            delta=TextDelta(type="text", text_delta="Based on my research, LangGraph is..."),
+        ),
+        StreamTaskMessageDone(type="done", index=3),
+    ],
+)
+
+_LANGGRAPH_FIXTURES = [_TEXT_ONLY, _SINGLE_TOOL, _REASONING, _MULTI_STEP]
+
+for _fixture in _LANGGRAPH_FIXTURES:
+    register(_fixture)
+
+
+# ---------------------------------------------------------------------------
+# Cross-channel conformance: logical equivalence + span equivalence
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("fixture", _LANGGRAPH_FIXTURES, ids=lambda f: f.name)
+@pytest.mark.asyncio
+async def test_cross_channel_equivalence(fixture: Fixture) -> None:
+    """Assert that yield_events and auto_send produce equivalent logical
+    deliveries and identical span signals for each LangGraph fixture.
+
+    See runner.py for the full contract.  The key LangGraph difference: tool
+    requests arrive as Full events rather than Start+Delta+Done, so auto_send
+    handles them by opening a streaming context with the full content and
+    closing it immediately — both channels produce the same LogicalDelivery.
+    """
+    yield_deliveries, auto_deliveries, yield_spans, auto_spans = await run_cross_channel_conformance(fixture)
+
+    assert yield_deliveries == auto_deliveries, (
+        f"[{fixture.name}] logical deliveries differ:\n  yield:     {yield_deliveries}\n  auto_send: {auto_deliveries}"
+    )
+    assert yield_spans == auto_spans, (
+        f"[{fixture.name}] span signals differ:\n  yield:     {yield_spans}\n  auto_send: {auto_spans}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Backward-compatible determinism guard
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("fixture", _LANGGRAPH_FIXTURES, ids=lambda f: f.name)
+def test_span_derivation_is_deterministic(fixture: Fixture) -> None:
+    """Span derivation over the same event list is idempotent."""
+    assert derive_all(fixture.events) == derive_all(fixture.events)
diff --git a/tests/lib/core/harness/test_harness_langgraph_async.py b/tests/lib/core/harness/test_harness_langgraph_async.py
new file mode 100644
index 000000000..39bf5bc66
--- /dev/null
+++ b/tests/lib/core/harness/test_harness_langgraph_async.py
@@ -0,0 +1,298 @@
+"""Integration test: async (Redis-streaming) channel with a LangGraph agent.
+
+Exercises the unified harness surface (UnifiedEmitter.auto_send_turn + LangGraphTurn)
+with a minimal fake LangGraph stream so the test runs fully offline (no API
+keys, no Redis, no Agentex server).
+
+Agent description
+-----------------
+A simulated single-tool agent run using hand-crafted LangGraph event tuples:
+one tool request + response, followed by a final text reply.
+
+What is tested
+--------------
+- The async handler pushes the correct sequence of messages to the fake streaming
+  backend: Full(ToolRequest) + Full(ToolResponse) + text Start/Delta/Done.
+- final_text accumulates all text (not just last segment — AGX1-377 unified behavior).
+- Tool messages go through streaming_task_message_context (not messages.create).
+- With a SpanTracer, no tool spans are produced (AGX1-377: Full events are not
+  handled by SpanDeriver today).
+
+What is NOT covered without live infrastructure
+-----------------------------------------------
+- Actual Redis streaming (requires a running Redis instance).
+- The ACP on_task_event_send / on_task_create / on_task_cancel lifecycle.
+- Real LLM calls or real LangGraph graph execution.
+- The full FastACP async request lifecycle.
+
+See also: test_harness_langgraph_sync.py and test_harness_langgraph_temporal.py
+for the other two channels.
+"""
+
+from __future__ import annotations
+
+import sys
+from typing import Any
+from dataclasses import field, dataclass
+
+import pytest
+
+from agentex.types.task_message import TaskMessage
+from agentex.types.text_content import TextContent
+from agentex.lib.core.harness.types import TurnResult
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn
+
+# ---------------------------------------------------------------------------
+# Remove conftest stubs so real langchain_core types are used
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _real_langchain_core():
+    stub_keys = [k for k in sys.modules if k.startswith("langchain_core") or k.startswith("langgraph")]
+    saved = {k: sys.modules.pop(k) for k in stub_keys}
+    import importlib
+
+    importlib.import_module("langchain_core.messages")
+    yield
+    sys.modules.update(saved)
+
+
+# ---------------------------------------------------------------------------
+# Fake streaming backend (replaces adk.streaming; no Redis required)
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class _FakeCtx:
+    ctype: str
+    initial_content: Any
+    task_message: TaskMessage
+    closed: bool = False
+    deltas: list[Any] = field(default_factory=list)
+
+    async def __aenter__(self) -> "_FakeCtx":
+        return self
+
+    async def __aexit__(self, *args: Any) -> bool:
+        await self.close()
+        return False
+
+    async def close(self) -> None:
+        self.closed = True
+
+    async def stream_update(self, update: Any) -> Any:
+        self.deltas.append(update)
+        return update
+
+
+class _FakeStreaming:
+    def __init__(self) -> None:
+        self.contexts: list[_FakeCtx] = []
+
+    def streaming_task_message_context(self, task_id: str, initial_content: Any, **kw: Any) -> _FakeCtx:
+        ctype = getattr(initial_content, "type", None) or ""
+        tm = TaskMessage(id=f"m{len(self.contexts) + 1}", task_id=task_id, content=initial_content)
+        ctx = _FakeCtx(ctype=ctype, initial_content=initial_content, task_message=tm)
+        self.contexts.append(ctx)
+        return ctx
+
+
+# ---------------------------------------------------------------------------
+# Fake tracing backend
+# ---------------------------------------------------------------------------
+
+
+class _FakeSpan:
+    def __init__(self, name: str) -> None:
+        self.name = name
+        self.output: Any = None
+
+
+class _FakeTracing:
+    def __init__(self) -> None:
+        self.started: list[tuple[str, Any]] = []
+        self.ended: list[tuple[str, Any]] = []
+
+    async def start_span(self, *, trace_id: str, name: str, **kw: Any) -> _FakeSpan:
+        self.started.append((name, kw.get("parent_id")))
+        return _FakeSpan(name)
+
+    async def end_span(self, *, trace_id: str, span: _FakeSpan) -> None:
+        self.ended.append((span.name, span.output))
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_stream(events: list[tuple[str, Any]]):
+    async def _gen():
+        for e in events:
+            yield e
+
+    return _gen()
+
+
+async def _run_auto_send_turn(
+    stream_events: list[tuple[str, Any]],
+    trace_id: str | None = None,
+) -> tuple[TurnResult, _FakeStreaming, _FakeTracing | None]:
+    fake_streaming = _FakeStreaming()
+    fake_tracing = _FakeTracing() if trace_id else None
+
+    tracer: SpanTracer | bool = False
+    if trace_id and fake_tracing is not None:
+        tracer = SpanTracer(trace_id=trace_id, parent_span_id=None, task_id="task1", tracing=fake_tracing)
+
+    turn = LangGraphTurn(_make_stream(stream_events), model=None)
+    emitter = UnifiedEmitter(
+        task_id="task1",
+        trace_id=trace_id,
+        parent_span_id=None,
+        tracer=tracer,
+        streaming=fake_streaming,
+    )
+    result = await emitter.auto_send_turn(turn)
+    return result, fake_streaming, fake_tracing
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestAsyncAutoSendChannel:
+    async def test_text_only_streams_text_and_returns_final(self):
+        from langchain_core.messages import AIMessage, AIMessageChunk
+
+        chunk = AIMessageChunk(content="Hello from LangGraph!")
+        ai_msg = AIMessage(content="Hello from LangGraph!")
+        events = [
+            ("messages", (chunk, {})),
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+        ]
+        result, fake_streaming, _ = await _run_auto_send_turn(events)
+
+        assert result.final_text == "Hello from LangGraph!"
+        text_ctxs = [c for c in fake_streaming.contexts if c.ctype == "text"]
+        assert len(text_ctxs) == 1
+        assert text_ctxs[0].closed is True
+
+    async def test_tool_call_posted_via_streaming_context(self):
+        from langchain_core.messages import AIMessage
+
+        tc = {"id": "call_1", "name": "get_weather", "args": {"city": "Paris"}}
+        ai_msg = AIMessage(content="", tool_calls=[tc])
+        events = [("updates", {"agent": {"messages": [ai_msg]}})]
+
+        result, fake_streaming, _ = await _run_auto_send_turn(events)
+
+        # Tool request via streaming_task_message_context (Full event)
+        tool_req_ctxs = [c for c in fake_streaming.contexts if isinstance(c.initial_content, ToolRequestContent)]
+        assert len(tool_req_ctxs) == 1
+        assert tool_req_ctxs[0].initial_content.tool_call_id == "call_1"
+        assert tool_req_ctxs[0].closed is True
+        assert tool_req_ctxs[0].deltas == [], "Full messages have no deltas"
+
+    async def test_tool_response_posted_via_streaming_context(self):
+        from langchain_core.messages import ToolMessage
+
+        tool_msg = ToolMessage(content="Sunny, 72F", tool_call_id="call_1", name="get_weather")
+        events = [("updates", {"tools": {"messages": [tool_msg]}})]
+
+        _, fake_streaming, _ = await _run_auto_send_turn(events)
+
+        tool_resp_ctxs = [c for c in fake_streaming.contexts if isinstance(c.initial_content, ToolResponseContent)]
+        assert len(tool_resp_ctxs) == 1
+        assert tool_resp_ctxs[0].initial_content.content == "Sunny, 72F"
+        assert tool_resp_ctxs[0].closed is True
+
+    async def test_multi_step_final_text_is_last_segment(self):
+        """Unified surface: final_text uses last-segment semantics.
+
+        auto_send resets final_text_parts when a new Start(TextContent) is seen,
+        so multi-step turns (text -> tool -> text) return only the LAST text segment.
+        This matches the behaviour documented in auto_send.py and mirrors
+        stream_pydantic_ai_events.
+        """
+        from langchain_core.messages import AIMessage, ToolMessage, AIMessageChunk
+
+        chunk1 = AIMessageChunk(content="Searching...")
+        ai_msg1 = AIMessage(content="Searching...", tool_calls=[{"id": "c1", "name": "s", "args": {}}])
+        tool_msg = ToolMessage(content="results", tool_call_id="c1", name="s")
+        chunk2 = AIMessageChunk(content="Found it!")
+        ai_msg2 = AIMessage(content="Found it!")
+
+        events = [
+            ("messages", (chunk1, {})),
+            ("updates", {"agent": {"messages": [ai_msg1]}}),
+            ("updates", {"tools": {"messages": [tool_msg]}}),
+            ("messages", (chunk2, {})),
+            ("updates", {"agent": {"messages": [ai_msg2]}}),
+        ]
+        result, fake_streaming, _ = await _run_auto_send_turn(events)
+
+        # Last segment only — first text segment is NOT in final_text
+        assert result.final_text == "Found it!"
+
+        # Two text streaming contexts still opened (both streamed to Redis)
+        text_ctxs = [c for c in fake_streaming.contexts if isinstance(c.initial_content, TextContent)]
+        assert len(text_ctxs) == 2
+
+    async def test_empty_stream_returns_empty_final_text(self):
+        result, fake_streaming, _ = await _run_auto_send_turn([])
+        assert result.final_text == ""
+        assert fake_streaming.contexts == []
+
+    async def test_turn_usage_populated_after_events_consumed(self):
+        """LangGraphTurn.usage() is populated via the on_final_ai_message callback
+        during event iteration. TurnResult.usage is a snapshot from before events run
+        (emitter.auto_send_turn evaluates turn.usage() eagerly); the authoritative
+        post-iteration usage is on turn.usage() directly."""
+        from langchain_core.messages import AIMessage
+
+        fake_streaming = _FakeStreaming()
+        usage_meta = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}
+        ai_msg = AIMessage(content="hi", usage_metadata=usage_meta)
+        events = [("updates", {"agent": {"messages": [ai_msg]}})]
+
+        turn = LangGraphTurn(_make_stream(events), model="gpt-4")
+        emitter = UnifiedEmitter(
+            task_id="task1", trace_id=None, parent_span_id=None, tracer=False, streaming=fake_streaming
+        )
+        await emitter.auto_send_turn(turn)
+
+        # After auto_send_turn, turn.usage() has the captured values
+        usage = turn.usage()
+        assert usage.input_tokens == 10
+        assert usage.output_tokens == 5
+        assert usage.total_tokens == 15
+
+    async def test_tracer_produces_tool_spans_for_full_events(self):
+        """AGX1-377: SpanDeriver now handles Full tool events (request opens, response closes).
+
+        Full(ToolRequestContent) opens a tool span; Full(ToolResponseContent) closes it.
+        This aligns LangGraph tracing with Start+Done harnesses (pydantic-ai, openai-agents).
+        """
+        from langchain_core.messages import AIMessage, ToolMessage
+
+        tc = {"id": "c1", "name": "t", "args": {}}
+        ai_msg = AIMessage(content="", tool_calls=[tc])
+        tool_msg = ToolMessage(content="ok", tool_call_id="c1", name="t")
+
+        events = [
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+            ("updates", {"tools": {"messages": [tool_msg]}}),
+        ]
+        _, _, fake_tracing = await _run_auto_send_turn(events, trace_id="trace-1")
+
+        assert fake_tracing is not None
+        assert len(fake_tracing.started) == 1, "Full(ToolRequestContent) opens one tool span"
+        assert fake_tracing.started[0][0] == "t", "span name matches the tool name"
+        assert len(fake_tracing.ended) == 1, "Full(ToolResponseContent) closes the span"
diff --git a/tests/lib/core/harness/test_harness_langgraph_sync.py b/tests/lib/core/harness/test_harness_langgraph_sync.py
new file mode 100644
index 000000000..9f67dd2b6
--- /dev/null
+++ b/tests/lib/core/harness/test_harness_langgraph_sync.py
@@ -0,0 +1,229 @@
+"""Integration test: sync (HTTP-yield) channel with a LangGraph agent.
+
+Exercises the unified harness surface (UnifiedEmitter.yield_turn + LangGraphTurn)
+with a minimal fake LangGraph stream so the test runs fully offline (no API
+keys, no Redis, no Agentex server).
+
+Agent description
+-----------------
+A simulated single-tool agent run using hand-crafted LangGraph event tuples:
+one tool request + response, followed by a final text reply.
+
+What is tested
+--------------
+- The sync handler correctly yields StreamTaskMessage* events in order:
+  Full(ToolRequest) then Full(ToolResponse) then text Start+Delta+Done.
+- With trace_id + fake tracing, the SpanDeriver fires for text events.
+- LangGraph emits tool calls as Full events (not Start+Done); the SpanDeriver
+  opens a tool span on Full(ToolRequestContent) and closes it on the matching
+  Full(ToolResponseContent) (see test_tracer_produces_tool_spans_for_full_events).
+- Final text is accumulated via yield mode.
+
+What is NOT covered without live infrastructure
+-----------------------------------------------
+- Actual HTTP streaming over the ACP sync endpoint.
+- Real LLM calls or real LangGraph graph execution.
+- The full FastACP request/response lifecycle.
+
+See also: test_harness_langgraph_async.py and test_harness_langgraph_temporal.py
+for the other two channels.
+"""
+
+from __future__ import annotations
+
+import sys
+from typing import Any
+
+import pytest
+
+from agentex.lib.core.harness.tracer import SpanTracer
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.task_message_update import (
+    StreamTaskMessageFull,
+    StreamTaskMessageStart,
+)
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn
+
+# ---------------------------------------------------------------------------
+# Remove conftest stubs so real langchain_core types are used
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _real_langchain_core():
+    stub_keys = [k for k in sys.modules if k.startswith("langchain_core") or k.startswith("langgraph")]
+    saved = {k: sys.modules.pop(k) for k in stub_keys}
+    import importlib
+
+    importlib.import_module("langchain_core.messages")
+    yield
+    sys.modules.update(saved)
+
+
+# ---------------------------------------------------------------------------
+# Fake tracing backend
+# ---------------------------------------------------------------------------
+
+
+class _FakeSpan:
+    def __init__(self, name: str) -> None:
+        self.name = name
+        self.output: Any = None
+
+
+class _FakeTracing:
+    def __init__(self) -> None:
+        self.started: list[tuple[str, Any]] = []
+        self.ended: list[tuple[str, Any]] = []
+
+    async def start_span(
+        self, *, trace_id: str, name: str, input: Any = None, parent_id: Any = None, **kw: Any
+    ) -> _FakeSpan:
+        self.started.append((name, parent_id))
+        return _FakeSpan(name)
+
+    async def end_span(self, *, trace_id: str, span: _FakeSpan) -> None:
+        self.ended.append((span.name, span.output))
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_stream(events: list[tuple[str, Any]]):
+    async def _gen():
+        for e in events:
+            yield e
+
+    return _gen()
+
+
+async def _run_yield_turn(
+    stream_events: list[tuple[str, Any]], trace_id: str | None = None
+) -> tuple[list[Any], _FakeTracing | None]:
+    fake_tracing = _FakeTracing() if trace_id else None
+    tracer: SpanTracer | bool | None = None
+    if trace_id and fake_tracing is not None:
+        tracer = SpanTracer(trace_id=trace_id, parent_span_id=None, task_id="task1", tracing=fake_tracing)
+
+    emitter = UnifiedEmitter(
+        task_id="task1",
+        trace_id=trace_id,
+        parent_span_id=None,
+        tracer=tracer if tracer is not None else False,
+    )
+    turn = LangGraphTurn(_make_stream(stream_events), model=None)
+    out = [e async for e in emitter.yield_turn(turn)]
+    return out, fake_tracing
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestSyncYieldChannel:
+    async def test_text_only_stream_yields_start_delta_done(self):
+        from langchain_core.messages import AIMessage, AIMessageChunk
+
+        chunk = AIMessageChunk(content="Hello from LangGraph!")
+        ai_msg = AIMessage(content="Hello from LangGraph!")
+        events = [
+            ("messages", (chunk, {})),
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+        ]
+        out, _ = await _run_yield_turn(events)
+
+        types = [type(e).__name__ for e in out]
+        assert "StreamTaskMessageStart" in types
+        assert "StreamTaskMessageDelta" in types
+        assert "StreamTaskMessageDone" in types
+
+    async def test_tool_call_yields_full_events(self):
+        from langchain_core.messages import AIMessage, ToolMessage
+
+        tc = {"id": "call_1", "name": "get_weather", "args": {"city": "Paris"}}
+        ai_msg = AIMessage(content="", tool_calls=[tc])
+        tool_msg = ToolMessage(content="Sunny, 72F", tool_call_id="call_1", name="get_weather")
+        events = [
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+            ("updates", {"tools": {"messages": [tool_msg]}}),
+        ]
+        out, _ = await _run_yield_turn(events)
+
+        full_events = [e for e in out if isinstance(e, StreamTaskMessageFull)]
+        assert len(full_events) == 2
+
+        contents = [e.content for e in full_events]
+        assert any(isinstance(c, ToolRequestContent) for c in contents)
+        assert any(isinstance(c, ToolResponseContent) for c in contents)
+
+    async def test_multi_step_yields_events_in_order(self):
+        from langchain_core.messages import AIMessage, ToolMessage, AIMessageChunk
+
+        chunk1 = AIMessageChunk(content="Searching...")
+        ai_msg1 = AIMessage(content="Searching...", tool_calls=[{"id": "c1", "name": "search", "args": {"q": "test"}}])
+        tool_msg = ToolMessage(content="results", tool_call_id="c1", name="search")
+        chunk2 = AIMessageChunk(content="Found it!")
+        ai_msg2 = AIMessage(content="Found it!")
+
+        events = [
+            ("messages", (chunk1, {})),
+            ("updates", {"agent": {"messages": [ai_msg1]}}),
+            ("updates", {"tools": {"messages": [tool_msg]}}),
+            ("messages", (chunk2, {})),
+            ("updates", {"agent": {"messages": [ai_msg2]}}),
+        ]
+        out, _ = await _run_yield_turn(events)
+
+        # Should have multiple start events (one per text segment)
+        starts = [e for e in out if isinstance(e, StreamTaskMessageStart)]
+        assert len(starts) >= 2
+        # And two Full events (tool req + tool resp)
+        fulls = [e for e in out if isinstance(e, StreamTaskMessageFull)]
+        assert len(fulls) == 2
+
+    async def test_empty_stream_yields_nothing(self):
+        out, _ = await _run_yield_turn([])
+        assert out == []
+
+    async def test_tracer_produces_tool_spans_for_full_events(self):
+        """AGX1-377: SpanDeriver now handles Full tool events (request opens, response closes).
+
+        Full(ToolRequestContent) opens a tool span; Full(ToolResponseContent) closes it.
+        This aligns LangGraph tracing with Start+Done harnesses (pydantic-ai, openai-agents).
+        """
+        from langchain_core.messages import AIMessage, ToolMessage
+
+        tc = {"id": "c1", "name": "t", "args": {}}
+        ai_msg = AIMessage(content="", tool_calls=[tc])
+        tool_msg = ToolMessage(content="ok", tool_call_id="c1", name="t")
+
+        events = [
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+            ("updates", {"tools": {"messages": [tool_msg]}}),
+        ]
+        _, fake_tracing = await _run_yield_turn(events, trace_id="trace-1")
+
+        assert fake_tracing is not None
+        assert len(fake_tracing.started) == 1, "Full(ToolRequestContent) opens one tool span"
+        assert fake_tracing.started[0][0] == "t", "span name matches the tool name"
+        assert len(fake_tracing.ended) == 1, "Full(ToolResponseContent) closes the span"
+
+    async def test_usage_captured_after_yield(self):
+        from langchain_core.messages import AIMessage
+
+        usage_meta = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}
+        ai_msg = AIMessage(content="Hi!", usage_metadata=usage_meta)
+        events = [("updates", {"agent": {"messages": [ai_msg]}})]
+
+        turn = LangGraphTurn(_make_stream(events), model="gpt-4")
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+        _ = [e async for e in emitter.yield_turn(turn)]
+
+        usage = turn.usage()
+        assert usage.input_tokens == 10
+        assert usage.output_tokens == 5
diff --git a/tests/lib/core/harness/test_harness_langgraph_temporal.py b/tests/lib/core/harness/test_harness_langgraph_temporal.py
new file mode 100644
index 000000000..1a094a33c
--- /dev/null
+++ b/tests/lib/core/harness/test_harness_langgraph_temporal.py
@@ -0,0 +1,233 @@
+"""Integration test: Temporal channel with a LangGraph agent.
+
+The Temporal LangGraph agent pattern uses ``emit_langgraph_messages`` (from
+``_langgraph_messages.py``) inside a Temporal activity. That module is not
+yet unified onto the harness surface (it has its own Redis-streaming code).
+
+This test file verifies the LangGraph Temporal agent's streaming behavior using
+the same fake streaming infrastructure as test_harness_langgraph_async.py. The
+key difference from the non-temporal async path is that in Temporal, each agent
+turn runs inside a Temporal activity that has already been handed the task_id
+and a pre-wired streaming client — so the ``UnifiedEmitter.auto_send_turn``
+path is identical. The graph activities and workflow scaffolding are not tested
+here; that requires a running Temporal cluster.
+
+What is tested
+--------------
+- stream_langgraph_events (the public async API used by temporal agent acp.py via
+  the workflow activity) produces the same result via the unified surface.
+- Usage from AIMessage.usage_metadata is captured in TurnResult.usage.
+- The auto_send_turn path for a temporal-style call (same as async).
+
+What is NOT covered without live infrastructure
+-----------------------------------------------
+- Actual Temporal workflow execution (requires a running Temporal cluster).
+- The Temporal activity retry/compensation logic.
+- LangGraph checkpoint storage via TemporalCheckpointer.
+- emit_langgraph_messages (the Temporal-specific streaming helper).
+- Real LLM calls or real LangGraph graph execution.
+
+See also: test_harness_langgraph_sync.py and test_harness_langgraph_async.py.
+"""
+
+from __future__ import annotations
+
+import sys
+from typing import Any
+from dataclasses import field, dataclass
+
+import pytest
+
+from agentex.types.task_message import TaskMessage
+from agentex.types.text_content import TextContent
+from agentex.lib.core.harness.emitter import UnifiedEmitter
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn
+from agentex.lib.adk._modules._langgraph_async import stream_langgraph_events
+
+# ---------------------------------------------------------------------------
+# Remove conftest stubs so real langchain_core types are used
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _real_langchain_core():
+    stub_keys = [k for k in sys.modules if k.startswith("langchain_core") or k.startswith("langgraph")]
+    saved = {k: sys.modules.pop(k) for k in stub_keys}
+    import importlib
+
+    importlib.import_module("langchain_core.messages")
+    yield
+    sys.modules.update(saved)
+
+
+# ---------------------------------------------------------------------------
+# Fake streaming backend
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class _FakeCtx:
+    ctype: str
+    initial_content: Any
+    task_message: TaskMessage
+    closed: bool = False
+    deltas: list[Any] = field(default_factory=list)
+
+    async def __aenter__(self) -> "_FakeCtx":
+        return self
+
+    async def __aexit__(self, *args: Any) -> bool:
+        await self.close()
+        return False
+
+    async def close(self) -> None:
+        self.closed = True
+
+    async def stream_update(self, update: Any) -> Any:
+        self.deltas.append(update)
+        return update
+
+
+class _FakeStreaming:
+    def __init__(self) -> None:
+        self.contexts: list[_FakeCtx] = []
+
+    def streaming_task_message_context(self, task_id: str, initial_content: Any, **kw: Any) -> _FakeCtx:
+        ctype = getattr(initial_content, "type", None) or ""
+        tm = TaskMessage(id=f"m{len(self.contexts) + 1}", task_id=task_id, content=initial_content)
+        ctx = _FakeCtx(ctype=ctype, initial_content=initial_content, task_message=tm)
+        self.contexts.append(ctx)
+        return ctx
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_stream(events: list[tuple[str, Any]]):
+    async def _gen():
+        for e in events:
+            yield e
+
+    return _gen()
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestTemporalAutoSendChannel:
+    async def test_stream_langgraph_events_plain_text(self, monkeypatch):
+        """stream_langgraph_events (used by temporal agents via the acp.py activity) returns
+        the accumulated final text."""
+        from langchain_core.messages import AIMessage, AIMessageChunk
+
+        from agentex.lib import adk as adk_module
+
+        fake_streaming = _FakeStreaming()
+        monkeypatch.setattr(adk_module, "streaming", fake_streaming)
+
+        chunk = AIMessageChunk(content="Hello Temporal!")
+        ai_msg = AIMessage(content="Hello Temporal!")
+        events = [
+            ("messages", (chunk, {})),
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+        ]
+
+        final = await stream_langgraph_events(_make_stream(events), "task-1")
+        assert final == "Hello Temporal!"
+
+    async def test_stream_langgraph_events_tool_call(self, monkeypatch):
+        from langchain_core.messages import AIMessage, ToolMessage
+
+        from agentex.lib import adk as adk_module
+
+        fake_streaming = _FakeStreaming()
+        monkeypatch.setattr(adk_module, "streaming", fake_streaming)
+
+        tc = {"id": "c1", "name": "search", "args": {"q": "test"}}
+        ai_msg = AIMessage(content="", tool_calls=[tc])
+        tool_msg = ToolMessage(content="results", tool_call_id="c1", name="search")
+        chunk_final = AIMessage(content="Here are the results.")
+
+        events = [
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+            ("updates", {"tools": {"messages": [tool_msg]}}),
+            ("updates", {"agent": {"messages": [chunk_final]}}),
+        ]
+
+        final = await stream_langgraph_events(_make_stream(events), "task-1")
+
+        # Check tool request and response posted to fake streaming
+        tool_req_ctxs = [c for c in fake_streaming.contexts if isinstance(c.initial_content, ToolRequestContent)]
+        tool_resp_ctxs = [c for c in fake_streaming.contexts if isinstance(c.initial_content, ToolResponseContent)]
+        assert len(tool_req_ctxs) == 1
+        assert len(tool_resp_ctxs) == 1
+        assert tool_req_ctxs[0].initial_content.name == "search"
+
+    async def test_langgraph_turn_auto_send_via_unified_emitter(self):
+        """Direct UnifiedEmitter.auto_send_turn path used by temporal agent workflow
+        activities. Uses a fake streaming backend (no Redis)."""
+        from langchain_core.messages import AIMessage, AIMessageChunk
+
+        fake_streaming = _FakeStreaming()
+        chunk = AIMessageChunk(content="Temporal answer!")
+        ai_msg = AIMessage(content="Temporal answer!")
+        events = [
+            ("messages", (chunk, {})),
+            ("updates", {"agent": {"messages": [ai_msg]}}),
+        ]
+
+        turn = LangGraphTurn(_make_stream(events), model=None)
+        emitter = UnifiedEmitter(
+            task_id="task-1",
+            trace_id=None,
+            parent_span_id=None,
+            streaming=fake_streaming,
+        )
+        result = await emitter.auto_send_turn(turn)
+
+        assert result.final_text == "Temporal answer!"
+        text_ctxs = [c for c in fake_streaming.contexts if isinstance(c.initial_content, TextContent)]
+        assert len(text_ctxs) == 1
+
+    async def test_usage_captured_via_turn_after_events_consumed(self):
+        """Usage from AIMessage.usage_metadata is captured via the on_final_ai_message
+        callback during event iteration. The authoritative usage is on turn.usage()
+        after events are consumed (emitter.auto_send_turn evaluates turn.usage()
+        eagerly before iteration, so TurnResult.usage is a pre-iteration snapshot)."""
+        from langchain_core.messages import AIMessage
+
+        fake_streaming = _FakeStreaming()
+        usage_meta = {"input_tokens": 20, "output_tokens": 10, "total_tokens": 30}
+        ai_msg = AIMessage(content="answer", usage_metadata=usage_meta)
+        events = [("updates", {"agent": {"messages": [ai_msg]}})]
+
+        turn = LangGraphTurn(_make_stream(events), model="gpt-4o")
+        emitter = UnifiedEmitter(
+            task_id="task-1",
+            trace_id=None,
+            parent_span_id=None,
+            streaming=fake_streaming,
+        )
+        await emitter.auto_send_turn(turn)
+
+        # After auto_send_turn, turn.usage() has the captured values
+        usage = turn.usage()
+        assert usage.input_tokens == 20
+        assert usage.output_tokens == 10
+        assert usage.total_tokens == 30
+
+    async def test_empty_stream_returns_empty_string(self, monkeypatch):
+        from agentex.lib import adk as adk_module
+
+        fake_streaming = _FakeStreaming()
+        monkeypatch.setattr(adk_module, "streaming", fake_streaming)
+
+        final = await stream_langgraph_events(_make_stream([]), "task-1")
+        assert final == ""
+        assert fake_streaming.contexts == []

From 9b2b03144cc67bb497e0a301686207aba2629758 Mon Sep 17 00:00:00 2001
From: Declan Brady <declan.brady@scale.com>
Date: Mon, 22 Jun 2026 18:45:46 -0400
Subject: [PATCH 08/10] feat(codex): event-stream parser tap for the unified
 harness surface (#421)

---
 .../00_sync/harness_codex/Dockerfile          |  50 ++
 .../tutorials/00_sync/harness_codex/README.md |  40 ++
 .../00_sync/harness_codex/conftest.py         |  12 +
 .../00_sync/harness_codex/manifest.yaml       |  58 ++
 .../00_sync/harness_codex/project/__init__.py |   0
 .../00_sync/harness_codex/project/acp.py      | 175 +++++
 .../00_sync/harness_codex/pyproject.toml      |  38 +
 .../00_sync/harness_codex/tests/test_agent.py | 176 +++++
 .../10_async/00_base/harness_codex/Dockerfile |  39 +
 .../10_async/00_base/harness_codex/README.md  |  40 ++
 .../00_base/harness_codex/conftest.py         |  12 +
 .../00_base/harness_codex/manifest.yaml       |  58 ++
 .../00_base/harness_codex/project/__init__.py |   0
 .../00_base/harness_codex/project/acp.py      | 230 ++++++
 .../00_base/harness_codex/pyproject.toml      |  38 +
 .../00_base/harness_codex/tests/test_agent.py | 188 +++++
 .../10_temporal/harness_codex/Dockerfile      |  42 ++
 .../10_temporal/harness_codex/README.md       |  48 ++
 .../10_temporal/harness_codex/conftest.py     |  17 +
 .../10_temporal/harness_codex/manifest.yaml   |  62 ++
 .../harness_codex/project/__init__.py         |   0
 .../10_temporal/harness_codex/project/acp.py  |  32 +
 .../harness_codex/project/activities.py       | 145 ++++
 .../harness_codex/project/run_worker.py       |  41 ++
 .../harness_codex/project/workflow.py         | 145 ++++
 .../10_temporal/harness_codex/pyproject.toml  |  40 ++
 .../harness_codex/tests/test_agent.py         | 275 +++++++
 src/agentex/lib/adk/__init__.py               |   6 +
 src/agentex/lib/adk/_modules/_codex_sync.py   | 587 +++++++++++++++
 src/agentex/lib/adk/_modules/_codex_turn.py   | 214 ++++++
 tests/lib/adk/test_codex_sync.py              | 671 ++++++++++++++++++
 tests/lib/adk/test_codex_turn.py              | 282 ++++++++
 .../conformance/test_codex_conformance.py     | 225 ++++++
 33 files changed, 3986 insertions(+)
 create mode 100644 examples/tutorials/00_sync/harness_codex/Dockerfile
 create mode 100644 examples/tutorials/00_sync/harness_codex/README.md
 create mode 100644 examples/tutorials/00_sync/harness_codex/conftest.py
 create mode 100644 examples/tutorials/00_sync/harness_codex/manifest.yaml
 create mode 100644 examples/tutorials/00_sync/harness_codex/project/__init__.py
 create mode 100644 examples/tutorials/00_sync/harness_codex/project/acp.py
 create mode 100644 examples/tutorials/00_sync/harness_codex/pyproject.toml
 create mode 100644 examples/tutorials/00_sync/harness_codex/tests/test_agent.py
 create mode 100644 examples/tutorials/10_async/00_base/harness_codex/Dockerfile
 create mode 100644 examples/tutorials/10_async/00_base/harness_codex/README.md
 create mode 100644 examples/tutorials/10_async/00_base/harness_codex/conftest.py
 create mode 100644 examples/tutorials/10_async/00_base/harness_codex/manifest.yaml
 create mode 100644 examples/tutorials/10_async/00_base/harness_codex/project/__init__.py
 create mode 100644 examples/tutorials/10_async/00_base/harness_codex/project/acp.py
 create mode 100644 examples/tutorials/10_async/00_base/harness_codex/pyproject.toml
 create mode 100644 examples/tutorials/10_async/00_base/harness_codex/tests/test_agent.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_codex/Dockerfile
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_codex/README.md
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_codex/conftest.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_codex/manifest.yaml
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_codex/project/__init__.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_codex/project/acp.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_codex/project/activities.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_codex/project/run_worker.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_codex/project/workflow.py
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_codex/pyproject.toml
 create mode 100644 examples/tutorials/10_async/10_temporal/harness_codex/tests/test_agent.py
 create mode 100644 src/agentex/lib/adk/_modules/_codex_sync.py
 create mode 100644 src/agentex/lib/adk/_modules/_codex_turn.py
 create mode 100644 tests/lib/adk/test_codex_sync.py
 create mode 100644 tests/lib/adk/test_codex_turn.py
 create mode 100644 tests/lib/core/harness/conformance/test_codex_conformance.py

diff --git a/examples/tutorials/00_sync/harness_codex/Dockerfile b/examples/tutorials/00_sync/harness_codex/Dockerfile
new file mode 100644
index 000000000..72713b95d
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_codex/Dockerfile
@@ -0,0 +1,50 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+ENV UV_HTTP_TIMEOUT=1000
+
+# Copy pyproject.toml and README.md to install dependencies
+COPY 00_sync/harness_codex/pyproject.toml /app/harness_codex/pyproject.toml
+COPY 00_sync/harness_codex/README.md /app/harness_codex/README.md
+
+WORKDIR /app/harness_codex
+
+# Copy the project code
+COPY 00_sync/harness_codex/project /app/harness_codex/project
+
+# Copy the test files
+COPY 00_sync/harness_codex/tests /app/harness_codex/tests
+
+# Copy shared test utilities
+COPY test_utils /app/test_utils
+
+# Install the required Python packages with dev dependencies
+RUN uv pip install --system .[dev]
+
+# Set environment variables
+ENV PYTHONPATH=/app
+
+# Set test environment variables
+ENV AGENT_NAME=s-harness-codex
+
+# Run the agent using uvicorn
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/examples/tutorials/00_sync/harness_codex/README.md b/examples/tutorials/00_sync/harness_codex/README.md
new file mode 100644
index 000000000..5f3396cfa
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_codex/README.md
@@ -0,0 +1,40 @@
+# harness_codex (sync)
+
+Tutorial agent demonstrating the `convert_codex_to_agentex_events` tap,
+`CodexTurn`, and `UnifiedEmitter` for a **sync** (HTTP-yield) ACP agent.
+
+## What this tutorial shows
+
+- Spawning `codex exec --json` as a **local asyncio subprocess** (no Scale sandbox).
+- Wrapping the stdout line stream in a `CodexTurn`.
+- Delivering every canonical `StreamTaskMessage*` event to the HTTP caller via
+  `UnifiedEmitter.yield_turn` (tracing as a side-effect).
+
+> **Production isolation note:** A tutorial agent runs the Codex CLI locally.
+> Production-grade isolation (Scale sandbox, secret injection, MCP configuration)
+> is handled by the golden agent at
+> `teams/sgp/agents/golden_agent/project/harness/providers/codex.py`.
+
+## Live runs
+
+Live runs require:
+1. The `codex` CLI on PATH: `npm install -g @openai/codex`
+2. `OPENAI_API_KEY` set in the environment.
+
+## Running offline unit tests
+
+The offline tests inject a fake subprocess and never invoke the real CLI:
+
+```bash
+cd /path/to/scale-agentex-python
+uv run --all-packages --all-extras pytest examples/tutorials/00_sync/harness_codex/tests/test_agent.py -q
+```
+
+## Running live integration tests
+
+```bash
+export CODEX_LIVE_TESTS=1
+export OPENAI_API_KEY=sk-...
+# Start the agent server first, then:
+pytest tests/test_agent.py -v
+```
diff --git a/examples/tutorials/00_sync/harness_codex/conftest.py b/examples/tutorials/00_sync/harness_codex/conftest.py
new file mode 100644
index 000000000..bdd78994b
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_codex/conftest.py
@@ -0,0 +1,12 @@
+"""Add the agent's project root to sys.path so ``import project`` works.
+
+Also sets minimal environment variables so the FastACP and tracing modules
+can be imported without a running agent server.
+"""
+
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(__file__))
+
+os.environ.setdefault("ACP_URL", "http://localhost:8000")
diff --git a/examples/tutorials/00_sync/harness_codex/manifest.yaml b/examples/tutorials/00_sync/harness_codex/manifest.yaml
new file mode 100644
index 000000000..52943f8f2
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_codex/manifest.yaml
@@ -0,0 +1,58 @@
+build:
+  context:
+    root: ../../
+    include_paths:
+      - 00_sync/harness_codex
+      - test_utils
+    dockerfile: 00_sync/harness_codex/Dockerfile
+    dockerignore: 00_sync/harness_codex/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+
+agent:
+  acp_type: sync
+  name: s-harness-codex
+  description: Sync tutorial agent driving the unified harness surface via local codex CLI subprocess
+
+  temporal:
+    enabled: false
+
+  credentials:
+    - env_var_name: OPENAI_API_KEY
+      secret_name: openai-api-key
+      secret_key: api-key
+    - env_var_name: REDIS_URL
+      secret_name: redis-url-secret
+      secret_key: url
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "s-harness-codex"
+      description: "Sync tutorial agent driving the unified harness surface via local codex CLI subprocess"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/00_sync/harness_codex/project/__init__.py b/examples/tutorials/00_sync/harness_codex/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/00_sync/harness_codex/project/acp.py b/examples/tutorials/00_sync/harness_codex/project/acp.py
new file mode 100644
index 000000000..bcb5e10df
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_codex/project/acp.py
@@ -0,0 +1,175 @@
+"""Sync ACP handler for the Codex CLI harness tutorial.
+
+Demonstrates the ``convert_codex_to_agentex_events`` tap + ``CodexTurn`` +
+``UnifiedEmitter`` for a sync (HTTP-yield) ACP agent.
+
+The handler:
+1. Spawns ``codex exec --json`` as a LOCAL asyncio subprocess (no sandbox).
+   This is correct for tutorials and local development; production isolation
+   is handled by the golden agent's Scale sandbox at
+   ``teams/sgp/agents/golden_agent/project/harness/providers/codex.py``.
+2. Wraps the stdout line stream in a ``CodexTurn``.
+3. Delivers every canonical ``StreamTaskMessage*`` event via
+   ``UnifiedEmitter.yield_turn``, which traces + yields each event back to
+   the HTTP caller in one pass.
+
+Live runs require:
+- ``codex`` CLI on PATH  (``npm install -g @openai/codex``)
+- ``OPENAI_API_KEY`` set in the environment
+"""
+
+from __future__ import annotations
+
+import os
+import time
+import codecs
+import asyncio
+from typing import AsyncGenerator
+from collections.abc import AsyncIterator
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import agentex.lib.adk as adk
+from agentex.lib.adk import CodexTurn
+from agentex.lib.types.acp import SendMessageParams
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.types.task_message_update import TaskMessageUpdate
+from agentex.types.task_message_content import TaskMessageContent
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+logger = make_logger(__name__)
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+acp = FastACP.create(acp_type="sync")
+
+MODEL = os.environ.get("CODEX_MODEL", "o4-mini")
+
+
+async def _spawn_codex(model: str) -> asyncio.subprocess.Process:
+    """Spawn ``codex exec --json`` locally and return the live process.
+
+    Injection seam: tests replace this function with a fake that returns a
+    mock process whose stdout yields pre-recorded event lines.
+
+    The flags mirror the golden agent (codex.py in the golden agent repo):
+      --json                      machine-readable newline-delimited events
+      --skip-git-repo-check       safe to run outside a git repo
+      --dangerously-bypass-approvals-and-sandbox
+                                  skip interactive approval prompts in a
+                                  non-interactive (server) context
+      --model <model>             which OpenAI model to use
+
+    The caller writes the prompt to stdin after the process starts, then
+    closes stdin so codex knows input is complete.
+    """
+    cmd = [
+        "codex",
+        "exec",
+        "--json",
+        "--skip-git-repo-check",
+        "--dangerously-bypass-approvals-and-sandbox",
+        "--model",
+        model,
+        "-",  # read prompt from stdin
+    ]
+    return await asyncio.create_subprocess_exec(
+        *cmd,
+        stdin=asyncio.subprocess.PIPE,
+        stdout=asyncio.subprocess.PIPE,
+        # Discard stderr: codex --json writes events to stdout; its stderr is
+        # progress/debug noise. Capturing it with PIPE but never reading it
+        # would deadlock once codex fills the OS pipe buffer (~64 KB).
+        stderr=asyncio.subprocess.DEVNULL,
+        env={**os.environ},
+    )
+
+
+async def _process_stdout(process: asyncio.subprocess.Process) -> AsyncIterator[str]:
+    """Yield newline-delimited JSON lines from the process stdout.
+
+    Uses an incremental UTF-8 decoder so a multibyte character split across two
+    4 KB reads is decoded correctly instead of being corrupted at the boundary.
+    """
+    assert process.stdout is not None
+    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
+    buffer = ""
+    while True:
+        chunk = await process.stdout.read(4096)
+        if not chunk:
+            break
+        buffer += decoder.decode(chunk)
+        while "\n" in buffer:
+            line, buffer = buffer.split("\n", 1)
+            line = line.strip()
+            if line:
+                yield line
+    buffer += decoder.decode(b"", final=True)
+    if buffer.strip():
+        yield buffer.strip()
+
+
+@acp.on_message_send
+async def handle_message_send(
+    params: SendMessageParams,
+) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]:
+    """Handle each message by running ``codex exec`` locally and streaming events."""
+    task_id = params.task.id
+    user_message = params.content.content
+    logger.info("Processing message for task %s", task_id)
+
+    start_ms = int(time.monotonic() * 1000)
+
+    async with adk.tracing.span(
+        trace_id=task_id,
+        task_id=task_id,
+        name="message",
+        input={"message": user_message},
+        data={"__span_type__": "AGENT_WORKFLOW"},
+    ) as turn_span:
+        process = await _spawn_codex(MODEL)
+
+        # Write prompt to stdin then close it so codex knows input is done.
+        assert process.stdin is not None
+        process.stdin.write(user_message.encode("utf-8"))
+        await process.stdin.drain()
+        process.stdin.close()
+
+        turn = CodexTurn(
+            events=_process_stdout(process),
+            model=MODEL,
+        )
+
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+
+        async for event in emitter.yield_turn(turn):
+            yield event
+
+        await process.wait()
+
+        # Record the real wall-clock duration AFTER streaming completes; setting
+        # it before the stream ran would capture only subprocess spawn overhead.
+        turn.duration_ms = int(time.monotonic() * 1000) - start_ms
+
+        if turn_span:
+            usage = turn.usage()
+            turn_span.output = {
+                "model": usage.model,
+                "input_tokens": usage.input_tokens,
+                "output_tokens": usage.output_tokens,
+            }
diff --git a/examples/tutorials/00_sync/harness_codex/pyproject.toml b/examples/tutorials/00_sync/harness_codex/pyproject.toml
new file mode 100644
index 000000000..ca7d8ac18
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_codex/pyproject.toml
@@ -0,0 +1,38 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "s-harness-codex"
+version = "0.1.0"
+description = "Sync tutorial agent driving the unified harness surface via local codex CLI subprocess"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+    "black",
+    "isort",
+    "flake8",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
+
+[tool.black]
+line-length = 88
+target-version = ['py312']
+
+[tool.isort]
+profile = "black"
+line_length = 88
+
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
diff --git a/examples/tutorials/00_sync/harness_codex/tests/test_agent.py b/examples/tutorials/00_sync/harness_codex/tests/test_agent.py
new file mode 100644
index 000000000..b2d5b6498
--- /dev/null
+++ b/examples/tutorials/00_sync/harness_codex/tests/test_agent.py
@@ -0,0 +1,176 @@
+"""Tests for the sync Codex harness tutorial agent.
+
+LIVE tests (``TestLiveCodexAgent``):
+  - Require the ``codex`` CLI on PATH and ``OPENAI_API_KEY`` set.
+  - Run the full agent end-to-end against a live Agentex server.
+  - Skipped automatically when ``CODEX_LIVE_TESTS`` is not set to ``1``.
+
+OFFLINE unit tests (``TestOfflineCodexHandler``):
+  - Inject a fake async iterator of pre-recorded codex event lines.
+  - Assert the ``CodexTurn`` + ``UnifiedEmitter`` pipeline yields events,
+    populates usage, and satisfies the ``HarnessTurn`` protocol.
+  - Always run.
+"""
+
+from __future__ import annotations
+
+import os
+import json
+from typing import Any
+
+import pytest
+
+# ---------------------------------------------------------------------------
+# Shared helpers
+# ---------------------------------------------------------------------------
+
+SAMPLE_EVENTS: list[dict[str, Any]] = [
+    {"type": "thread.started", "thread_id": "thread-abc"},
+    {"type": "turn.started"},
+    {
+        "type": "item.started",
+        "item": {"id": "msg-1", "type": "agent_message", "text": "Hello"},
+    },
+    {
+        "type": "item.completed",
+        "item": {"id": "msg-1", "type": "agent_message", "text": "Hello, world!"},
+    },
+    {
+        "type": "turn.completed",
+        "usage": {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
+    },
+]
+
+
+async def _fake_event_stream():
+    """Async iterator of pre-recorded codex event JSON lines (no subprocess)."""
+    for evt in SAMPLE_EVENTS:
+        yield json.dumps(evt)
+
+
+class TestOfflineCodexHandler:
+    """Unit tests that run without a real codex CLI or network."""
+
+    @pytest.mark.asyncio
+    async def test_codex_turn_yields_stream_events(self):
+        """CodexTurn drives the unified surface and yields StreamTaskMessage* events."""
+        from agentex.lib.adk import CodexTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+
+        events = [e async for e in emitter.yield_turn(turn)]
+        assert len(events) > 0, "No events yielded"
+
+        types_seen = {type(e).__name__ for e in events}
+        known_types = {
+            "StreamTaskMessageStart",
+            "StreamTaskMessageDelta",
+            "StreamTaskMessageFull",
+            "StreamTaskMessageDone",
+        }
+        assert bool(types_seen & known_types), f"Unexpected event types: {types_seen}"
+
+    @pytest.mark.asyncio
+    async def test_usage_populated_after_stream_exhausted(self):
+        """CodexTurn.usage() returns correct tokens after stream is exhausted."""
+        from agentex.lib.adk import CodexTurn
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+
+        collected = [e async for e in turn.events]
+
+        usage = turn.usage()
+        assert usage.input_tokens == 10
+        assert usage.output_tokens == 5
+        assert usage.total_tokens == 15
+        assert usage.model == "o4-mini"
+
+    @pytest.mark.asyncio
+    async def test_codex_turn_protocol_compliance(self):
+        """CodexTurn satisfies the HarnessTurn protocol."""
+        from agentex.lib.adk import CodexTurn
+        from agentex.lib.core.harness.types import HarnessTurn
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+        assert isinstance(turn, HarnessTurn), "CodexTurn does not satisfy HarnessTurn protocol"
+
+    @pytest.mark.asyncio
+    async def test_unified_emitter_yield_passes_through_events(self):
+        """UnifiedEmitter.yield_turn passes events through unchanged in sync mode."""
+        from agentex.lib.adk import CodexTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+
+        events = [e async for e in emitter.yield_turn(turn)]
+        assert len(events) > 0
+
+    @pytest.mark.asyncio
+    async def test_convert_codex_to_agentex_events_direct(self):
+        """convert_codex_to_agentex_events tap produces text start/done events."""
+        from agentex.lib.adk import convert_codex_to_agentex_events
+        from agentex.types.task_message_update import StreamTaskMessageDone
+
+        events = [e async for e in convert_codex_to_agentex_events(_fake_event_stream())]
+        assert any(isinstance(e, StreamTaskMessageDone) for e in events), (
+            "Expected at least one StreamTaskMessageDone event"
+        )
+
+    @pytest.mark.asyncio
+    async def test_on_result_callback_receives_session_id(self):
+        """on_result callback receives the session_id from thread.started."""
+        from agentex.lib.adk import convert_codex_to_agentex_events
+
+        captured: list[dict] = []
+
+        events = [
+            e
+            async for e in convert_codex_to_agentex_events(
+                _fake_event_stream(),
+                on_result=captured.append,
+            )
+        ]
+
+        assert len(captured) == 1
+        assert captured[0]["session_id"] == "thread-abc"
+        assert captured[0]["tool_call_count"] == 0
+
+
+# ---------------------------------------------------------------------------
+# Live tests (skipped unless CODEX_LIVE_TESTS=1)
+# ---------------------------------------------------------------------------
+
+LIVE = os.environ.get("CODEX_LIVE_TESTS", "") == "1"
+AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
+AGENT_NAME = os.environ.get("AGENT_NAME", "s-harness-codex")
+
+
+@pytest.mark.skipif(not LIVE, reason="Set CODEX_LIVE_TESTS=1 and ensure codex CLI + OPENAI_API_KEY are available")
+class TestLiveCodexAgent:
+    """End-to-end tests that require the real codex CLI and a running Agentex server."""
+
+    @pytest.fixture
+    def client(self):
+        from agentex import Agentex
+
+        return Agentex(base_url=AGENTEX_API_BASE_URL)
+
+    def test_send_simple_message(self, client):
+        from agentex.types import TextContentParam
+        from agentex.types.agent_rpc_params import ParamsSendMessageRequest
+
+        response = client.agents.send_message(
+            agent_name=AGENT_NAME,
+            params=ParamsSendMessageRequest(
+                content=TextContentParam(
+                    author="user",
+                    content="What is 2+2? Reply with just the number.",
+                    type="text",
+                )
+            ),
+        )
+        assert response.result is not None
+        assert len(response.result) >= 1
diff --git a/examples/tutorials/10_async/00_base/harness_codex/Dockerfile b/examples/tutorials/10_async/00_base/harness_codex/Dockerfile
new file mode 100644
index 000000000..06b76aae2
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_codex/Dockerfile
@@ -0,0 +1,39 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+ENV UV_HTTP_TIMEOUT=1000
+
+COPY 10_async/00_base/harness_codex/pyproject.toml /app/harness_codex/pyproject.toml
+COPY 10_async/00_base/harness_codex/README.md /app/harness_codex/README.md
+
+WORKDIR /app/harness_codex
+
+COPY 10_async/00_base/harness_codex/project /app/harness_codex/project
+COPY 10_async/00_base/harness_codex/tests /app/harness_codex/tests
+COPY test_utils /app/test_utils
+
+RUN uv pip install --system .[dev]
+
+ENV PYTHONPATH=/app
+ENV AGENT_NAME=ab-harness-codex
+
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/examples/tutorials/10_async/00_base/harness_codex/README.md b/examples/tutorials/10_async/00_base/harness_codex/README.md
new file mode 100644
index 000000000..9bbcd927a
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_codex/README.md
@@ -0,0 +1,40 @@
+# harness_codex (async base)
+
+Tutorial agent demonstrating the `convert_codex_to_agentex_events` tap,
+`CodexTurn`, and `UnifiedEmitter` for an **async** (Redis-streaming, no Temporal)
+ACP agent.
+
+## What this tutorial shows
+
+- Spawning `codex exec --json` as a **local asyncio subprocess** (no Scale sandbox).
+- Wrapping the stdout line stream in a `CodexTurn`.
+- Delivering every canonical `StreamTaskMessage*` event to Redis via
+  `UnifiedEmitter.auto_send_turn`, so the UI receives tokens in real time.
+- Persisting the codex thread ID in `adk.state` so subsequent turns resume the
+  same codex session via `codex exec resume <thread_id>`.
+
+> **Production isolation note:** A tutorial agent runs the Codex CLI locally.
+> Production-grade isolation (Scale sandbox, secret injection, MCP configuration)
+> is handled by the golden agent at
+> `teams/sgp/agents/golden_agent/project/harness/providers/codex.py`.
+
+## Live runs
+
+Live runs require:
+1. The `codex` CLI on PATH: `npm install -g @openai/codex`
+2. `OPENAI_API_KEY` set in the environment.
+
+## Running offline unit tests
+
+```bash
+cd /path/to/scale-agentex-python
+uv run --all-packages --all-extras pytest examples/tutorials/10_async/00_base/harness_codex/tests/test_agent.py -q
+```
+
+## Running live integration tests
+
+```bash
+export CODEX_LIVE_TESTS=1
+export OPENAI_API_KEY=sk-...
+pytest tests/test_agent.py -v
+```
diff --git a/examples/tutorials/10_async/00_base/harness_codex/conftest.py b/examples/tutorials/10_async/00_base/harness_codex/conftest.py
new file mode 100644
index 000000000..bdd78994b
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_codex/conftest.py
@@ -0,0 +1,12 @@
+"""Add the agent's project root to sys.path so ``import project`` works.
+
+Also sets minimal environment variables so the FastACP and tracing modules
+can be imported without a running agent server.
+"""
+
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(__file__))
+
+os.environ.setdefault("ACP_URL", "http://localhost:8000")
diff --git a/examples/tutorials/10_async/00_base/harness_codex/manifest.yaml b/examples/tutorials/10_async/00_base/harness_codex/manifest.yaml
new file mode 100644
index 000000000..e88e2029d
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_codex/manifest.yaml
@@ -0,0 +1,58 @@
+build:
+  context:
+    root: ../../../
+    include_paths:
+      - 10_async/00_base/harness_codex
+      - test_utils
+    dockerfile: 10_async/00_base/harness_codex/Dockerfile
+    dockerignore: 10_async/00_base/harness_codex/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+
+agent:
+  acp_type: async
+  name: ab-harness-codex
+  description: Async (base) tutorial agent driving the unified harness surface via local codex CLI subprocess
+
+  temporal:
+    enabled: false
+
+  credentials:
+    - env_var_name: OPENAI_API_KEY
+      secret_name: openai-api-key
+      secret_key: api-key
+    - env_var_name: REDIS_URL
+      secret_name: redis-url-secret
+      secret_key: url
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "ab-harness-codex"
+      description: "Async (base) tutorial agent driving the unified harness surface via local codex CLI subprocess"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/10_async/00_base/harness_codex/project/__init__.py b/examples/tutorials/10_async/00_base/harness_codex/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/10_async/00_base/harness_codex/project/acp.py b/examples/tutorials/10_async/00_base/harness_codex/project/acp.py
new file mode 100644
index 000000000..0233c49ab
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_codex/project/acp.py
@@ -0,0 +1,230 @@
+"""Async (base) ACP handler for the Codex CLI harness tutorial.
+
+Demonstrates the ``convert_codex_to_agentex_events`` tap + ``CodexTurn`` +
+``UnifiedEmitter`` for an async (Redis-streaming) ACP agent without Temporal.
+
+The handler:
+1. Spawns ``codex exec --json`` as a LOCAL asyncio subprocess (no sandbox).
+   This is correct for tutorials and local development; production isolation
+   is handled by the golden agent's Scale sandbox at
+   ``teams/sgp/agents/golden_agent/project/harness/providers/codex.py``.
+2. Wraps the stdout line stream in a ``CodexTurn``.
+3. Delivers every canonical ``StreamTaskMessage*`` event to Redis via
+   ``UnifiedEmitter.auto_send_turn``, so the UI receives tokens in real time.
+4. Multi-turn memory is persisted via ``adk.state``.
+
+Live runs require:
+- ``codex`` CLI on PATH  (``npm install -g @openai/codex``)
+- ``OPENAI_API_KEY`` set in the environment
+"""
+
+from __future__ import annotations
+
+import os
+import time
+import codecs
+import asyncio
+from collections.abc import AsyncIterator
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import agentex.lib.adk as adk
+from agentex.lib.adk import CodexTurn
+from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.lib.types.fastacp import AsyncACPConfig
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.utils.model_utils import BaseModel
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+logger = make_logger(__name__)
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+acp = FastACP.create(
+    acp_type="async",
+    config=AsyncACPConfig(type="base"),
+)
+
+MODEL = os.environ.get("CODEX_MODEL", "o4-mini")
+
+
+class ConversationState(BaseModel):
+    """Per-task conversation state persisted via ``adk.state``.
+
+    We store the codex session/thread ID so subsequent turns can resume the
+    same codex session via ``codex exec resume <thread_id>``.
+    """
+
+    codex_thread_id: str | None = None
+    turn_number: int = 0
+
+
+async def _spawn_codex(
+    model: str,
+    thread_id: str | None = None,
+) -> asyncio.subprocess.Process:
+    """Spawn ``codex exec --json`` locally and return the live process.
+
+    Injection seam: tests replace this function with a fake that returns a
+    mock process whose stdout yields pre-recorded event lines.
+
+    When ``thread_id`` is provided the subcommand becomes
+    ``codex exec ... resume <thread_id> -`` so codex continues the prior
+    conversation thread.
+
+    The caller writes the prompt to stdin after the process starts, then
+    closes stdin so codex knows input is complete.
+    """
+    base_flags = [
+        "--json",
+        "--skip-git-repo-check",
+        "--dangerously-bypass-approvals-and-sandbox",
+        "--model",
+        model,
+    ]
+
+    if thread_id:
+        cmd = ["codex", "exec", *base_flags, "resume", thread_id, "-"]
+    else:
+        cmd = ["codex", "exec", *base_flags, "-"]
+
+    return await asyncio.create_subprocess_exec(
+        *cmd,
+        stdin=asyncio.subprocess.PIPE,
+        stdout=asyncio.subprocess.PIPE,
+        # Discard stderr: codex --json writes events to stdout; its stderr is
+        # progress/debug noise. Capturing it with PIPE but never reading it
+        # would deadlock once codex fills the OS pipe buffer (~64 KB).
+        stderr=asyncio.subprocess.DEVNULL,
+        env={**os.environ},
+    )
+
+
+async def _process_stdout(process: asyncio.subprocess.Process) -> AsyncIterator[str]:
+    """Yield newline-delimited JSON lines from the process stdout.
+
+    Uses an incremental UTF-8 decoder so a multibyte character split across two
+    4 KB reads is decoded correctly instead of being corrupted at the boundary.
+    """
+    assert process.stdout is not None
+    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
+    buffer = ""
+    while True:
+        chunk = await process.stdout.read(4096)
+        if not chunk:
+            break
+        buffer += decoder.decode(chunk)
+        while "\n" in buffer:
+            line, buffer = buffer.split("\n", 1)
+            line = line.strip()
+            if line:
+                yield line
+    buffer += decoder.decode(b"", final=True)
+    if buffer.strip():
+        yield buffer.strip()
+
+
+@acp.on_task_create
+async def handle_task_create(params: CreateTaskParams):
+    """Initialize per-task state on task creation."""
+    logger.info("Task created: %s", params.task.id)
+    await adk.state.create(
+        task_id=params.task.id,
+        agent_id=params.agent.id,
+        state=ConversationState(),
+    )
+
+
+@acp.on_task_event_send
+async def handle_task_event_send(params: SendEventParams):
+    """Handle each user message: spawn codex, stream events, save thread ID."""
+    task_id = params.task.id
+    agent_id = params.agent.id
+    user_message = params.event.content.content
+
+    logger.info("Processing message for task %s", task_id)
+
+    await adk.messages.create(task_id=task_id, content=params.event.content)
+
+    task_state = await adk.state.get_by_task_and_agent(task_id=task_id, agent_id=agent_id)
+    if task_state is None:
+        state = ConversationState()
+        task_state = await adk.state.create(task_id=task_id, agent_id=agent_id, state=state)
+    else:
+        state = ConversationState.model_validate(task_state.state)
+
+    state.turn_number += 1
+
+    async with adk.tracing.span(
+        trace_id=task_id,
+        task_id=task_id,
+        name=f"Turn {state.turn_number}",
+        input={"message": user_message},
+        data={"__span_type__": "AGENT_WORKFLOW"},
+    ) as turn_span:
+        start_ms = int(time.monotonic() * 1000)
+
+        process = await _spawn_codex(MODEL, thread_id=state.codex_thread_id)
+
+        assert process.stdin is not None
+        process.stdin.write(user_message.encode("utf-8"))
+        await process.stdin.drain()
+        process.stdin.close()
+
+        turn = CodexTurn(
+            events=_process_stdout(process),
+            model=MODEL,
+        )
+
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+
+        result = await emitter.auto_send_turn(turn)
+
+        await process.wait()
+
+        # Record the real wall-clock duration AFTER streaming completes; setting
+        # it before the stream ran would capture only subprocess spawn overhead.
+        turn.duration_ms = int(time.monotonic() * 1000) - start_ms
+
+        # Persist the new thread ID so subsequent turns resume the same session.
+        usage = turn.usage()
+        if usage.model:
+            # usage() is valid now that the stream is exhausted
+            pass
+        # Persist the codex session id (public accessor; valid post-stream) so the
+        # next turn resumes the same session.
+        if turn.session_id:
+            state.codex_thread_id = turn.session_id
+
+        await adk.state.update(
+            state_id=task_state.id,
+            task_id=task_id,
+            agent_id=agent_id,
+            state=state,
+        )
+
+        if turn_span:
+            turn_span.output = {
+                "final_text": result.final_text,
+                "model": usage.model,
+            }
+
+
+@acp.on_task_cancel
+async def handle_task_canceled(params: CancelTaskParams):
+    logger.info("Task canceled: %s", params.task.id)
diff --git a/examples/tutorials/10_async/00_base/harness_codex/pyproject.toml b/examples/tutorials/10_async/00_base/harness_codex/pyproject.toml
new file mode 100644
index 000000000..c25a65c47
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_codex/pyproject.toml
@@ -0,0 +1,38 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "ab-harness-codex"
+version = "0.1.0"
+description = "Async (base) tutorial agent driving the unified harness surface via local codex CLI subprocess"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+    "black",
+    "isort",
+    "flake8",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
+
+[tool.black]
+line-length = 88
+target-version = ['py312']
+
+[tool.isort]
+profile = "black"
+line_length = 88
+
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
diff --git a/examples/tutorials/10_async/00_base/harness_codex/tests/test_agent.py b/examples/tutorials/10_async/00_base/harness_codex/tests/test_agent.py
new file mode 100644
index 000000000..b50ee9116
--- /dev/null
+++ b/examples/tutorials/10_async/00_base/harness_codex/tests/test_agent.py
@@ -0,0 +1,188 @@
+"""Tests for the async (base) Codex harness tutorial agent.
+
+LIVE tests (``TestLiveCodexAgent``):
+  - Require the ``codex`` CLI on PATH and ``OPENAI_API_KEY`` set.
+  - Skipped automatically when ``CODEX_LIVE_TESTS`` is not set to ``1``.
+
+OFFLINE unit tests (``TestOfflineCodexHandler``):
+  - Inject a fake async iterator of pre-recorded codex event lines.
+  - Assert ``CodexTurn`` + ``UnifiedEmitter.auto_send_turn`` is driven correctly.
+  - Always run.
+"""
+
+from __future__ import annotations
+
+import os
+import json
+from typing import Any
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+# ---------------------------------------------------------------------------
+# Shared helpers
+# ---------------------------------------------------------------------------
+
+SAMPLE_EVENTS: list[dict[str, Any]] = [
+    {"type": "thread.started", "thread_id": "thread-xyz"},
+    {"type": "turn.started"},
+    {
+        "type": "item.started",
+        "item": {"id": "msg-1", "type": "agent_message", "text": "Hi"},
+    },
+    {
+        "type": "item.completed",
+        "item": {"id": "msg-1", "type": "agent_message", "text": "Hi there!"},
+    },
+    {
+        "type": "turn.completed",
+        "usage": {"input_tokens": 8, "output_tokens": 4, "total_tokens": 12},
+    },
+]
+
+
+async def _fake_event_stream():
+    """Async iterator of pre-recorded codex event JSON lines (no subprocess)."""
+    for evt in SAMPLE_EVENTS:
+        yield json.dumps(evt)
+
+
+class TestOfflineCodexHandler:
+    """Unit tests that run without a real codex CLI or network."""
+
+    @pytest.mark.asyncio
+    async def test_usage_populated_after_stream_exhausted(self):
+        """CodexTurn.usage() returns non-None tokens after stream is exhausted."""
+        from agentex.lib.adk import CodexTurn
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+
+        collected = [e async for e in turn.events]
+
+        usage = turn.usage()
+        assert usage.input_tokens == 8
+        assert usage.output_tokens == 4
+        assert usage.model == "o4-mini"
+
+    @pytest.mark.asyncio
+    async def test_auto_send_turn_drives_unified_surface(self):
+        """auto_send_turn returns a TurnResult with the final text."""
+        from agentex.lib.adk import CodexTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+        from agentex.types.task_message import TaskMessage
+        from agentex.types.text_content import TextContent
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+
+        real_task_msg = TaskMessage(
+            id="msg-fake",
+            task_id="t",
+            content=TextContent(type="text", author="agent", content=""),
+        )
+
+        fake_streaming = MagicMock()
+        fake_ctx = AsyncMock()
+        fake_ctx.__aenter__ = AsyncMock(return_value=fake_ctx)
+        fake_ctx.__aexit__ = AsyncMock(return_value=False)
+        fake_ctx.stream_update = AsyncMock(return_value=MagicMock())
+        fake_ctx.close = AsyncMock()
+        fake_ctx.task_message = real_task_msg
+        fake_streaming.streaming_task_message_context = MagicMock(return_value=fake_ctx)
+
+        emitter = UnifiedEmitter(
+            task_id="t",
+            trace_id=None,
+            parent_span_id=None,
+            streaming=fake_streaming,
+        )
+
+        result = await emitter.auto_send_turn(turn)
+        assert result is not None
+
+    @pytest.mark.asyncio
+    async def test_session_id_captured_after_stream(self):
+        """CodexTurn._result captures the session_id from thread.started."""
+        from agentex.lib.adk import CodexTurn
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+        _ = [e async for e in turn.events]
+
+        assert turn._result is not None
+        assert turn._result["session_id"] == "thread-xyz"
+
+    @pytest.mark.asyncio
+    async def test_yield_turn_is_passthrough(self):
+        """yield_turn mode also works with CodexTurn (no streaming infra needed)."""
+        from agentex.lib.adk import CodexTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+        emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None)
+
+        events = [e async for e in emitter.yield_turn(turn)]
+        assert len(events) > 0
+
+
+# ---------------------------------------------------------------------------
+# Live tests
+# ---------------------------------------------------------------------------
+
+LIVE = os.environ.get("CODEX_LIVE_TESTS", "") == "1"
+AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
+AGENT_NAME = os.environ.get("AGENT_NAME", "ab-harness-codex")
+
+
+@pytest.mark.skipif(
+    not LIVE,
+    reason="Set CODEX_LIVE_TESTS=1 and ensure codex CLI + OPENAI_API_KEY are available",
+)
+class TestLiveCodexAgent:
+    """End-to-end tests that require the real codex CLI and a running Agentex server."""
+
+    @pytest.fixture
+    def client(self):
+        from agentex import Agentex
+
+        return Agentex(base_url=AGENTEX_API_BASE_URL)
+
+    @pytest.fixture
+    def agent_id(self, client):
+        for agent in client.agents.list():
+            if agent.name == AGENT_NAME:
+                return agent.id
+        raise ValueError(f"Agent {AGENT_NAME!r} not found.")
+
+    def test_send_simple_message(self, client, agent_id: str):
+        """Async agents process events out of band, so create a task, send an
+        event, and poll the task's messages for the agent's response."""
+        import time
+        import uuid
+
+        from agentex.types import TextContentParam
+        from agentex.types.agent_rpc_params import ParamsSendEventRequest, ParamsCreateTaskRequest
+
+        task = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)).result
+        assert task is not None
+
+        client.agents.send_event(
+            agent_id=agent_id,
+            params=ParamsSendEventRequest(
+                task_id=task.id,
+                content=TextContentParam(
+                    author="user",
+                    content="What is 3+3? Reply with just the number.",
+                    type="text",
+                ),
+            ),
+        )
+
+        deadline = time.monotonic() + 60
+        while time.monotonic() < deadline:
+            msgs = client.messages.list(task_id=task.id)
+            agent_msgs = [m for m in msgs if getattr(m.content, "author", None) == "agent"]
+            if agent_msgs:
+                assert len(agent_msgs) >= 1
+                return
+            time.sleep(2)
+
+        raise AssertionError("No agent response received within 60 s")
diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/Dockerfile b/examples/tutorials/10_async/10_temporal/harness_codex/Dockerfile
new file mode 100644
index 000000000..e2f8807fd
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_codex/Dockerfile
@@ -0,0 +1,42 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+ENV UV_HTTP_TIMEOUT=1000
+
+COPY 10_async/10_temporal/harness_codex/pyproject.toml /app/harness_codex/pyproject.toml
+COPY 10_async/10_temporal/harness_codex/README.md /app/harness_codex/README.md
+
+WORKDIR /app/harness_codex
+
+COPY 10_async/10_temporal/harness_codex/project /app/harness_codex/project
+COPY 10_async/10_temporal/harness_codex/tests /app/harness_codex/tests
+COPY test_utils /app/test_utils
+
+RUN uv pip install --system .[dev]
+
+ENV PYTHONPATH=/app
+ENV AGENT_NAME=at-harness-codex
+
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
+
+# When deploying the worker, replace CMD with:
+# CMD ["python", "-m", "project.run_worker"]
diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/README.md b/examples/tutorials/10_async/10_temporal/harness_codex/README.md
new file mode 100644
index 000000000..4f9b76955
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_codex/README.md
@@ -0,0 +1,48 @@
+# harness_codex (Temporal)
+
+Tutorial agent demonstrating the `convert_codex_to_agentex_events` tap,
+`CodexTurn`, and `UnifiedEmitter` for a **Temporal-durable** async ACP agent.
+
+## What this tutorial shows
+
+- Spawning `codex exec --json` as a **local asyncio subprocess** (no Scale sandbox)
+  inside a Temporal workflow signal handler.
+- Wrapping the stdout line stream in a `CodexTurn`.
+- Delivering every canonical `StreamTaskMessage*` event to Redis via
+  `UnifiedEmitter.auto_send_turn`, passing `created_at=workflow.now()` for
+  deterministic Temporal replay timestamps.
+- Keeping the codex thread ID on the workflow instance (durable across crashes
+  without an external `adk.state` round-trip).
+
+> **Production isolation note:** A tutorial agent runs the Codex CLI locally.
+> Production-grade isolation (Scale sandbox, secret injection, MCP configuration)
+> is handled by the golden agent at
+> `teams/sgp/agents/golden_agent/project/harness/providers/codex.py`.
+
+> **Temporal determinism note:** Subprocess spawning happens inside
+> `@workflow.signal` handler bodies. Temporal does NOT replay signal handler
+> bodies (only `@workflow.run` is subject to replay constraints), so this is
+> safe. A production agent would wrap the subprocess in a Temporal activity for
+> full durability and retry semantics.
+
+## Live runs
+
+Live runs require:
+1. The `codex` CLI on PATH: `npm install -g @openai/codex`
+2. `OPENAI_API_KEY` set in the environment.
+3. A running Temporal server.
+
+## Running offline unit tests
+
+```bash
+cd /path/to/scale-agentex-python
+uv run --all-packages --all-extras pytest examples/tutorials/10_async/10_temporal/harness_codex/tests/test_agent.py -q
+```
+
+## Running live integration tests
+
+```bash
+export CODEX_LIVE_TESTS=1
+export OPENAI_API_KEY=sk-...
+pytest tests/test_agent.py -v
+```
diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/conftest.py b/examples/tutorials/10_async/10_temporal/harness_codex/conftest.py
new file mode 100644
index 000000000..4ae6ce61a
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_codex/conftest.py
@@ -0,0 +1,17 @@
+"""Add the agent's project root to sys.path so ``import project`` works.
+
+Also sets minimal environment variables so FastACP, tracing, and the
+Temporal workflow module can be imported without a running server.
+"""
+
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(__file__))
+
+# AGENT_NAME must match the manifest's agent name: the live test queries the
+# server by this name, and project.workflow reads it at import time.
+os.environ.setdefault("AGENT_NAME", "at-harness-codex")
+os.environ.setdefault("ACP_URL", "http://localhost:8000")
+os.environ.setdefault("WORKFLOW_NAME", "at-harness-codex")
+os.environ.setdefault("WORKFLOW_TASK_QUEUE", "at_harness_codex_queue")
diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/manifest.yaml b/examples/tutorials/10_async/10_temporal/harness_codex/manifest.yaml
new file mode 100644
index 000000000..3bc21dccc
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_codex/manifest.yaml
@@ -0,0 +1,62 @@
+build:
+  context:
+    root: ../../../
+    include_paths:
+      - 10_async/10_temporal/harness_codex
+      - test_utils
+    dockerfile: 10_async/10_temporal/harness_codex/Dockerfile
+    dockerignore: 10_async/10_temporal/harness_codex/.dockerignore
+
+local_development:
+  agent:
+    port: 8000
+    host_address: host.docker.internal
+  paths:
+    acp: project/acp.py
+    worker: project/run_worker.py
+
+agent:
+  acp_type: async
+  name: at-harness-codex
+  description: Temporal tutorial agent driving the unified harness surface via local codex CLI subprocess
+
+  temporal:
+    enabled: true
+    workflows:
+      - name: at-harness-codex
+        queue_name: at_harness_codex_queue
+
+  credentials:
+    - env_var_name: OPENAI_API_KEY
+      secret_name: openai-api-key
+      secret_key: api-key
+    - env_var_name: REDIS_URL
+      secret_name: redis-url-secret
+      secret_key: url
+    - env_var_name: SGP_API_KEY
+      secret_name: sgp-api-key
+      secret_key: api-key
+    - env_var_name: SGP_ACCOUNT_ID
+      secret_name: sgp-account-id
+      secret_key: account-id
+    - env_var_name: SGP_CLIENT_BASE_URL
+      secret_name: sgp-client-base-url
+      secret_key: url
+
+deployment:
+  image:
+    repository: ""
+    tag: "latest"
+
+  global:
+    agent:
+      name: "at-harness-codex"
+      description: "Temporal tutorial agent driving the unified harness surface via local codex CLI subprocess"
+    replicaCount: 1
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/project/__init__.py b/examples/tutorials/10_async/10_temporal/harness_codex/project/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/project/acp.py b/examples/tutorials/10_async/10_temporal/harness_codex/project/acp.py
new file mode 100644
index 000000000..39a81dde9
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_codex/project/acp.py
@@ -0,0 +1,32 @@
+"""ACP server for the Temporal Codex harness tutorial.
+
+This file is intentionally thin. When ``acp_type="async"`` is combined with
+``TemporalACPConfig(type="temporal", ...)``, FastACP auto-wires:
+
+    HTTP task/create       -> @workflow.run on the workflow class
+    HTTP task/event/send   -> @workflow.signal(SignalName.RECEIVE_EVENT)
+    HTTP task/cancel       -> workflow cancellation via the Temporal client
+
+so we don't define any handlers here. The actual agent code lives in
+``project/workflow.py`` and is executed by the Temporal worker
+(``project/run_worker.py``), not by this HTTP process.
+"""
+
+from __future__ import annotations
+
+import os
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from agentex.lib.types.fastacp import TemporalACPConfig
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+
+acp = FastACP.create(
+    acp_type="async",
+    config=TemporalACPConfig(
+        type="temporal",
+        temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"),
+    ),
+)
diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/project/activities.py b/examples/tutorials/10_async/10_temporal/harness_codex/project/activities.py
new file mode 100644
index 000000000..363347635
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_codex/project/activities.py
@@ -0,0 +1,145 @@
+"""Temporal activity for the Codex harness tutorial.
+
+Subprocess spawning (and any other I/O) must run inside a Temporal *activity*,
+not in workflow code. Temporal runs workflow + signal-handler bodies on a
+deterministic sandbox event loop that does not implement ``subprocess_exec``
+(or threads / sockets), so spawning ``codex exec`` directly in the signal
+handler raises ``NotImplementedError``. This activity runs codex, drives the
+``CodexTurn`` through ``UnifiedEmitter.auto_send_turn`` (the async Redis push
+path), and returns the turn result to the workflow.
+
+The ``_spawn_codex`` / ``_process_stdout`` seams are injectable: offline tests
+replace them with fakes that yield pre-recorded event lines so no real CLI
+runs.
+"""
+
+from __future__ import annotations
+
+import os
+import codecs
+import asyncio
+from typing import Any
+from datetime import datetime
+from collections.abc import AsyncIterator
+
+from temporalio import activity
+
+from agentex.lib.adk import CodexTurn
+from agentex.lib.core.harness import UnifiedEmitter
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.utils.model_utils import BaseModel
+
+logger = make_logger(__name__)
+
+RUN_CODEX_TURN_ACTIVITY = "run_codex_turn"
+
+
+class RunCodexTurnParams(BaseModel):
+    """Arguments for one codex turn run inside an activity."""
+
+    task_id: str
+    prompt: str
+    model: str
+    trace_id: str | None = None
+    parent_span_id: str | None = None
+    thread_id: str | None = None
+    created_at: datetime | None = None
+
+
+class RunCodexTurnResult(BaseModel):
+    """Result returned from the activity to the workflow."""
+
+    final_text: str
+    session_id: str | None = None
+    model: str | None = None
+
+
+async def _spawn_codex(
+    model: str,
+    thread_id: str | None = None,
+) -> asyncio.subprocess.Process:
+    """Spawn ``codex exec --json`` locally and return the live process.
+
+    Injection seam: tests replace this function with a fake that returns a
+    mock process whose stdout yields pre-recorded event lines.
+
+    The caller writes the prompt to stdin after the process starts, then
+    closes stdin so codex knows input is complete.
+    """
+    base_flags = [
+        "--json",
+        "--skip-git-repo-check",
+        "--dangerously-bypass-approvals-and-sandbox",
+        "--model",
+        model,
+    ]
+
+    if thread_id:
+        cmd = ["codex", "exec", *base_flags, "resume", thread_id, "-"]
+    else:
+        cmd = ["codex", "exec", *base_flags, "-"]
+
+    return await asyncio.create_subprocess_exec(
+        *cmd,
+        stdin=asyncio.subprocess.PIPE,
+        stdout=asyncio.subprocess.PIPE,
+        # Discard stderr: codex --json writes events to stdout; its stderr is
+        # progress/debug noise. Capturing it with PIPE but never reading it
+        # would deadlock once codex fills the OS pipe buffer (~64 KB).
+        stderr=asyncio.subprocess.DEVNULL,
+        env={**os.environ},
+    )
+
+
+async def _process_stdout(process: asyncio.subprocess.Process) -> AsyncIterator[str]:
+    """Yield newline-delimited JSON lines from the process stdout.
+
+    Uses an incremental UTF-8 decoder so a multibyte character split across two
+    4 KB reads is decoded correctly instead of being corrupted at the boundary.
+    """
+    assert process.stdout is not None
+    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
+    buffer = ""
+    while True:
+        chunk = await process.stdout.read(4096)
+        if not chunk:
+            break
+        buffer += decoder.decode(chunk)
+        while "\n" in buffer:
+            line, buffer = buffer.split("\n", 1)
+            line = line.strip()
+            if line:
+                yield line
+    buffer += decoder.decode(b"", final=True)
+    if buffer.strip():
+        yield buffer.strip()
+
+
+@activity.defn(name=RUN_CODEX_TURN_ACTIVITY)
+async def run_codex_turn(params: RunCodexTurnParams) -> dict[str, Any]:
+    """Run one codex turn end-to-end and stream events to the task.
+
+    Runs in an activity (real asyncio loop) so subprocess I/O is permitted.
+    """
+    process = await _spawn_codex(params.model, thread_id=params.thread_id)
+
+    assert process.stdin is not None
+    process.stdin.write(params.prompt.encode("utf-8"))
+    await process.stdin.drain()
+    process.stdin.close()
+
+    turn = CodexTurn(events=_process_stdout(process), model=params.model)
+    emitter = UnifiedEmitter(
+        task_id=params.task_id,
+        trace_id=params.trace_id,
+        parent_span_id=params.parent_span_id,
+    )
+    result = await emitter.auto_send_turn(turn, created_at=params.created_at)
+
+    await process.wait()
+
+    return RunCodexTurnResult(
+        final_text=result.final_text,
+        session_id=turn.session_id,
+        model=turn.usage().model,
+    ).model_dump()
diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/project/run_worker.py b/examples/tutorials/10_async/10_temporal/harness_codex/project/run_worker.py
new file mode 100644
index 000000000..b8972806b
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_codex/project/run_worker.py
@@ -0,0 +1,41 @@
+"""Temporal worker for the Codex harness tutorial.
+
+Run as a separate long-lived process alongside the ACP HTTP server. The
+worker polls Temporal for workflow + activity tasks and executes them.
+
+The codex CLI subprocess runs in the ``run_codex_turn`` activity (registered
+below alongside the built-in Agentex activities), because subprocess I/O is not
+permitted on the Temporal workflow event loop.
+"""
+
+import asyncio
+
+from project.workflow import AtHarnessCodexWorkflow
+from project.activities import run_codex_turn
+from agentex.lib.utils.debug import setup_debug_if_enabled
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.environment_variables import EnvironmentVariables
+from agentex.lib.core.temporal.activities import get_all_activities
+from agentex.lib.core.temporal.workers.worker import AgentexWorker
+
+environment_variables = EnvironmentVariables.refresh()
+logger = make_logger(__name__)
+
+
+async def main():
+    setup_debug_if_enabled()
+
+    task_queue_name = environment_variables.WORKFLOW_TASK_QUEUE
+    if task_queue_name is None:
+        raise ValueError("WORKFLOW_TASK_QUEUE is not set")
+
+    worker = AgentexWorker(task_queue=task_queue_name)
+
+    await worker.run(
+        activities=[run_codex_turn, *get_all_activities()],
+        workflow=AtHarnessCodexWorkflow,
+    )
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/project/workflow.py b/examples/tutorials/10_async/10_temporal/harness_codex/project/workflow.py
new file mode 100644
index 000000000..1970b478f
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_codex/project/workflow.py
@@ -0,0 +1,145 @@
+"""Temporal workflow for the Codex harness tutorial.
+
+Demonstrates the ``convert_codex_to_agentex_events`` tap + ``CodexTurn`` +
+``UnifiedEmitter`` for a Temporal-durable ACP agent.
+
+KEY CONCEPTS DEMONSTRATED:
+- Running ``codex exec --json`` in the ``run_codex_turn`` activity. Subprocess
+  I/O is not permitted on the Temporal workflow event loop (the deterministic
+  sandbox loop does not implement ``subprocess_exec``), so the signal handler
+  delegates the turn to an activity, which also gets Temporal's retry + timeout
+  guarantees.
+- Wrapping the stdout line stream in a ``CodexTurn`` (inside the activity).
+- Delivering events via ``UnifiedEmitter.auto_send_turn``, which pushes
+  ``StreamTaskMessage*`` events to Redis so the UI sees tokens in real time.
+- Passing ``created_at=workflow.now()`` for deterministic timestamps under
+  Temporal replay (required for Temporal-safe delivery).
+- Persisting the codex thread ID on the workflow instance itself — Temporal's
+  workflow state is durable, so no external ``adk.state`` round-trip is needed.
+"""
+
+from __future__ import annotations
+
+import os
+from datetime import timedelta
+
+from temporalio import workflow
+
+from agentex.lib import adk
+from agentex.lib.types.acp import SendEventParams, CreateTaskParams
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.types.text_content import TextContent
+from agentex.lib.environment_variables import EnvironmentVariables
+from agentex.lib.core.temporal.types.workflow import SignalName
+from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow
+from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config
+
+with workflow.unsafe.imports_passed_through():
+    from project.activities import RunCodexTurnParams, run_codex_turn
+
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+        sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""),
+    )
+)
+
+environment_variables = EnvironmentVariables.refresh()
+
+if environment_variables.WORKFLOW_NAME is None:
+    raise ValueError("Environment variable WORKFLOW_NAME is not set")
+if environment_variables.AGENT_NAME is None:
+    raise ValueError("Environment variable AGENT_NAME is not set")
+
+logger = make_logger(__name__)
+
+MODEL = os.environ.get("CODEX_MODEL", "o4-mini")
+
+
+@workflow.defn(name=environment_variables.WORKFLOW_NAME)
+class AtHarnessCodexWorkflow(BaseWorkflow):
+    """Long-running Temporal workflow that runs codex exec for each turn.
+
+    Conversation state (codex thread ID + turn counter) is kept on the
+    workflow instance. Temporal's durable replay reconstructs this state if
+    the worker crashes, so no external ``adk.state`` round-trip is needed.
+    """
+
+    def __init__(self):
+        super().__init__(display_name=environment_variables.AGENT_NAME)
+        self._complete_task = False
+        self._turn_number = 0
+        self._codex_thread_id: str | None = None
+
+    @workflow.signal(name=SignalName.RECEIVE_EVENT)
+    async def on_task_event_send(self, params: SendEventParams) -> None:
+        """Handle a new user message: spawn codex, stream events via UnifiedEmitter."""
+        logger.info("Received task event: %s", params.task.id)
+        self._turn_number += 1
+
+        await adk.messages.create(task_id=params.task.id, content=params.event.content)
+
+        user_message = params.event.content.content
+
+        async with adk.tracing.span(
+            trace_id=params.task.id,
+            task_id=params.task.id,
+            name=f"Turn {self._turn_number}",
+            input={"message": user_message},
+        ) as span:
+            # Delegate the subprocess turn to an activity: subprocess I/O is not
+            # permitted on the Temporal workflow event loop. The activity streams
+            # events to the task and returns the final text + codex thread id.
+            # workflow.now() gives a deterministic timestamp under replay.
+            result = await workflow.execute_activity(
+                run_codex_turn,
+                RunCodexTurnParams(
+                    task_id=params.task.id,
+                    prompt=user_message,
+                    model=MODEL,
+                    trace_id=params.task.id,
+                    parent_span_id=span.id if span else None,
+                    thread_id=self._codex_thread_id,
+                    created_at=workflow.now(),
+                ),
+                start_to_close_timeout=timedelta(minutes=5),
+            )
+
+            # Persist the codex thread id so the next turn resumes the session.
+            session_id = result.get("session_id")
+            if session_id:
+                self._codex_thread_id = session_id
+
+            if span:
+                span.output = {
+                    "final_text": result.get("final_text"),
+                    "model": result.get("model"),
+                }
+
+    @workflow.run
+    async def on_task_create(self, params: CreateTaskParams) -> str:
+        """Workflow entry point — keep the conversation alive for incoming signals."""
+        logger.info("Task created: %s", params.task.id)
+
+        await adk.messages.create(
+            task_id=params.task.id,
+            content=TextContent(
+                author="agent",
+                content=(
+                    f"Task initialized.\n"
+                    f"Send me a message and I'll run codex (local subprocess) "
+                    f"to answer, streaming events via the unified harness surface."
+                ),
+            ),
+        )
+
+        await workflow.wait_condition(lambda: self._complete_task, timeout=None)
+        return "Task completed"
+
+    @workflow.signal
+    async def complete_task_signal(self) -> None:
+        """Graceful workflow shutdown signal."""
+        logger.info("Received complete_task signal")
+        self._complete_task = True
diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/pyproject.toml b/examples/tutorials/10_async/10_temporal/harness_codex/pyproject.toml
new file mode 100644
index 000000000..c4d67d285
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_codex/pyproject.toml
@@ -0,0 +1,40 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "at-harness-codex"
+version = "0.1.0"
+description = "Temporal tutorial agent driving the unified harness surface via local codex CLI subprocess"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "agentex-sdk",
+    "scale-gp",
+    "temporalio>=1.18.2",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+    "black",
+    "isort",
+    "flake8",
+    "debugpy>=1.8.15",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["project"]
+
+[tool.black]
+line-length = 88
+target-version = ['py312']
+
+[tool.isort]
+profile = "black"
+line_length = 88
+
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/harness_codex/tests/test_agent.py
new file mode 100644
index 000000000..2066b35b1
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/harness_codex/tests/test_agent.py
@@ -0,0 +1,275 @@
+"""Tests for the Temporal Codex harness tutorial agent.
+
+LIVE tests (``TestLiveCodexAgent``):
+  - Require the ``codex`` CLI on PATH, ``OPENAI_API_KEY``, and a running
+    Temporal + Agentex server.
+  - Skipped automatically when ``CODEX_LIVE_TESTS`` is not set to ``1``.
+
+OFFLINE unit tests (``TestOfflineCodexWorkflow``):
+  - Inject a fake async iterator of pre-recorded codex event lines.
+  - Assert the signal handler drives ``UnifiedEmitter.auto_send_turn`` and
+    captures the codex thread ID on the workflow instance.
+  - Always run.
+"""
+
+from __future__ import annotations
+
+import os
+import json
+from typing import Any
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+# ---------------------------------------------------------------------------
+# Shared helpers
+# ---------------------------------------------------------------------------
+
+SAMPLE_EVENTS: list[dict[str, Any]] = [
+    {"type": "thread.started", "thread_id": "thread-temporal-1"},
+    {"type": "turn.started"},
+    {
+        "type": "item.started",
+        "item": {"id": "msg-t1", "type": "agent_message", "text": "Hello"},
+    },
+    {
+        "type": "item.completed",
+        "item": {"id": "msg-t1", "type": "agent_message", "text": "Hello from Temporal!"},
+    },
+    {
+        "type": "turn.completed",
+        "usage": {"input_tokens": 6, "output_tokens": 3, "total_tokens": 9},
+    },
+]
+
+
+async def _fake_event_stream():
+    """Async iterator of pre-recorded codex event JSON lines (no subprocess)."""
+    for evt in SAMPLE_EVENTS:
+        yield json.dumps(evt)
+
+
+class _FakeSpan:
+    id = "span-temporal-1"
+    output: Any = None
+
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, *a):
+        pass
+
+
+class TestOfflineCodexWorkflow:
+    """Unit tests that run without a real codex CLI, Temporal, or network."""
+
+    @pytest.mark.asyncio
+    async def test_codex_turn_usage_with_temporal_events(self):
+        """CodexTurn.usage() is correct after exhausting the temporal sample events."""
+        from agentex.lib.adk import CodexTurn
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+
+        _ = [e async for e in turn.events]
+
+        usage = turn.usage()
+        assert usage.input_tokens == 6
+        assert usage.output_tokens == 3
+        assert usage.model == "o4-mini"
+
+    @pytest.mark.asyncio
+    async def test_unified_emitter_auto_send_with_created_at(self):
+        """UnifiedEmitter.auto_send_turn accepts created_at=None without error."""
+        from agentex.lib.adk import CodexTurn
+        from agentex.lib.core.harness import UnifiedEmitter
+        from agentex.types.task_message import TaskMessage
+        from agentex.types.text_content import TextContent
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+
+        real_task_msg = TaskMessage(
+            id="msg-fake",
+            task_id="t",
+            content=TextContent(type="text", author="agent", content=""),
+        )
+
+        fake_streaming = MagicMock()
+        fake_ctx = AsyncMock()
+        fake_ctx.__aenter__ = AsyncMock(return_value=fake_ctx)
+        fake_ctx.__aexit__ = AsyncMock(return_value=False)
+        fake_ctx.stream_update = AsyncMock(return_value=MagicMock())
+        fake_ctx.close = AsyncMock()
+        fake_ctx.task_message = real_task_msg
+        fake_streaming.streaming_task_message_context = MagicMock(return_value=fake_ctx)
+
+        emitter = UnifiedEmitter(
+            task_id="t",
+            trace_id=None,
+            parent_span_id=None,
+            streaming=fake_streaming,
+        )
+
+        result = await emitter.auto_send_turn(turn, created_at=None)
+        assert result is not None
+
+    @pytest.mark.asyncio
+    async def test_thread_id_captured_after_exhausted_stream(self):
+        """CodexTurn._result captures the thread_id from thread.started."""
+        from agentex.lib.adk import CodexTurn
+
+        turn = CodexTurn(events=_fake_event_stream(), model="o4-mini")
+        _ = [e async for e in turn.events]
+
+        assert turn._result is not None
+        assert turn._result["session_id"] == "thread-temporal-1"
+
+    @pytest.mark.asyncio
+    async def test_signal_handler_delegates_to_activity_and_captures_thread_id(self):
+        """Signal handler runs the turn via execute_activity, increments the turn
+        counter, and captures the codex thread ID returned by the activity."""
+        captured: dict[str, Any] = {}
+
+        async def _fake_execute_activity(_activity, params, **_kw):
+            captured["params"] = params
+            return {
+                "session_id": "thread-temporal-1",
+                "final_text": "Hello from Temporal!",
+                "model": "o4-mini",
+            }
+
+        with patch("project.workflow.adk.messages.create", new=AsyncMock()), patch(
+            "project.workflow.adk.tracing.span"
+        ) as mock_span, patch(
+            "project.workflow.workflow.execute_activity", new=_fake_execute_activity
+        ), patch("project.workflow.workflow.now", return_value=None):
+            mock_span.return_value = _FakeSpan()
+
+            from project.workflow import AtHarnessCodexWorkflow
+
+            wf = AtHarnessCodexWorkflow.__new__(AtHarnessCodexWorkflow)
+            wf._turn_number = 0
+            wf._codex_thread_id = None
+            wf._complete_task = False
+            wf._display_name = "test"
+
+            params = MagicMock()
+            params.task.id = "task-temporal-offline-1"
+            params.event.content.content = "say hello temporal"
+
+            await wf.on_task_event_send(params)
+
+        assert wf._turn_number == 1
+        assert wf._codex_thread_id == "thread-temporal-1"
+        assert captured["params"].prompt == "say hello temporal"
+        assert captured["params"].thread_id is None
+
+    @pytest.mark.asyncio
+    async def test_run_codex_turn_activity_streams_and_returns_thread_id(self):
+        """The run_codex_turn activity drives the turn and returns the thread id."""
+        from agentex.lib.core.harness import UnifiedEmitter
+
+        async def _fake_spawn(model, thread_id=None):  # noqa: ARG001
+            fake_stdin = MagicMock()
+            fake_stdin.write = MagicMock()
+            fake_stdin.drain = AsyncMock()
+            fake_stdin.close = MagicMock()
+            proc = MagicMock()
+            proc.stdin = fake_stdin
+            proc.wait = AsyncMock(return_value=0)
+            return proc
+
+        async def _fake_process_stdout(_process):  # noqa: ARG001
+            for evt in SAMPLE_EVENTS:
+                yield json.dumps(evt)
+
+        class _FakeTurnResult:
+            final_text = "Hello from Temporal!"
+
+        async def _auto_send(_self, turn, *_a, **_kw):
+            async for _ in turn.events:
+                pass
+            return _FakeTurnResult()
+
+        with patch("project.activities._spawn_codex", new=_fake_spawn), patch(
+            "project.activities._process_stdout", new=_fake_process_stdout
+        ), patch.object(UnifiedEmitter, "auto_send_turn", new=_auto_send):
+            from project.activities import RunCodexTurnParams, run_codex_turn
+
+            result = await run_codex_turn(
+                RunCodexTurnParams(
+                    task_id="task-temporal-offline-1",
+                    prompt="say hello temporal",
+                    model="o4-mini",
+                )
+            )
+
+        assert result["session_id"] == "thread-temporal-1"
+        assert result["final_text"] == "Hello from Temporal!"
+
+
+# ---------------------------------------------------------------------------
+# Live tests
+# ---------------------------------------------------------------------------
+
+LIVE = os.environ.get("CODEX_LIVE_TESTS", "") == "1"
+AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003")
+AGENT_NAME = os.environ.get("AGENT_NAME", "at-harness-codex")
+
+
+@pytest.mark.skipif(
+    not LIVE,
+    reason="Set CODEX_LIVE_TESTS=1 and ensure codex CLI + OPENAI_API_KEY + Temporal are available",
+)
+class TestLiveCodexAgent:
+    """End-to-end tests that require the real codex CLI, Temporal, and Agentex server."""
+
+    @pytest.fixture
+    def client(self):
+        from agentex import Agentex
+
+        return Agentex(base_url=AGENTEX_API_BASE_URL)
+
+    @pytest.fixture
+    def agent_id(self, client):
+        for agent in client.agents.list():
+            if agent.name == AGENT_NAME:
+                return agent.id
+        raise ValueError(f"Agent {AGENT_NAME!r} not found.")
+
+    def test_send_simple_message(self, client, agent_id: str):
+        """Temporal agents process events out of band, so create a task, send an
+        event, and poll the task's messages for the agent's response."""
+        import time
+        import uuid
+
+        from agentex.types import TextContentParam
+        from agentex.types.agent_rpc_params import ParamsSendEventRequest, ParamsCreateTaskRequest
+
+        task = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)).result
+        assert task is not None
+
+        client.agents.send_event(
+            agent_id=agent_id,
+            params=ParamsSendEventRequest(
+                task_id=task.id,
+                content=TextContentParam(
+                    author="user",
+                    content="What is 5+5? Reply with just the number.",
+                    type="text",
+                ),
+            ),
+        )
+
+        deadline = time.monotonic() + 90
+        while time.monotonic() < deadline:
+            msgs = client.messages.list(task_id=task.id)
+            agent_msgs = [m for m in msgs if getattr(m.content, "author", None) == "agent"]
+            response_msgs = [
+                m for m in agent_msgs if "Task initialized" not in str(getattr(m.content, "content", ""))
+            ]
+            if response_msgs:
+                assert len(response_msgs) >= 1
+                return
+            time.sleep(3)
+
+        raise AssertionError("No agent response received within 90 s")
diff --git a/src/agentex/lib/adk/__init__.py b/src/agentex/lib/adk/__init__.py
index c2b343b72..f6713be7c 100644
--- a/src/agentex/lib/adk/__init__.py
+++ b/src/agentex/lib/adk/__init__.py
@@ -18,6 +18,8 @@
     ClaudeCodeTurn,
     claude_code_usage_to_turn_usage,
 )
+from agentex.lib.adk._modules._codex_sync import convert_codex_to_agentex_events
+from agentex.lib.adk._modules._codex_turn import CodexTurn, codex_usage_to_turn_usage
 from agentex.lib.adk._modules.events import EventsModule
 from agentex.lib.adk._modules.messages import MessagesModule
 from agentex.lib.adk._modules.state import StateModule
@@ -63,6 +65,10 @@
     "convert_claude_code_to_agentex_events",
     "ClaudeCodeTurn",
     "claude_code_usage_to_turn_usage",
+    # Codex
+    "convert_codex_to_agentex_events",
+    "CodexTurn",
+    "codex_usage_to_turn_usage",
     # Providers
     "providers",
     # Utils
diff --git a/src/agentex/lib/adk/_modules/_codex_sync.py b/src/agentex/lib/adk/_modules/_codex_sync.py
new file mode 100644
index 000000000..b2b162a24
--- /dev/null
+++ b/src/agentex/lib/adk/_modules/_codex_sync.py
@@ -0,0 +1,587 @@
+"""Codex event-stream parser tap for the unified harness surface.
+
+Converts a ``codex exec --json`` newline-delimited event stream (already
+produced by the golden agent's sandbox/subprocess orchestration) into the
+Agentex canonical ``StreamTaskMessage*`` events.
+
+SCOPE
+-----
+This module is a **pure parser**. It receives pre-produced codex events
+(``str`` lines or already-decoded ``dict`` objects) and yields canonical
+``StreamTaskMessage*`` events. All subprocess management, sandbox
+provisioning, secret injection, and MCP orchestration remain in the golden
+agent at
+``teams/sgp/agents/golden_agent/project/harness/providers/codex.py``.
+
+No deployable test agent is included here: running codex requires the
+golden agent's sandbox environment and is out of scope for this library tap.
+
+OUT OF SCOPE (document here so future callers are not surprised):
+- Subprocess / sandbox management
+- OPENAI_API_KEY / secret injection
+- MCP server configuration (--config /tmp/codex_config.toml)
+- ``codex exec resume`` session tracking
+- ``scale_sandbox`` imports
+
+CANONICAL MAPPING
+-----------------
+The table below lists every ``type`` field the codex exec JSON stream can
+emit (from ``codex-rs/exec/src/exec_events.rs``) and its mapping.
+
+Top-level event types
+~~~~~~~~~~~~~~~~~~~~~
+  thread.started          -> (no StreamTaskMessage; session_id captured
+                              internally; surfaced via ``on_result`` callback)
+  turn.started            -> (no StreamTaskMessage; turn was started before
+                              codex launched; nothing to emit here)
+  turn.completed          -> on_result(usage_dict, tool_count, reasoning_count)
+                             yields no StreamTaskMessage (turn lifecycle is
+                             managed by the activity layer)
+  turn.failed             -> StreamTaskMessageFull(TextContent, error text)
+  error                   -> StreamTaskMessageFull(TextContent, error text)
+
+Item sub-types (item.started / item.updated / item.completed)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  agent_message           -> text deltas:
+                               item.started / item.updated  -> StreamTaskMessageDelta(TextDelta)
+                               item.completed               -> StreamTaskMessageDone
+  reasoning               -> reasoning:
+                               item.started                 -> StreamTaskMessageStart(ReasoningContent)
+                               item.updated                 -> (no-op; final text arrives on completed)
+                               item.completed               -> StreamTaskMessageFull(ReasoningContent)
+  command_execution       -> tool request + response:
+                               item.started                 -> StreamTaskMessageStart(ToolRequestContent)
+                                                              + StreamTaskMessageDone
+                               item.completed               -> StreamTaskMessageFull(ToolResponseContent)
+  file_change             -> same as command_execution
+                             NOTE: file_change may only emit item.completed (no started);
+                             a synthetic ToolRequestContent Full is emitted before the response.
+  mcp_tool_call           -> same as command_execution
+  web_search              -> same as command_execution
+  todo_list               -> same as command_execution
+  collab_tool_call        -> same as command_execution
+  error (item type)       -> StreamTaskMessageFull(TextContent, error text) on completed only
+
+UNMAPPED / PARTIALLY MAPPED EVENTS
+-----------------------------------
+  thread.started:         session_id is extracted but not forwarded as a
+                          StreamTaskMessage (no canonical content type for
+                          session-lifecycle signals; captured in on_result).
+  turn.started:           no-op; intentional (the caller owns turn lifecycle).
+  turn.completed:         no StreamTaskMessage; usage is forwarded via
+                          on_result so the caller can record it in a span
+                          without this module needing to know about spans.
+  item.updated (reasoning): the intermediate cumulative text is discarded;
+                            only item.completed carries the final text.
+  item.updated (tool):    tool item types other than agent_message do not
+                          emit updates; item.started opens the request and
+                          item.completed closes it.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any, Callable, AsyncIterator
+
+from agentex.lib.utils.logging import make_logger
+from agentex.types.reasoning_content import ReasoningContent
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.task_message_content import TextContent
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.types.reasoning_content_delta import ReasoningContentDelta
+
+logger = make_logger(__name__)
+
+# Canonical type alias matching the unified harness surface.
+StreamTaskMessage = StreamTaskMessageStart | StreamTaskMessageDelta | StreamTaskMessageFull | StreamTaskMessageDone
+
+_MAX_RESULT_LENGTH = 4000
+
+
+def _truncate(text: str, max_len: int = _MAX_RESULT_LENGTH) -> str:
+    return str(text)[:max_len]
+
+
+def _tool_name_for(item_type: str, payload: dict[str, Any]) -> str:
+    """Derive a canonical tool name from a codex item type."""
+    if item_type == "command_execution":
+        return "bash"
+    if item_type == "file_change":
+        return "file_change"
+    if item_type == "mcp_tool_call":
+        server = payload.get("server", "")
+        tool = payload.get("tool", "")
+        return f"{server}.{tool}" if (server or tool) else "mcp_tool_call"
+    if item_type == "web_search":
+        return "web_search"
+    if item_type == "todo_list":
+        return "todo_list"
+    if item_type == "collab_tool_call":
+        return "collab_tool_call"
+    return item_type or "unknown"
+
+
+def _tool_args_for(item_type: str, payload: dict[str, Any]) -> dict[str, Any]:
+    """Extract canonical arguments dict from a codex item payload."""
+    if item_type == "command_execution":
+        return {"command": payload.get("command", "")}
+    if item_type == "file_change":
+        return {"changes": payload.get("changes") or []}
+    if item_type == "mcp_tool_call":
+        args = payload.get("arguments")
+        return args if isinstance(args, dict) else {"value": args}
+    if item_type == "web_search":
+        return {"query": payload.get("query", "")}
+    if item_type == "todo_list":
+        return {"items": payload.get("items") or []}
+    if item_type == "collab_tool_call":
+        # Surface an arguments dict if the payload carries one (mirrors
+        # mcp_tool_call); otherwise no args rather than fabricating a shape.
+        args = payload.get("arguments")
+        return args if isinstance(args, dict) else {}
+    return {}
+
+
+def _tool_output_for(item_type: str, payload: dict[str, Any]) -> tuple[str, bool]:
+    """Extract (result_text, is_error) from a completed codex tool item."""
+    if item_type == "command_execution":
+        out = payload.get("aggregated_output") or ""
+        exit_code = payload.get("exit_code")
+        is_error = exit_code is not None and exit_code != 0
+        return _truncate(out), is_error
+    if item_type in ("mcp_tool_call", "collab_tool_call"):
+        # collab_tool_call mirrors mcp_tool_call's error/result convention
+        # (see _tool_args_for); without this branch a failed collab call would
+        # fall through to the generic path and be reported as a success.
+        err = payload.get("error")
+        if err:
+            msg = err.get("message", "") if isinstance(err, dict) else str(err)
+            return _truncate(f"Error: {msg}"), True
+        result = payload.get("result")
+        if result is None:
+            return "", False
+        try:
+            return _truncate(json.dumps(result)), False
+        except (TypeError, ValueError):
+            return _truncate(str(result)), False
+    if item_type == "file_change":
+        changes = payload.get("changes") or []
+        status = payload.get("status", "")
+        return f"status={status}, {len(changes)} changes", status == "failed"
+    try:
+        return _truncate(json.dumps(payload, default=str)), False
+    except (TypeError, ValueError):
+        return _truncate(str(payload)), False
+
+
+def _error_full(message: str, next_index: int) -> StreamTaskMessageFull:
+    """Emit a one-shot TextContent full message for an error."""
+    return StreamTaskMessageFull(
+        type="full",
+        index=next_index,
+        content=TextContent(
+            type="text",
+            author="agent",
+            content=f"Error: {message}",
+            format="plain",
+        ),
+    )
+
+
+class _CodexStreamProcessor:
+    """Stateful parser: consumes codex exec events, yields StreamTaskMessage*.
+
+    Ported from the golden agent's ``_CodexEventProcessor`` in
+    ``project/harness/providers/codex.py``, adapted to yield
+    ``StreamTaskMessage*`` directly instead of ``HarnessEvent`` objects.
+
+    State tracked:
+    - ``_next_index``: monotonically increasing message index.
+    - ``_text_index``: message index of the current open agent_message block.
+    - ``_text_accumulated``: cumulative text per agent_message item_id.
+    - ``_reasoning_index``: message index of the current open reasoning block.
+    - ``_reasoning_text``: latest cumulative reasoning text per item_id.
+    - ``_tool_open``: item_ids for which a ToolRequestContent Start was emitted
+       but no ToolResponseContent Full yet.
+    - ``_tool_item_types``: item_id -> item_type for open tool calls.
+    """
+
+    def __init__(self) -> None:
+        self._next_index: int = 0
+
+        # agent_message tracking
+        self._text_index: dict[str, int] = {}
+        self._text_accumulated: dict[str, str] = {}
+
+        # reasoning tracking
+        self._reasoning_index: dict[str, int] = {}
+        self._reasoning_text: dict[str, str] = {}
+
+        # tool tracking
+        self._tool_open: set[str] = set()
+        self._tool_item_types: dict[str, str] = {}
+        # Remember the tool_call_id assigned per item so the request and response
+        # halves agree even when item_id is empty (a recomputed fallback would
+        # drift as tool_call_count advances between started and completed).
+        self._tool_call_ids: dict[str, str] = {}
+
+        # counters for on_result callback
+        self.tool_call_count: int = 0
+        self.reasoning_count: int = 0
+        self.session_id: str | None = None
+
+    def _alloc(self) -> int:
+        idx = self._next_index
+        self._next_index += 1
+        return idx
+
+    def process(self, evt: dict[str, Any]) -> list[StreamTaskMessage]:
+        evt_type = evt.get("type", "")
+
+        if evt_type == "thread.started":
+            sid = evt.get("thread_id") or ""
+            if sid:
+                self.session_id = sid
+            return []
+
+        if evt_type == "turn.started":
+            # The activity layer owns turn lifecycle; nothing to emit.
+            return []
+
+        if evt_type == "turn.completed":
+            # Usage forwarded via on_result callback (not a StreamTaskMessage).
+            return []
+
+        if evt_type == "turn.failed":
+            err = evt.get("error") or {}
+            msg = err.get("message", "codex turn failed") if isinstance(err, dict) else str(err)
+            return [_error_full(f"Codex turn failed: {msg}", self._alloc())]
+
+        if evt_type == "error":
+            return [_error_full(evt.get("message", "codex error"), self._alloc())]
+
+        if evt_type in ("item.started", "item.updated", "item.completed"):
+            item = evt.get("item") or {}
+            return self._handle_item(evt_type, item)
+
+        logger.debug("[codex] unhandled event type=%s", evt_type)
+        return []
+
+    def _handle_item(self, evt_type: str, item: dict[str, Any]) -> list[StreamTaskMessage]:
+        item_id = item.get("id") or ""
+        item_type = item.get("type") or ""
+        out: list[StreamTaskMessage] = []
+
+        if item_type == "agent_message":
+            current = item.get("text") or ""
+            previous = self._text_accumulated.get(item_id, "")
+
+            if evt_type in ("item.started", "item.updated"):
+                if item_id not in self._text_index:
+                    idx = self._alloc()
+                    self._text_index[item_id] = idx
+                    out.append(
+                        StreamTaskMessageStart(
+                            type="start",
+                            index=idx,
+                            content=TextContent(
+                                type="text",
+                                author="agent",
+                                content="",
+                            ),
+                        )
+                    )
+                idx = self._text_index[item_id]
+                delta = ""
+                if current.startswith(previous) and len(current) > len(previous):
+                    delta = current[len(previous) :]
+                elif current and current != previous:
+                    delta = current
+                if delta:
+                    out.append(
+                        StreamTaskMessageDelta(
+                            type="delta",
+                            index=idx,
+                            delta=TextDelta(type="text", text_delta=delta),
+                        )
+                    )
+                self._text_accumulated[item_id] = current
+
+            elif evt_type == "item.completed":
+                if item_id not in self._text_index:
+                    idx = self._alloc()
+                    self._text_index[item_id] = idx
+                    out.append(
+                        StreamTaskMessageStart(
+                            type="start",
+                            index=idx,
+                            content=TextContent(
+                                type="text",
+                                author="agent",
+                                content="",
+                            ),
+                        )
+                    )
+                idx = self._text_index[item_id]
+                delta = ""
+                if current.startswith(previous) and len(current) > len(previous):
+                    delta = current[len(previous) :]
+                elif current and current != previous:
+                    delta = current
+                if delta:
+                    out.append(
+                        StreamTaskMessageDelta(
+                            type="delta",
+                            index=idx,
+                            delta=TextDelta(type="text", text_delta=delta),
+                        )
+                    )
+                out.append(StreamTaskMessageDone(type="done", index=idx))
+                self._text_accumulated[item_id] = current
+
+        elif item_type == "reasoning":
+            current = item.get("text") or ""
+
+            if evt_type == "item.started":
+                idx = self._alloc()
+                self._reasoning_index[item_id] = idx
+                self._reasoning_text[item_id] = current
+                out.append(
+                    StreamTaskMessageStart(
+                        type="start",
+                        index=idx,
+                        content=ReasoningContent(
+                            type="reasoning",
+                            author="agent",
+                            summary=[],
+                            content=[],
+                            style="active",
+                        ),
+                    )
+                )
+                if current:
+                    out.append(
+                        StreamTaskMessageDelta(
+                            type="delta",
+                            index=idx,
+                            delta=ReasoningContentDelta(
+                                type="reasoning_content",
+                                content_index=0,
+                                content_delta=current,
+                            ),
+                        )
+                    )
+
+            elif evt_type == "item.updated":
+                # Accumulate silently; final text arrives on item.completed.
+                self._reasoning_text[item_id] = current
+
+            elif evt_type == "item.completed":
+                text = current or self._reasoning_text.get(item_id, "")
+                idx = self._reasoning_index.get(item_id)
+                if text:
+                    self.reasoning_count += 1
+                    summary = text.strip().split("\n", 1)[0][:300]
+                    final_content = ReasoningContent(
+                        type="reasoning",
+                        author="agent",
+                        summary=[summary],
+                        content=[text],
+                        style="static",
+                    )
+                    if idx is not None:
+                        out.append(
+                            StreamTaskMessageFull(
+                                type="full",
+                                index=idx,
+                                content=final_content,
+                            )
+                        )
+                    else:
+                        # No started event was seen; emit a standalone Full.
+                        out.append(
+                            StreamTaskMessageFull(
+                                type="full",
+                                index=self._alloc(),
+                                content=final_content,
+                            )
+                        )
+                elif idx is not None:
+                    # Empty reasoning block — still need to close with a Done.
+                    out.append(StreamTaskMessageDone(type="done", index=idx))
+
+        elif item_type in (
+            "command_execution",
+            "file_change",
+            "mcp_tool_call",
+            "web_search",
+            "todo_list",
+            "collab_tool_call",
+        ):
+            # Resolve a stable id once per item; reuse it for both halves.
+            tool_call_id = self._tool_call_ids.get(item_id)
+            if tool_call_id is None:
+                tool_call_id = item_id or f"codex_tool_{self.tool_call_count + 1}"
+                self._tool_call_ids[item_id] = tool_call_id
+
+            if evt_type == "item.started":
+                self.tool_call_count += 1
+                self._tool_open.add(item_id)
+                self._tool_item_types[item_id] = item_type
+                name = _tool_name_for(item_type, item)
+                args = _tool_args_for(item_type, item)
+                req_idx = self._alloc()
+                out.append(
+                    StreamTaskMessageStart(
+                        type="start",
+                        index=req_idx,
+                        content=ToolRequestContent(
+                            type="tool_request",
+                            author="agent",
+                            tool_call_id=tool_call_id,
+                            name=name,
+                            arguments=args,
+                        ),
+                    )
+                )
+                out.append(StreamTaskMessageDone(type="done", index=req_idx))
+
+            elif evt_type == "item.completed":
+                # file_change items may only emit item.completed (no started).
+                if item_id not in self._tool_open:
+                    self.tool_call_count += 1
+                    self._tool_open.add(item_id)
+                    self._tool_item_types[item_id] = item_type
+                    name = _tool_name_for(item_type, item)
+                    args = _tool_args_for(item_type, item)
+                    req_idx = self._alloc()
+                    out.append(
+                        StreamTaskMessageFull(
+                            type="full",
+                            index=req_idx,
+                            content=ToolRequestContent(
+                                type="tool_request",
+                                author="agent",
+                                tool_call_id=tool_call_id,
+                                name=name,
+                                arguments=args,
+                            ),
+                        )
+                    )
+
+                actual_type = self._tool_item_types.get(item_id, item_type)
+                result_text, is_error = _tool_output_for(actual_type, item)
+                name = _tool_name_for(actual_type, item)
+                resp_content: dict[str, Any] = {"result": result_text}
+                if is_error:
+                    resp_content["is_error"] = True
+                out.append(
+                    StreamTaskMessageFull(
+                        type="full",
+                        index=self._alloc(),
+                        content=ToolResponseContent(
+                            type="tool_response",
+                            author="agent",
+                            tool_call_id=tool_call_id,
+                            name=name,
+                            content=resp_content,
+                        ),
+                    )
+                )
+                self._tool_open.discard(item_id)
+                # Free the id mapping so a later item reusing an empty id gets a
+                # fresh fallback rather than colliding with this one.
+                self._tool_call_ids.pop(item_id, None)
+
+        elif item_type == "error":
+            if evt_type == "item.completed":
+                out.append(_error_full(item.get("message", "codex item error"), self._alloc()))
+
+        else:
+            logger.debug("[codex] unhandled item type=%s evt=%s", item_type, evt_type)
+
+        return out
+
+
+async def convert_codex_to_agentex_events(
+    events: AsyncIterator[str | dict[str, Any]],
+    on_result: Callable[[dict[str, Any]], None] | None = None,
+) -> AsyncIterator[StreamTaskMessage]:
+    """Convert a ``codex exec --json`` event stream into Agentex stream events.
+
+    This is a pure parser tap. The caller must supply ``events`` as an async
+    iterator of either raw newline-delimited JSON strings or pre-decoded dicts.
+    No subprocess or sandbox management is done here.
+
+    Args:
+        events: Async iterator of ``str`` (newline-delimited JSON lines) or
+            ``dict`` (pre-decoded event objects) as produced by the codex CLI's
+            ``--json`` flag via sandbox stdout.
+        on_result: Optional callback invoked once when a ``turn.completed``
+            event is seen. Receives a dict with keys:
+                ``usage``           — the raw codex usage dict (or None)
+                ``session_id``      — the codex thread_id (or None)
+                ``tool_call_count`` — int
+                ``reasoning_count`` — int
+            Use this to record turn-level metrics / usage in the caller's span
+            without coupling this module to span/tracing APIs.
+
+    Yields:
+        Canonical ``StreamTaskMessage*`` events (Start/Delta/Full/Done) with
+        ``TextContent``, ``ReasoningContent``, ``ToolRequestContent``, or
+        ``ToolResponseContent`` payloads.
+
+    MAPPING (abbreviated — see module docstring for the full table)
+        thread.started          -> no event; session_id captured for on_result
+        turn.started            -> no event
+        turn.completed          -> no event; triggers on_result callback
+        turn.failed / error     -> StreamTaskMessageFull(TextContent, error)
+        agent_message           -> Start + Deltas + Done
+        reasoning               -> Start + Full(ReasoningContent)
+        command_execution       -> Start(ToolRequest)+Done + Full(ToolResponse)
+        file_change             -> Full(ToolRequest) + Full(ToolResponse)
+        mcp_tool_call           -> Start(ToolRequest)+Done + Full(ToolResponse)
+        web_search / todo_list  -> Start(ToolRequest)+Done + Full(ToolResponse)
+        collab_tool_call        -> Start(ToolRequest)+Done + Full(ToolResponse)
+    """
+    processor = _CodexStreamProcessor()
+    _pending_usage: dict[str, Any] | None = None
+
+    async for raw in events:
+        if isinstance(raw, dict):
+            evt = raw
+        else:
+            line = raw.strip() if isinstance(raw, str) else ""
+            if not line:
+                continue
+            try:
+                evt = json.loads(line)
+            except json.JSONDecodeError:
+                logger.debug("[codex] non-JSON line: %s", line[:100])
+                continue
+
+        # Capture usage before processing so on_result can fire after flush.
+        if evt.get("type") == "turn.completed":
+            usage = evt.get("usage")
+            _pending_usage = usage if isinstance(usage, dict) else None
+
+        messages = processor.process(evt)
+        for msg in messages:
+            yield msg
+
+    if on_result is not None:
+        on_result(
+            {
+                "usage": _pending_usage,
+                "session_id": processor.session_id,
+                "tool_call_count": processor.tool_call_count,
+                "reasoning_count": processor.reasoning_count,
+            }
+        )
diff --git a/src/agentex/lib/adk/_modules/_codex_turn.py b/src/agentex/lib/adk/_modules/_codex_turn.py
new file mode 100644
index 000000000..e7fa1d929
--- /dev/null
+++ b/src/agentex/lib/adk/_modules/_codex_turn.py
@@ -0,0 +1,214 @@
+"""CodexTurn: HarnessTurn implementation for the codex event-stream tap.
+
+Wraps ``convert_codex_to_agentex_events`` so callers can pass a ``CodexTurn``
+directly to ``UnifiedEmitter.yield_turn`` or ``UnifiedEmitter.auto_send_turn``.
+
+Usage::
+
+    from agentex.lib.adk import convert_codex_to_agentex_events
+    from agentex.lib.adk._modules._codex_turn import CodexTurn, codex_usage_to_turn_usage
+
+    turn = CodexTurn(events=codex_event_stream, model="o4-mini")
+    async for msg in emitter.yield_turn(turn):
+        yield msg
+    turn_usage = turn.usage()
+
+OUT OF SCOPE
+------------
+Like ``_codex_sync``, this module is a pure library tap. Subprocess
+provisioning, sandbox setup, secret injection, and MCP configuration remain
+in the golden agent (``teams/sgp/agents/golden_agent/project/harness/``).
+"""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+
+from agentex.lib.core.harness.types import TurnUsage
+from agentex.lib.adk._modules._codex_sync import (
+    StreamTaskMessage,
+    convert_codex_to_agentex_events,
+)
+
+
+def codex_usage_to_turn_usage(
+    raw: dict[str, Any] | None,
+    *,
+    model: str | None = None,
+    tool_call_count: int = 0,
+    reasoning_count: int = 0,
+    duration_ms: int | None = None,
+    cost_usd: float | None = None,
+) -> TurnUsage:
+    """Map a raw codex ``turn.completed`` usage dict to a canonical ``TurnUsage``.
+
+    Codex reports token usage under the ``usage`` key of the
+    ``turn.completed`` event. The shape follows the OpenAI completion_tokens
+    convention because codex is built on OpenAI models:
+
+    .. code-block:: json
+
+        {
+            "input_tokens": 1234,
+            "output_tokens": 456,
+            "total_tokens": 1690
+        }
+
+    Additionally, codex may report ``reasoning_tokens`` for o-series models:
+
+    .. code-block:: json
+
+        {
+            "input_tokens": 1234,
+            "output_tokens": 456,
+            "reasoning_tokens": 200,
+            "total_tokens": 1690
+        }
+
+    Defensive rules:
+    - Missing ``raw`` or missing sub-keys default to ``None`` (not zero) so
+      downstream callers can distinguish "not reported" from "reported as 0".
+    - Real zeros (``0`` explicitly present in ``raw``) are preserved as ``0``.
+    - ``total_tokens`` is accepted from the payload or left as ``None``;
+      callers should not recompute it because codex may use cached tokens.
+    - ``cost_usd`` is passed through when codex reports it (not yet common);
+      defaults to ``None`` if absent.
+
+    Args:
+        raw: The raw codex usage dict from ``turn.completed``, or ``None``.
+        model: Model string (e.g. "o4-mini") to attach to the usage record.
+        tool_call_count: Number of tool calls in the turn (from processor).
+        reasoning_count: Number of reasoning blocks (from processor).
+        duration_ms: Wall-clock duration of the turn in milliseconds.
+        cost_usd: Cost in USD if the caller can derive it; ``None`` otherwise.
+
+    Returns:
+        A populated ``TurnUsage`` instance.
+    """
+    if not isinstance(raw, dict):
+        raw = {}
+
+    def _int_or_none(key: str) -> int | None:
+        val = raw.get(key)
+        if val is None:
+            return None
+        try:
+            return int(val)
+        except (TypeError, ValueError):
+            return None
+
+    def _float_or_none(key: str) -> float | None:
+        val = raw.get(key)
+        if val is None:
+            return None
+        try:
+            return float(val)
+        except (TypeError, ValueError):
+            return None
+
+    # cost_usd: prefer explicitly passed value, then fall back to raw payload.
+    effective_cost = cost_usd if cost_usd is not None else _float_or_none("cost_usd")
+
+    return TurnUsage(
+        model=model or None,
+        input_tokens=_int_or_none("input_tokens"),
+        output_tokens=_int_or_none("output_tokens"),
+        cached_input_tokens=_int_or_none("cached_input_tokens"),
+        reasoning_tokens=_int_or_none("reasoning_tokens"),
+        total_tokens=_int_or_none("total_tokens"),
+        cost_usd=effective_cost,
+        duration_ms=duration_ms,
+        num_llm_calls=1,
+        num_tool_calls=tool_call_count,
+        num_reasoning_blocks=reasoning_count,
+    )
+
+
+class CodexTurn:
+    """A single codex turn as a ``HarnessTurn``.
+
+    Implements the ``HarnessTurn`` protocol so it can be passed to
+    ``UnifiedEmitter.yield_turn`` and ``UnifiedEmitter.auto_send_turn``.
+
+    ``usage()`` is valid only after ``events`` has been fully consumed (i.e.
+    the async generator has been exhausted). Calling ``usage()`` before
+    exhaustion returns a zero-value ``TurnUsage`` with only ``model`` set.
+
+    Args:
+        events: An async iterator of ``str | dict`` codex events, as
+            produced by reading ``codex exec --json`` stdout line by line.
+        model: Model string to attach to the ``TurnUsage``.
+        duration_ms: Optional turn wall-clock duration in milliseconds.
+        cost_usd: Optional cost in USD; ``None`` if not known.
+    """
+
+    def __init__(
+        self,
+        events: AsyncIterator[str | dict[str, Any]],
+        *,
+        model: str | None = None,
+        duration_ms: int | None = None,
+        cost_usd: float | None = None,
+    ) -> None:
+        self._raw_events = events
+        self._model = model
+        # Public + mutable: the true wall-clock duration (and cost) is usually
+        # only known after the stream is consumed, so callers may set these
+        # after construction and before calling usage().
+        self.duration_ms = duration_ms
+        self.cost_usd = cost_usd
+
+        # Populated by the on_result callback once the stream is exhausted.
+        self._result: dict[str, Any] | None = None
+        # The events generator is created at most once: ``_raw_events`` is a
+        # single-consumption AsyncIterator, so re-wrapping it would yield an
+        # already-exhausted stream that fires on_result with zeros and clobbers
+        # ``_result``. Cache the generator and hand back the same instance.
+        self._events_gen: AsyncIterator[StreamTaskMessage] | None = None
+
+    @property
+    def events(self) -> AsyncIterator[StreamTaskMessage]:
+        """Async iterator of canonical ``StreamTaskMessage*`` events.
+
+        The ``on_result`` callback populates ``_result`` when the underlying
+        codex stream ends, so ``usage()`` returns meaningful data after
+        exhaustion. Returns the same generator on every access so the underlying
+        stream is consumed (and ``on_result`` fires) exactly once.
+        """
+        if self._events_gen is None:
+            self._events_gen = convert_codex_to_agentex_events(
+                self._raw_events,
+                on_result=self._on_result,
+            )
+        return self._events_gen
+
+    def _on_result(self, result: dict[str, Any]) -> None:
+        self._result = result
+
+    @property
+    def session_id(self) -> str | None:
+        """The codex session id, for resuming a multi-turn session.
+
+        Valid only after ``events`` has been fully consumed (populated by the
+        ``on_result`` callback). Returns ``None`` if the stream is not yet
+        exhausted or codex reported no session id.
+        """
+        return self._result.get("session_id") if self._result else None
+
+    def usage(self) -> TurnUsage:
+        """Return normalized ``TurnUsage`` for this turn.
+
+        Valid only after ``events`` has been fully consumed. Returns a
+        zero-value ``TurnUsage`` (model set, counts zero, tokens None) if
+        called before the stream ends.
+        """
+        if self._result is None:
+            return TurnUsage(model=self._model)
+        return codex_usage_to_turn_usage(
+            self._result.get("usage"),
+            model=self._model,
+            tool_call_count=self._result.get("tool_call_count", 0),
+            reasoning_count=self._result.get("reasoning_count", 0),
+            duration_ms=self.duration_ms,
+            cost_usd=self.cost_usd,
+        )
diff --git a/tests/lib/adk/test_codex_sync.py b/tests/lib/adk/test_codex_sync.py
new file mode 100644
index 000000000..d0093e5dd
--- /dev/null
+++ b/tests/lib/adk/test_codex_sync.py
@@ -0,0 +1,671 @@
+"""Offline tests for the codex event-stream parser tap.
+
+Tests cover:
+- Text streaming (agent_message items)
+- Tool call streaming (command_execution, mcp_tool_call, file_change)
+- Reasoning streaming (reasoning items)
+- Multi-step turns
+- Error events (top-level + item-level)
+- Edge cases: empty events, non-JSON lines, unknown types
+- on_result callback (session_id, usage, counters)
+- file_change synthesized start (no item.started emitted by codex)
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any, AsyncIterator
+
+from agentex.types.reasoning_content import ReasoningContent
+from agentex.types.task_message_delta import TextDelta
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.types.task_message_content import TextContent
+from agentex.types.tool_request_content import ToolRequestContent
+from agentex.types.tool_response_content import ToolResponseContent
+from agentex.lib.adk._modules._codex_sync import (
+    _truncate,
+    _tool_args_for,
+    _tool_name_for,
+    _tool_output_for,
+    convert_codex_to_agentex_events,
+)
+from agentex.types.reasoning_content_delta import ReasoningContentDelta
+
+
+async def _aiter(items: list[Any]) -> AsyncIterator[Any]:
+    for item in items:
+        yield item
+
+
+async def _collect(stream: AsyncIterator[Any]) -> list[Any]:
+    return [e async for e in stream]
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+class TestHelpers:
+    def test_truncate_short(self) -> None:
+        assert _truncate("hello", max_len=10) == "hello"
+
+    def test_truncate_long(self) -> None:
+        assert _truncate("a" * 5000) == "a" * 4000
+
+    def test_tool_name_command_execution(self) -> None:
+        assert _tool_name_for("command_execution", {}) == "bash"
+
+    def test_tool_name_file_change(self) -> None:
+        assert _tool_name_for("file_change", {}) == "file_change"
+
+    def test_tool_name_mcp_with_server_and_tool(self) -> None:
+        assert _tool_name_for("mcp_tool_call", {"server": "fs", "tool": "read"}) == "fs.read"
+
+    def test_tool_name_mcp_empty(self) -> None:
+        assert _tool_name_for("mcp_tool_call", {}) == "mcp_tool_call"
+
+    def test_tool_name_unknown(self) -> None:
+        assert _tool_name_for("", {}) == "unknown"
+
+    def test_tool_args_command(self) -> None:
+        assert _tool_args_for("command_execution", {"command": "ls"}) == {"command": "ls"}
+
+    def test_tool_args_file_change(self) -> None:
+        assert _tool_args_for("file_change", {"changes": ["a"]}) == {"changes": ["a"]}
+
+    def test_tool_args_mcp_dict(self) -> None:
+        assert _tool_args_for("mcp_tool_call", {"arguments": {"k": "v"}}) == {"k": "v"}
+
+    def test_tool_args_mcp_non_dict(self) -> None:
+        assert _tool_args_for("mcp_tool_call", {"arguments": "str"}) == {"value": "str"}
+
+    def test_tool_output_command_success(self) -> None:
+        text, is_err = _tool_output_for("command_execution", {"aggregated_output": "hello", "exit_code": 0})
+        assert text == "hello"
+        assert is_err is False
+
+    def test_tool_output_command_error(self) -> None:
+        _, is_err = _tool_output_for("command_execution", {"aggregated_output": "boom", "exit_code": 1})
+        assert is_err is True
+
+    def test_tool_output_mcp_error(self) -> None:
+        text, is_err = _tool_output_for("mcp_tool_call", {"error": {"message": "not found"}})
+        assert "not found" in text
+        assert is_err is True
+
+    def test_tool_output_mcp_result(self) -> None:
+        text, is_err = _tool_output_for("mcp_tool_call", {"result": {"data": 1}})
+        assert json.loads(text) == {"data": 1}
+        assert is_err is False
+
+    def test_tool_output_file_change_failed(self) -> None:
+        _, is_err = _tool_output_for("file_change", {"status": "failed", "changes": []})
+        assert is_err is True
+
+    def test_tool_output_file_change_ok(self) -> None:
+        text, is_err = _tool_output_for("file_change", {"status": "ok", "changes": [1, 2]})
+        assert "2 changes" in text
+        assert is_err is False
+
+
+# ---------------------------------------------------------------------------
+# Text streaming
+# ---------------------------------------------------------------------------
+
+
+class TestTextStreaming:
+    async def test_text_start_delta_done(self) -> None:
+        events = [
+            {"type": "item.started", "item": {"id": "m1", "type": "agent_message", "text": "Hi"}},
+            {"type": "item.updated", "item": {"id": "m1", "type": "agent_message", "text": "Hi!"}},
+            {"type": "item.completed", "item": {"id": "m1", "type": "agent_message", "text": "Hi! Done"}},
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+
+        starts = [e for e in out if isinstance(e, StreamTaskMessageStart)]
+        deltas = [e for e in out if isinstance(e, StreamTaskMessageDelta)]
+        dones = [e for e in out if isinstance(e, StreamTaskMessageDone)]
+
+        assert len(starts) == 1
+        assert isinstance(starts[0].content, TextContent)
+        assert len(deltas) >= 1
+        all_delta_text = "".join(
+            d.delta.text_delta for d in deltas if isinstance(d.delta, TextDelta) and d.delta.text_delta is not None
+        )
+        assert "Hi" in all_delta_text
+        assert len(dones) == 1
+
+    async def test_text_indices_are_monotonic(self) -> None:
+        events = [
+            {"type": "item.started", "item": {"id": "m1", "type": "agent_message", "text": "A"}},
+            {"type": "item.completed", "item": {"id": "m1", "type": "agent_message", "text": "A"}},
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        anchor = [e for e in out if isinstance(e, StreamTaskMessageStart)]
+        done = [e for e in out if isinstance(e, StreamTaskMessageDone)]
+        assert anchor[0].index == done[0].index
+
+    async def test_empty_text_no_delta(self) -> None:
+        events = [
+            {"type": "item.started", "item": {"id": "m1", "type": "agent_message", "text": ""}},
+            {"type": "item.completed", "item": {"id": "m1", "type": "agent_message", "text": ""}},
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        deltas = [e for e in out if isinstance(e, StreamTaskMessageDelta)]
+        assert deltas == []
+
+    async def test_text_author_is_agent(self) -> None:
+        events = [
+            {"type": "item.started", "item": {"id": "m1", "type": "agent_message", "text": "X"}},
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        for e in out:
+            content = getattr(e, "content", None)
+            if content and hasattr(content, "author"):
+                assert content.author == "agent"
+
+
+# ---------------------------------------------------------------------------
+# Tool call streaming
+# ---------------------------------------------------------------------------
+
+
+class TestToolCallStreaming:
+    async def test_command_execution_start_done_full(self) -> None:
+        events = [
+            {
+                "type": "item.started",
+                "item": {
+                    "id": "t1",
+                    "type": "command_execution",
+                    "command": "echo hello",
+                },
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "t1",
+                    "type": "command_execution",
+                    "command": "echo hello",
+                    "aggregated_output": "hello",
+                    "exit_code": 0,
+                },
+            },
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+
+        starts = [e for e in out if isinstance(e, StreamTaskMessageStart)]
+        dones = [e for e in out if isinstance(e, StreamTaskMessageDone)]
+        fulls = [e for e in out if isinstance(e, StreamTaskMessageFull)]
+
+        assert len(starts) == 1
+        assert isinstance(starts[0].content, ToolRequestContent)
+        assert starts[0].content.name == "bash"
+        assert starts[0].content.arguments == {"command": "echo hello"}
+        assert starts[0].content.tool_call_id == "t1"
+
+        assert len(dones) == 1
+
+        assert len(fulls) == 1
+        assert isinstance(fulls[0].content, ToolResponseContent)
+        resp_content = fulls[0].content.content
+        assert isinstance(resp_content, dict)
+        assert resp_content["result"] == "hello"
+        assert fulls[0].content.tool_call_id == "t1"
+
+    async def test_empty_item_id_request_response_ids_match(self) -> None:
+        """A tool with an empty item_id must use the SAME fallback tool_call_id
+        on the request (started) and response (completed) halves."""
+        events = [
+            {"type": "item.started", "item": {"id": "", "type": "command_execution", "command": "ls"}},
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "",
+                    "type": "command_execution",
+                    "command": "ls",
+                    "aggregated_output": ".",
+                    "exit_code": 0,
+                },
+            },
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        # Pull tool_call_id inside the comprehension so the isinstance narrows the
+        # content union (the narrowing would not survive a later attribute access).
+        req_ids = [
+            e.content.tool_call_id
+            for e in out
+            if isinstance(e, StreamTaskMessageStart) and isinstance(e.content, ToolRequestContent)
+        ]
+        resp_ids = [
+            e.content.tool_call_id
+            for e in out
+            if isinstance(e, StreamTaskMessageFull) and isinstance(e.content, ToolResponseContent)
+        ]
+        assert len(req_ids) == 1 and len(resp_ids) == 1
+        assert req_ids[0] == resp_ids[0]
+
+    async def test_file_change_synthesizes_start(self) -> None:
+        """file_change items may only emit item.completed (no started)."""
+        events = [
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "fc1",
+                    "type": "file_change",
+                    "changes": ["a.py"],
+                    "status": "ok",
+                },
+            }
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        tool_req = [
+            e for e in out if isinstance(e, StreamTaskMessageFull) and isinstance(e.content, ToolRequestContent)
+        ]
+        tool_resp = [
+            e for e in out if isinstance(e, StreamTaskMessageFull) and isinstance(e.content, ToolResponseContent)
+        ]
+        assert len(tool_req) == 1
+        assert isinstance(tool_req[0].content, ToolRequestContent)
+        assert tool_req[0].content.name == "file_change"
+        assert len(tool_resp) == 1
+
+    async def test_mcp_tool_call_name(self) -> None:
+        events = [
+            {
+                "type": "item.started",
+                "item": {
+                    "id": "mcp1",
+                    "type": "mcp_tool_call",
+                    "server": "fs",
+                    "tool": "read",
+                    "arguments": {"path": "/x"},
+                },
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "mcp1",
+                    "type": "mcp_tool_call",
+                    "server": "fs",
+                    "tool": "read",
+                    "arguments": {"path": "/x"},
+                    "result": "content",
+                },
+            },
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        req = next(
+            e for e in out if isinstance(e, StreamTaskMessageStart) and isinstance(e.content, ToolRequestContent)
+        )
+        assert isinstance(req.content, ToolRequestContent)
+        assert req.content.name == "fs.read"
+
+    async def test_tool_error_marks_is_error(self) -> None:
+        events = [
+            {
+                "type": "item.started",
+                "item": {"id": "cmd1", "type": "command_execution", "command": "bad"},
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "cmd1",
+                    "type": "command_execution",
+                    "command": "bad",
+                    "aggregated_output": "error output",
+                    "exit_code": 127,
+                },
+            },
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        resp = next(
+            e for e in out if isinstance(e, StreamTaskMessageFull) and isinstance(e.content, ToolResponseContent)
+        )
+        assert isinstance(resp.content, ToolResponseContent)
+        resp_body = resp.content.content
+        assert isinstance(resp_body, dict)
+        assert resp_body.get("is_error") is True
+
+    async def test_tool_indices_request_before_response(self) -> None:
+        events = [
+            {
+                "type": "item.started",
+                "item": {"id": "cmd2", "type": "command_execution", "command": "ls"},
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "cmd2",
+                    "type": "command_execution",
+                    "command": "ls",
+                    "aggregated_output": ".",
+                    "exit_code": 0,
+                },
+            },
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        req = next(e for e in out if isinstance(e, StreamTaskMessageStart))
+        resp = next(
+            e for e in out if isinstance(e, StreamTaskMessageFull) and isinstance(e.content, ToolResponseContent)
+        )
+        assert req.index is not None and resp.index is not None
+        assert req.index < resp.index
+
+
+# ---------------------------------------------------------------------------
+# Reasoning
+# ---------------------------------------------------------------------------
+
+
+class TestReasoningStreaming:
+    async def test_reasoning_start_full(self) -> None:
+        events = [
+            {"type": "item.started", "item": {"id": "r1", "type": "reasoning", "text": ""}},
+            {
+                "type": "item.updated",
+                "item": {"id": "r1", "type": "reasoning", "text": "thinking..."},
+            },
+            {
+                "type": "item.completed",
+                "item": {"id": "r1", "type": "reasoning", "text": "thinking... done"},
+            },
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+
+        starts = [e for e in out if isinstance(e, StreamTaskMessageStart)]
+        fulls = [e for e in out if isinstance(e, StreamTaskMessageFull) and isinstance(e.content, ReasoningContent)]
+
+        assert len(starts) == 1
+        assert isinstance(starts[0].content, ReasoningContent)
+        assert len(fulls) == 1
+        assert isinstance(fulls[0].content, ReasoningContent)
+        reasoning_content = fulls[0].content.content
+        assert reasoning_content is not None
+        assert any("thinking... done" in s for s in reasoning_content)
+
+    async def test_reasoning_initial_text_emits_delta(self) -> None:
+        events = [
+            {
+                "type": "item.started",
+                "item": {"id": "r1", "type": "reasoning", "text": "seed"},
+            },
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        deltas = [e for e in out if isinstance(e, StreamTaskMessageDelta)]
+        assert len(deltas) == 1
+        assert isinstance(deltas[0].delta, ReasoningContentDelta)
+        assert deltas[0].delta.content_delta == "seed"
+
+    async def test_reasoning_no_started_emits_standalone_full(self) -> None:
+        """If item.completed arrives without item.started, emit a standalone Full."""
+        events = [
+            {
+                "type": "item.completed",
+                "item": {"id": "r_orphan", "type": "reasoning", "text": "orphan thought"},
+            }
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        fulls = [e for e in out if isinstance(e, StreamTaskMessageFull) and isinstance(e.content, ReasoningContent)]
+        assert len(fulls) == 1
+        assert isinstance(fulls[0].content, ReasoningContent)
+        orphan_content = fulls[0].content.content
+        assert orphan_content is not None
+        assert any("orphan thought" in s for s in orphan_content)
+
+    async def test_reasoning_summary_is_first_line(self) -> None:
+        events = [
+            {"type": "item.started", "item": {"id": "r2", "type": "reasoning", "text": ""}},
+            {
+                "type": "item.completed",
+                "item": {"id": "r2", "type": "reasoning", "text": "line one\nline two"},
+            },
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        full = next(e for e in out if isinstance(e, StreamTaskMessageFull) and isinstance(e.content, ReasoningContent))
+        assert isinstance(full.content, ReasoningContent)
+        assert full.content.summary == ["line one"]
+
+
+# ---------------------------------------------------------------------------
+# Error events
+# ---------------------------------------------------------------------------
+
+
+class TestErrorEvents:
+    async def test_turn_failed_emits_error_text(self) -> None:
+        events = [{"type": "turn.failed", "error": {"message": "context length exceeded"}}]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        assert len(out) == 1
+        assert isinstance(out[0], StreamTaskMessageFull)
+        assert isinstance(out[0].content, TextContent)
+        assert "context length exceeded" in out[0].content.content
+
+    async def test_top_level_error_emits_text(self) -> None:
+        events = [{"type": "error", "message": "unexpected EOF"}]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        assert len(out) == 1
+        assert isinstance(out[0].content, TextContent)
+        assert "unexpected EOF" in out[0].content.content
+
+    async def test_item_error_emits_on_completed_only(self) -> None:
+        events = [
+            {"type": "item.started", "item": {"id": "e1", "type": "error", "message": "bad"}},
+            {"type": "item.completed", "item": {"id": "e1", "type": "error", "message": "bad"}},
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        # Only item.completed emits an event for error items
+        assert len(out) == 1
+        assert isinstance(out[0].content, TextContent)
+        assert "bad" in out[0].content.content
+
+
+# ---------------------------------------------------------------------------
+# Edge cases
+# ---------------------------------------------------------------------------
+
+
+class TestEdgeCases:
+    async def test_empty_stream(self) -> None:
+        out = await _collect(convert_codex_to_agentex_events(_aiter([])))
+        assert out == []
+
+    async def test_non_json_lines_skipped(self) -> None:
+        events: list[str] = ["not json", "also not json"]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        assert out == []
+
+    async def test_blank_lines_skipped(self) -> None:
+        out = await _collect(convert_codex_to_agentex_events(_aiter(["", "   ", "\n"])))
+        assert out == []
+
+    async def test_pre_decoded_dict_events(self) -> None:
+        """Events passed as dicts (pre-decoded) should work without JSON parsing."""
+        events: list[dict[str, Any]] = [
+            {"type": "item.started", "item": {"id": "m1", "type": "agent_message", "text": "hi"}},
+            {
+                "type": "item.completed",
+                "item": {"id": "m1", "type": "agent_message", "text": "hi"},
+            },
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        assert len(out) > 0
+
+    async def test_thread_started_no_message(self) -> None:
+        events = [{"type": "thread.started", "thread_id": "t1"}]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        assert out == []
+
+    async def test_turn_started_no_message(self) -> None:
+        out = await _collect(convert_codex_to_agentex_events(_aiter([{"type": "turn.started"}])))
+        assert out == []
+
+    async def test_turn_completed_no_message(self) -> None:
+        out = await _collect(
+            convert_codex_to_agentex_events(_aiter([{"type": "turn.completed", "usage": {"input_tokens": 1}}]))
+        )
+        assert out == []
+
+    async def test_unknown_event_type_no_message(self) -> None:
+        out = await _collect(convert_codex_to_agentex_events(_aiter([{"type": "some.future.event"}])))
+        assert out == []
+
+    async def test_unknown_item_type_no_message(self) -> None:
+        out = await _collect(
+            convert_codex_to_agentex_events(
+                _aiter([{"type": "item.started", "item": {"id": "x", "type": "future_item"}}])
+            )
+        )
+        assert out == []
+
+
+# ---------------------------------------------------------------------------
+# on_result callback
+# ---------------------------------------------------------------------------
+
+
+class TestOnResult:
+    async def test_session_id_captured(self) -> None:
+        result: dict[str, Any] = {}
+
+        def on_result(r: dict[str, Any]) -> None:
+            result.update(r)
+
+        events = [
+            {"type": "thread.started", "thread_id": "sess-xyz"},
+            {
+                "type": "turn.completed",
+                "usage": {"input_tokens": 5, "output_tokens": 3, "total_tokens": 8},
+            },
+        ]
+        await _collect(convert_codex_to_agentex_events(_aiter(events), on_result=on_result))
+        assert result["session_id"] == "sess-xyz"
+
+    async def test_usage_forwarded(self) -> None:
+        result: dict[str, Any] = {}
+
+        def on_result(r: dict[str, Any]) -> None:
+            result.update(r)
+
+        events = [
+            {
+                "type": "turn.completed",
+                "usage": {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
+            }
+        ]
+        await _collect(convert_codex_to_agentex_events(_aiter(events), on_result=on_result))
+        assert result["usage"] == {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}
+
+    async def test_tool_count(self) -> None:
+        result: dict[str, Any] = {}
+
+        def on_result(r: dict[str, Any]) -> None:
+            result.update(r)
+
+        events = [
+            {
+                "type": "item.started",
+                "item": {"id": "t1", "type": "command_execution", "command": "ls"},
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "t1",
+                    "type": "command_execution",
+                    "command": "ls",
+                    "aggregated_output": ".",
+                    "exit_code": 0,
+                },
+            },
+            {"type": "turn.completed", "usage": None},
+        ]
+        await _collect(convert_codex_to_agentex_events(_aiter(events), on_result=on_result))
+        assert result["tool_call_count"] == 1
+
+    async def test_no_callback_when_none(self) -> None:
+        """Passing on_result=None should not raise."""
+        events = [{"type": "turn.completed", "usage": None}]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events), on_result=None))
+        assert out == []
+
+    async def test_on_result_called_even_without_turn_completed(self) -> None:
+        """on_result fires at end of stream even if turn.completed never arrived."""
+        result: dict[str, Any] = {}
+
+        def on_result(r: dict[str, Any]) -> None:
+            result.update(r)
+
+        events: list[Any] = []
+        await _collect(convert_codex_to_agentex_events(_aiter(events), on_result=on_result))
+        assert result.get("usage") is None
+        assert result.get("session_id") is None
+
+
+# ---------------------------------------------------------------------------
+# Multi-step turn: tool → text
+# ---------------------------------------------------------------------------
+
+
+class TestMultiStepTurn:
+    async def test_tool_then_text_monotonic_indices(self) -> None:
+        events = [
+            {
+                "type": "item.started",
+                "item": {"id": "cmd1", "type": "command_execution", "command": "ls"},
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "cmd1",
+                    "type": "command_execution",
+                    "command": "ls",
+                    "aggregated_output": "file.txt",
+                    "exit_code": 0,
+                },
+            },
+            {
+                "type": "item.started",
+                "item": {"id": "msg1", "type": "agent_message", "text": ""},
+            },
+            {
+                "type": "item.completed",
+                "item": {"id": "msg1", "type": "agent_message", "text": "Done"},
+            },
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        indices = [e.index for e in out]
+        assert indices == sorted(indices), "indices must be monotonically non-decreasing"
+
+    async def test_two_text_blocks_distinct_indices(self) -> None:
+        events = [
+            {
+                "type": "item.started",
+                "item": {"id": "a", "type": "agent_message", "text": "first"},
+            },
+            {"type": "item.completed", "item": {"id": "a", "type": "agent_message", "text": "first"}},
+            {
+                "type": "item.started",
+                "item": {"id": "b", "type": "agent_message", "text": "second"},
+            },
+            {"type": "item.completed", "item": {"id": "b", "type": "agent_message", "text": "second"}},
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(events)))
+        starts = [e for e in out if isinstance(e, StreamTaskMessageStart)]
+        assert len(starts) == 2
+        assert starts[0].index != starts[1].index
+
+    async def test_json_string_events(self) -> None:
+        """Events may arrive as raw newline-delimited JSON strings."""
+        raw_events = [
+            json.dumps({"type": "item.started", "item": {"id": "s1", "type": "agent_message", "text": "hello"}}),
+            json.dumps({"type": "item.completed", "item": {"id": "s1", "type": "agent_message", "text": "hello"}}),
+        ]
+        out = await _collect(convert_codex_to_agentex_events(_aiter(raw_events)))
+        assert len(out) > 0
+        assert any(isinstance(e, StreamTaskMessageStart) for e in out)
diff --git a/tests/lib/adk/test_codex_turn.py b/tests/lib/adk/test_codex_turn.py
new file mode 100644
index 000000000..f6a046478
--- /dev/null
+++ b/tests/lib/adk/test_codex_turn.py
@@ -0,0 +1,282 @@
+"""Offline tests for CodexTurn and codex_usage_to_turn_usage.
+
+Tests cover:
+- TurnUsage normalization from raw codex usage dicts
+- Defensive handling of missing/invalid usage fields
+- CodexTurn: events property yields canonical StreamTaskMessage*
+- CodexTurn: usage() before and after stream exhaustion
+- CodexTurn: on_result wiring (session_id, counts propagate to usage())
+- CodexTurn satisfies HarnessTurn protocol
+"""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator
+
+import pytest
+
+from agentex.lib.core.harness.types import TurnUsage, HarnessTurn
+from agentex.types.task_message_update import (
+    StreamTaskMessageDone,
+    StreamTaskMessageFull,
+    StreamTaskMessageDelta,
+    StreamTaskMessageStart,
+)
+from agentex.lib.adk._modules._codex_turn import (
+    CodexTurn,
+    codex_usage_to_turn_usage,
+)
+
+
+async def _aiter(items: list[Any]) -> AsyncIterator[Any]:
+    for item in items:
+        yield item
+
+
+async def _collect(turn: CodexTurn) -> list[Any]:
+    return [msg async for msg in turn.events]
+
+
+# ---------------------------------------------------------------------------
+# codex_usage_to_turn_usage
+# ---------------------------------------------------------------------------
+
+
+class TestCodexUsageToTurnUsage:
+    def test_none_raw_all_none_tokens(self) -> None:
+        u = codex_usage_to_turn_usage(None)
+        assert u.input_tokens is None
+        assert u.output_tokens is None
+        assert u.total_tokens is None
+        assert u.cost_usd is None
+
+    def test_empty_dict_all_none_tokens(self) -> None:
+        u = codex_usage_to_turn_usage({})
+        assert u.input_tokens is None
+        assert u.output_tokens is None
+
+    def test_standard_usage(self) -> None:
+        raw = {"input_tokens": 100, "output_tokens": 50, "total_tokens": 150}
+        u = codex_usage_to_turn_usage(raw, model="o4-mini")
+        assert u.input_tokens == 100
+        assert u.output_tokens == 50
+        assert u.total_tokens == 150
+        assert u.model == "o4-mini"
+
+    def test_reasoning_tokens(self) -> None:
+        raw = {"input_tokens": 200, "output_tokens": 80, "reasoning_tokens": 60, "total_tokens": 340}
+        u = codex_usage_to_turn_usage(raw)
+        assert u.reasoning_tokens == 60
+
+    def test_real_zero_preserved(self) -> None:
+        """Explicit zeros in the payload must survive (not be treated as missing)."""
+        raw = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+        u = codex_usage_to_turn_usage(raw)
+        assert u.input_tokens == 0
+        assert u.output_tokens == 0
+
+    def test_cached_input_tokens(self) -> None:
+        raw = {"input_tokens": 100, "cached_input_tokens": 20, "output_tokens": 40}
+        u = codex_usage_to_turn_usage(raw)
+        assert u.cached_input_tokens == 20
+
+    def test_invalid_token_values_become_none(self) -> None:
+        raw = {"input_tokens": "not_a_number", "output_tokens": None}
+        u = codex_usage_to_turn_usage(raw)
+        assert u.input_tokens is None
+        assert u.output_tokens is None
+
+    def test_cost_explicit(self) -> None:
+        u = codex_usage_to_turn_usage(None, cost_usd=0.0042)
+        assert u.cost_usd == pytest.approx(0.0042)
+
+    def test_cost_from_raw(self) -> None:
+        u = codex_usage_to_turn_usage({"cost_usd": 0.001})
+        assert u.cost_usd == pytest.approx(0.001)
+
+    def test_explicit_cost_overrides_raw(self) -> None:
+        """Explicit cost_usd kwarg takes precedence over raw dict value."""
+        u = codex_usage_to_turn_usage({"cost_usd": 0.001}, cost_usd=0.002)
+        assert u.cost_usd == pytest.approx(0.002)
+
+    def test_tool_and_reasoning_counts(self) -> None:
+        u = codex_usage_to_turn_usage(None, tool_call_count=3, reasoning_count=2)
+        assert u.num_tool_calls == 3
+        assert u.num_reasoning_blocks == 2
+
+    def test_num_llm_calls_always_one(self) -> None:
+        u = codex_usage_to_turn_usage(None)
+        assert u.num_llm_calls == 1
+
+    def test_duration_ms(self) -> None:
+        u = codex_usage_to_turn_usage(None, duration_ms=1234)
+        assert u.duration_ms == 1234
+
+    def test_model_none_when_not_provided(self) -> None:
+        u = codex_usage_to_turn_usage(None)
+        assert u.model is None
+
+    def test_non_dict_raw_treated_as_empty(self) -> None:
+        u = codex_usage_to_turn_usage("bad input")  # type: ignore[arg-type]
+        assert u.input_tokens is None
+
+    def test_returns_turn_usage_instance(self) -> None:
+        u = codex_usage_to_turn_usage({})
+        assert isinstance(u, TurnUsage)
+
+
+# ---------------------------------------------------------------------------
+# CodexTurn protocol conformance
+# ---------------------------------------------------------------------------
+
+
+class TestCodexTurnProtocol:
+    def test_implements_harness_turn_protocol(self) -> None:
+        turn = CodexTurn(_aiter([]), model="o4-mini")
+        assert isinstance(turn, HarnessTurn)
+
+    def test_usage_before_exhaustion_returns_zero_turn_usage(self) -> None:
+        turn = CodexTurn(_aiter([]), model="test-model")
+        u = turn.usage()
+        assert isinstance(u, TurnUsage)
+        assert u.model == "test-model"
+        assert u.input_tokens is None
+        assert u.num_tool_calls == 0
+
+
+# ---------------------------------------------------------------------------
+# CodexTurn events
+# ---------------------------------------------------------------------------
+
+
+class TestCodexTurnEvents:
+    async def test_events_yield_stream_task_messages(self) -> None:
+        events = [
+            {"type": "item.started", "item": {"id": "m1", "type": "agent_message", "text": "hi"}},
+            {"type": "item.completed", "item": {"id": "m1", "type": "agent_message", "text": "hi"}},
+        ]
+        turn = CodexTurn(_aiter(events), model="o4-mini")
+        out = await _collect(turn)
+        assert len(out) > 0
+        for msg in out:
+            assert isinstance(
+                msg,
+                (StreamTaskMessageStart, StreamTaskMessageDelta, StreamTaskMessageFull, StreamTaskMessageDone),
+            )
+
+    async def test_usage_after_exhaustion_has_tokens(self) -> None:
+        events = [
+            {
+                "type": "turn.completed",
+                "usage": {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
+            }
+        ]
+        turn = CodexTurn(_aiter(events), model="o4-mini")
+        await _collect(turn)
+        u = turn.usage()
+        assert u.input_tokens == 10
+        assert u.output_tokens == 5
+        assert u.total_tokens == 15
+
+    async def test_usage_model_propagated(self) -> None:
+        events = [{"type": "turn.completed", "usage": None}]
+        turn = CodexTurn(_aiter(events), model="codex-model-x")
+        await _collect(turn)
+        assert turn.usage().model == "codex-model-x"
+
+    async def test_tool_count_in_usage(self) -> None:
+        events = [
+            {
+                "type": "item.started",
+                "item": {"id": "t1", "type": "command_execution", "command": "ls"},
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "t1",
+                    "type": "command_execution",
+                    "command": "ls",
+                    "aggregated_output": ".",
+                    "exit_code": 0,
+                },
+            },
+            {"type": "turn.completed", "usage": None},
+        ]
+        turn = CodexTurn(_aiter(events), model="o4-mini")
+        await _collect(turn)
+        assert turn.usage().num_tool_calls == 1
+
+    async def test_events_property_stable_across_accesses(self) -> None:
+        """`.events` returns the same generator; usage survives a second access."""
+        events = [
+            {
+                "type": "item.started",
+                "item": {"id": "t1", "type": "command_execution", "command": "ls"},
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "t1",
+                    "type": "command_execution",
+                    "command": "ls",
+                    "aggregated_output": ".",
+                    "exit_code": 0,
+                },
+            },
+            {"type": "turn.completed", "usage": {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}},
+        ]
+        turn = CodexTurn(_aiter(events), model="o4-mini")
+        assert turn.events is turn.events  # same generator, not a fresh wrapper
+        await _collect(turn)
+        # A second access must NOT re-wrap the exhausted iterator and reset usage.
+        _ = turn.events
+        assert turn.usage().total_tokens == 15
+        assert turn.usage().num_tool_calls == 1
+
+    async def test_reasoning_count_in_usage(self) -> None:
+        events = [
+            {"type": "item.started", "item": {"id": "r1", "type": "reasoning", "text": ""}},
+            {
+                "type": "item.completed",
+                "item": {"id": "r1", "type": "reasoning", "text": "thought"},
+            },
+            {"type": "turn.completed", "usage": None},
+        ]
+        turn = CodexTurn(_aiter(events), model="o4-mini")
+        await _collect(turn)
+        assert turn.usage().num_reasoning_blocks == 1
+
+    async def test_duration_ms_passed_through(self) -> None:
+        events = [{"type": "turn.completed", "usage": None}]
+        turn = CodexTurn(_aiter(events), model="o4-mini", duration_ms=999)
+        await _collect(turn)
+        assert turn.usage().duration_ms == 999
+
+    async def test_cost_usd_passed_through(self) -> None:
+        events = [{"type": "turn.completed", "usage": None}]
+        turn = CodexTurn(_aiter(events), model="o4-mini", cost_usd=0.007)
+        await _collect(turn)
+        assert turn.usage().cost_usd == pytest.approx(0.007)
+
+    async def test_empty_stream_usage_still_valid(self) -> None:
+        turn = CodexTurn(_aiter([]), model="o4-mini")
+        await _collect(turn)
+        u = turn.usage()
+        assert isinstance(u, TurnUsage)
+        assert u.num_llm_calls == 1
+
+    async def test_reasoning_tokens_propagated(self) -> None:
+        events = [
+            {
+                "type": "turn.completed",
+                "usage": {
+                    "input_tokens": 100,
+                    "output_tokens": 60,
+                    "reasoning_tokens": 40,
+                    "total_tokens": 200,
+                },
+            }
+        ]
+        turn = CodexTurn(_aiter(events), model="o4-mini")
+        await _collect(turn)
+        assert turn.usage().reasoning_tokens == 40
diff --git a/tests/lib/core/harness/conformance/test_codex_conformance.py b/tests/lib/core/harness/conformance/test_codex_conformance.py
new file mode 100644
index 000000000..b00ed2970
--- /dev/null
+++ b/tests/lib/core/harness/conformance/test_codex_conformance.py
@@ -0,0 +1,225 @@
+"""Conformance fixtures for the codex harness tap.
+
+Each fixture is derived from a ``CodexTurn`` and registered into the
+cross-channel conformance runner so that span derivation is validated
+alongside all other harness taps.
+
+Following the per-module registry pattern from runner.py: this module keeps
+its own local list of fixtures, both registers them AND parametrizes over
+them, to guarantee determinism regardless of pytest collection order.
+"""
+
+from __future__ import annotations
+
+import asyncio
+from typing import Any, AsyncIterator
+
+import pytest
+
+from agentex.lib.core.harness.types import StreamTaskMessage
+from agentex.lib.adk._modules._codex_sync import convert_codex_to_agentex_events
+
+from .runner import Fixture, register, derive_all
+
+
+async def _aiter(items: list[Any]) -> AsyncIterator[Any]:
+    for item in items:
+        yield item
+
+
+async def _collect(events: list[Any]) -> list[StreamTaskMessage]:
+    return [msg async for msg in convert_codex_to_agentex_events(_aiter(events))]
+
+
+def _build(events: list[Any]) -> list[StreamTaskMessage]:
+    return asyncio.run(_collect(events))
+
+
+# ---------------------------------------------------------------------------
+# Fixture 1: plain text response
+# ---------------------------------------------------------------------------
+
+_CODEX_TEXT = Fixture(
+    name="codex-text",
+    events=_build(
+        [
+            {"type": "thread.started", "thread_id": "thread-abc"},
+            {"type": "turn.started"},
+            {
+                "type": "item.started",
+                "item": {"id": "msg1", "type": "agent_message", "text": "Hello"},
+            },
+            {
+                "type": "item.updated",
+                "item": {"id": "msg1", "type": "agent_message", "text": "Hello, world"},
+            },
+            {
+                "type": "item.completed",
+                "item": {"id": "msg1", "type": "agent_message", "text": "Hello, world!"},
+            },
+            {
+                "type": "turn.completed",
+                "usage": {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
+            },
+        ]
+    ),
+)
+register(_CODEX_TEXT)
+
+# ---------------------------------------------------------------------------
+# Fixture 2: tool call (command_execution)
+# ---------------------------------------------------------------------------
+
+_CODEX_TOOL = Fixture(
+    name="codex-tool-command",
+    events=_build(
+        [
+            {"type": "thread.started", "thread_id": "thread-cmd"},
+            {
+                "type": "item.started",
+                "item": {
+                    "id": "tool1",
+                    "type": "command_execution",
+                    "command": "ls /workspace",
+                },
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "tool1",
+                    "type": "command_execution",
+                    "command": "ls /workspace",
+                    "aggregated_output": "file1.txt\nfile2.py",
+                    "exit_code": 0,
+                },
+            },
+            {
+                "type": "turn.completed",
+                "usage": {"input_tokens": 20, "output_tokens": 8, "total_tokens": 28},
+            },
+        ]
+    ),
+)
+register(_CODEX_TOOL)
+
+# ---------------------------------------------------------------------------
+# Fixture 3: reasoning block
+# ---------------------------------------------------------------------------
+
+_CODEX_REASONING = Fixture(
+    name="codex-reasoning",
+    events=_build(
+        [
+            {"type": "thread.started", "thread_id": "thread-reason"},
+            {
+                "type": "item.started",
+                "item": {"id": "r1", "type": "reasoning", "text": ""},
+            },
+            {
+                "type": "item.updated",
+                "item": {"id": "r1", "type": "reasoning", "text": "Step 1: analyze the problem"},
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "r1",
+                    "type": "reasoning",
+                    "text": "Step 1: analyze the problem\nStep 2: solve it",
+                },
+            },
+            {
+                "type": "item.started",
+                "item": {"id": "msg2", "type": "agent_message", "text": ""},
+            },
+            {
+                "type": "item.completed",
+                "item": {"id": "msg2", "type": "agent_message", "text": "The answer is 42."},
+            },
+            {
+                "type": "turn.completed",
+                "usage": {
+                    "input_tokens": 30,
+                    "output_tokens": 20,
+                    "reasoning_tokens": 50,
+                    "total_tokens": 100,
+                },
+            },
+        ]
+    ),
+)
+register(_CODEX_REASONING)
+
+# ---------------------------------------------------------------------------
+# Fixture 4: multi-step (mcp_tool_call + follow-up text)
+# ---------------------------------------------------------------------------
+
+_CODEX_MULTI = Fixture(
+    name="codex-multi-step",
+    events=_build(
+        [
+            {"type": "thread.started", "thread_id": "thread-multi"},
+            {
+                "type": "item.started",
+                "item": {
+                    "id": "mcp1",
+                    "type": "mcp_tool_call",
+                    "server": "filesystem",
+                    "tool": "read_file",
+                    "arguments": {"path": "/workspace/README.md"},
+                },
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "mcp1",
+                    "type": "mcp_tool_call",
+                    "server": "filesystem",
+                    "tool": "read_file",
+                    "arguments": {"path": "/workspace/README.md"},
+                    "result": {"content": "# My Project"},
+                },
+            },
+            {
+                "type": "item.started",
+                "item": {"id": "msg3", "type": "agent_message", "text": "The README says:"},
+            },
+            {
+                "type": "item.completed",
+                "item": {
+                    "id": "msg3",
+                    "type": "agent_message",
+                    "text": "The README says: # My Project",
+                },
+            },
+            {
+                "type": "turn.completed",
+                "usage": {"input_tokens": 50, "output_tokens": 30, "total_tokens": 80},
+            },
+        ]
+    ),
+)
+register(_CODEX_MULTI)
+
+
+# ---------------------------------------------------------------------------
+# Local parametrized tests (cross-channel conformance)
+# ---------------------------------------------------------------------------
+
+_LOCAL_FIXTURES = [_CODEX_TEXT, _CODEX_TOOL, _CODEX_REASONING, _CODEX_MULTI]
+
+
+@pytest.mark.parametrize("fixture", _LOCAL_FIXTURES, ids=lambda f: f.name)
+def test_codex_span_derivation_is_deterministic(fixture: Fixture) -> None:
+    """Span derivation over codex events is deterministic (cross-channel guarantee).
+
+    Deriving twice over the same events yields identical signals. This is the
+    invariant that makes ``yield`` and ``auto_send`` delivery equivalent: both
+    observe the same event stream, so their tracing side effects are identical.
+    """
+    assert derive_all(fixture.events) == derive_all(fixture.events)
+
+
+@pytest.mark.parametrize("fixture", _LOCAL_FIXTURES, ids=lambda f: f.name)
+def test_codex_events_are_non_empty(fixture: Fixture) -> None:
+    """Every codex fixture yields at least one StreamTaskMessage*."""
+    assert len(fixture.events) > 0

From fa60632f9be84315a3fdc627745ae5b605994bd8 Mon Sep 17 00:00:00 2001
From: Declan Brady <declan.brady@scale.com>
Date: Mon, 22 Jun 2026 20:10:48 -0400
Subject: [PATCH 09/10] feat(harness): public adk facade + docs for the unified
 harness surface (PR 9) (#423)

---
 adk/docs/harness.md             | 196 ++++++++++++++++++++++++++++++++
 src/agentex/lib/adk/__init__.py |  23 ++++
 2 files changed, 219 insertions(+)
 create mode 100644 adk/docs/harness.md

diff --git a/adk/docs/harness.md b/adk/docs/harness.md
new file mode 100644
index 000000000..6a9d8947a
--- /dev/null
+++ b/adk/docs/harness.md
@@ -0,0 +1,196 @@
+# Unified Harness Surface
+
+The unified harness surface gives every agent harness (pydantic-ai, LangGraph, OpenAI Agents, and future parsers) a single, shared path to streaming, message persistence, and tracing. The Agentex `StreamTaskMessage*` event stream is the canonical wire format. A harness tap produces that stream once; the shared machinery delivers it and derives spans from it.
+
+All public names are re-exported from `agentex.lib.adk`:
+
+```python
+from agentex.lib.adk import (
+    UnifiedEmitter,
+    SpanTracer,
+    TurnUsage,
+    TurnResult,
+    HarnessTurn,
+    StreamTaskMessage,
+    OpenSpan,
+    CloseSpan,
+    SpanSignal,
+)
+```
+
+The implementation lives at `src/agentex/lib/core/harness/`.
+
+---
+
+## The canonical stream: `StreamTaskMessage`
+
+`StreamTaskMessage` is a union of the four wire-protocol update types:
+
+```
+StreamTaskMessageStart  - opens a content slot (text, reasoning, tool request, ...)
+StreamTaskMessageDelta  - appends a token/fragment to an open slot
+StreamTaskMessageFull   - posts a complete message in one shot (tool response, ...)
+StreamTaskMessageDone   - closes an open slot
+```
+
+Every harness tap produces a sequence of these. Everything downstream (delivery, tracing) reads the same sequence.
+
+---
+
+## Per-harness taps: `convert_<harness>_to_agentex_events`
+
+A tap is an async generator that translates the harness's native event stream into `StreamTaskMessage*` events. The currently shipped taps are:
+
+| Harness | Tap function | Exported from |
+|---|---|---|
+| pydantic-ai | `convert_pydantic_ai_to_agentex_events` | `agentex.lib.adk` |
+| LangGraph | `convert_langgraph_to_agentex_events` | `agentex.lib.adk` |
+
+Taps for claude-code and codex will be added in subsequent PRs (AGX1-420, AGX1-421) and exported from `agentex.lib.adk` in the same way.
+
+---
+
+## `HarnessTurn` protocol
+
+`HarnessTurn` is the interface a harness turn object must satisfy to plug into `UnifiedEmitter`:
+
+```python
+@runtime_checkable
+class HarnessTurn(Protocol):
+    @property
+    def events(self) -> AsyncIterator[StreamTaskMessage]: ...
+
+    def usage(self) -> TurnUsage: ...
+```
+
+`events` is the canonical stream for this turn. `usage()` is valid only after `events` is exhausted (async generators cannot cleanly return a value to the consumer, so usage travels out-of-band).
+
+---
+
+## `TurnUsage`
+
+Token counts and cost for one turn, harness-independent:
+
+```python
+class TurnUsage(BaseModel):
+    model: str | None = None
+    input_tokens: int | None = None
+    output_tokens: int | None = None
+    cached_input_tokens: int | None = None
+    reasoning_tokens: int | None = None
+    total_tokens: int | None = None
+    cost_usd: float | None = None
+    duration_ms: int | None = None
+    num_llm_calls: int = 0
+    num_tool_calls: int = 0
+    num_reasoning_blocks: int = 0
+```
+
+Field names align with `agentex.lib.core.observability.llm_metrics` for easy conversion.
+
+---
+
+## `UnifiedEmitter`
+
+`UnifiedEmitter` ties a turn's canonical stream, tracing context, and delivery mode together. Construct one per turn with the task/trace context from the request:
+
+```python
+emitter = UnifiedEmitter(
+    task_id=params.task.id,
+    trace_id=params.task.id,   # or None to disable tracing
+    parent_span_id=turn_span.id if turn_span else None,
+)
+```
+
+**Tracing is on by default** when `trace_id` is provided. To disable it explicitly, pass `tracer=False`. To inject a custom `SpanTracer` (e.g. in tests), pass it as `tracer=<instance>`.
+
+### Delivery mode 1: `yield_turn` (sync HTTP ACP)
+
+For sync ACP agents that return events directly over the HTTP response:
+
+```python
+@acp.on_message_send
+async def handle(params):
+    turn = MyHarnessTurn(params)          # implements HarnessTurn
+    async for event in emitter.yield_turn(turn):
+        yield event
+```
+
+`yield_turn` forwards each event to the caller and traces spans as a side effect. It is a passthrough when `tracer` is `None`.
+
+### Delivery mode 2: `auto_send_turn` (async/Temporal)
+
+For async or Temporal agents that push to the task stream via Redis:
+
+```python
+result: TurnResult = await emitter.auto_send_turn(turn, created_at=workflow.now())
+```
+
+`auto_send_turn` drives `adk.streaming` contexts for every message in the stream, derives and records spans, and returns a `TurnResult` with the final text and usage. Pass `created_at` under Temporal to back-date message timestamps deterministically.
+
+---
+
+## `TurnResult`
+
+```python
+class TurnResult(BaseModel):
+    final_text: str = ""
+    usage: TurnUsage = TurnUsage()
+```
+
+Returned by `auto_send_turn`. `final_text` is the last text segment of the turn (multi-step runs return only the final segment, matching `stream_langgraph_events` / `stream_pydantic_ai_events` semantics).
+
+---
+
+## Tracing: span derivation
+
+Spans are derived from the canonical stream by `SpanDeriver` (pure, no `adk` dependency) and dispatched to `adk.tracing` by `SpanTracer`. The mapping:
+
+- `StreamTaskMessageStart(ToolRequestContent)` + `StreamTaskMessageDone` on that index -> tool span open (keyed by `tool_call_id`)
+- `StreamTaskMessageFull(ToolResponseContent)` whose `tool_call_id` was opened -> tool span close
+- `StreamTaskMessageFull(ToolRequestContent)` (harnesses that emit tool calls as Full) -> opens a tool span; matching `Full(ToolResponseContent)` closes it
+- `StreamTaskMessageStart(ReasoningContent)` + `StreamTaskMessageDone` -> reasoning span
+
+`SpanTracer` is `SpanDeriver`'s consumer. You can inject a custom `SpanTracer` via `UnifiedEmitter(tracer=<instance>)` for advanced use or testing.
+
+---
+
+## Usage examples by channel
+
+### Sync ACP (pydantic-ai tap)
+
+```python
+import agentex.lib.adk as adk
+from agentex.lib.adk import UnifiedEmitter, convert_pydantic_ai_to_agentex_events
+
+@acp.on_message_send
+async def handle(params):
+    task_id = params.task.id
+    async with adk.tracing.span(trace_id=task_id, name="message", ...) as turn_span:
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+        tap = convert_pydantic_ai_to_agentex_events(pydantic_stream)
+        # wrap tap in a HarnessTurn then yield_turn, or yield directly:
+        async for event in tap:
+            yield event
+```
+
+For the pre-unified sync path the tap is still yielded directly; `UnifiedEmitter.yield_turn` is the forward-looking integration point when a `HarnessTurn` wrapper is available.
+
+### Async Temporal (auto-send)
+
+```python
+from agentex.lib.adk import UnifiedEmitter
+
+emitter = UnifiedEmitter(
+    task_id=task_id,
+    trace_id=task_id,
+    parent_span_id=parent_span_id,
+)
+result = await emitter.auto_send_turn(turn, created_at=workflow.now())
+# result.final_text — last text segment
+# result.usage     — TurnUsage (tokens, cost, ...)
+```
diff --git a/src/agentex/lib/adk/__init__.py b/src/agentex/lib/adk/__init__.py
index f6713be7c..fedd52f7a 100644
--- a/src/agentex/lib/adk/__init__.py
+++ b/src/agentex/lib/adk/__init__.py
@@ -27,6 +27,19 @@
 from agentex.lib.adk._modules.tasks import TasksModule
 from agentex.lib.adk._modules.tracing import TracingModule
 
+# Unified harness surface (AGX1-375)
+from agentex.lib.core.harness import (
+    UnifiedEmitter,
+    SpanTracer,
+    OpenSpan,
+    CloseSpan,
+    SpanSignal,
+    StreamTaskMessage,
+    TurnUsage,
+    TurnResult,
+    HarnessTurn,
+)
+
 from agentex.lib.adk import providers
 from agentex.lib.adk import utils
 
@@ -69,6 +82,16 @@
     "convert_codex_to_agentex_events",
     "CodexTurn",
     "codex_usage_to_turn_usage",
+    # Unified harness surface (AGX1-375)
+    "UnifiedEmitter",
+    "SpanTracer",
+    "OpenSpan",
+    "CloseSpan",
+    "SpanSignal",
+    "StreamTaskMessage",
+    "TurnUsage",
+    "TurnResult",
+    "HarnessTurn",
     # Providers
     "providers",
     # Utils

From eea2ce6c944114255f2a3652c831cd9d96ff0020 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
 <142633134+stainless-app[bot]@users.noreply.github.com>
Date: Tue, 23 Jun 2026 00:11:09 +0000
Subject: [PATCH 10/10] chore: release main

---
 .release-please-manifest.json |  4 ++--
 CHANGELOG.md                  | 20 ++++++++++++++++++++
 adk/CHANGELOG.md              |  8 ++++++++
 adk/pyproject.toml            |  2 +-
 pyproject.toml                |  2 +-
 src/agentex/_version.py       |  2 +-
 6 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index be44cf037..9a40fa434 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,4 +1,4 @@
 {
-  ".": "0.14.0",
-  "adk": "0.13.2"
+  ".": "0.15.0",
+  "adk": "0.14.0"
 }
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8f81295a9..fb03baa67 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,26 @@
 
 * **tracing:** emit OTel metrics for async span queue depth, batch drain, and SGP export success/failure (HTTP status labels). Disable SDK-side recording with ``AGENTEX_TRACING_METRICS=0``.
 
+## 0.15.0 (2026-06-23)
+
+Full Changelog: [agentex-client-v0.14.0...agentex-client-v0.15.0](https://github.com/scaleapi/scale-agentex-python/compare/agentex-client-v0.14.0...agentex-client-v0.15.0)
+
+### Features
+
+* **claude-code:** stream-json parser tap for the unified harness surface ([#420](https://github.com/scaleapi/scale-agentex-python/issues/420)) ([904339c](https://github.com/scaleapi/scale-agentex-python/commit/904339c21b8cd641a02d903c03d4a8730b4d7e84))
+* **codex:** event-stream parser tap for the unified harness surface ([#421](https://github.com/scaleapi/scale-agentex-python/issues/421)) ([9b2b031](https://github.com/scaleapi/scale-agentex-python/commit/9b2b03144cc67bb497e0a301686207aba2629758))
+* **harness:** public adk facade + docs for the unified harness surface (PR 9) ([#423](https://github.com/scaleapi/scale-agentex-python/issues/423)) ([fa60632](https://github.com/scaleapi/scale-agentex-python/commit/fa60632f9be84315a3fdc627745ae5b605994bd8))
+* **harness:** unified harness surface — foundation (span derivation, delivery adapters, emitter) ([#412](https://github.com/scaleapi/scale-agentex-python/issues/412)) ([a9cacf4](https://github.com/scaleapi/scale-agentex-python/commit/a9cacf4eb71697351ee658a570636f04bbf31ad5))
+* **langgraph:** migrate LangGraph harness onto unified surface ([#417](https://github.com/scaleapi/scale-agentex-python/issues/417)) ([d344228](https://github.com/scaleapi/scale-agentex-python/commit/d34422845de4b80ed69d2dccfdb0c680ef2fbca3))
+* **openai-agents:** migrate onto the unified harness surface ([#416](https://github.com/scaleapi/scale-agentex-python/issues/416)) ([d10e151](https://github.com/scaleapi/scale-agentex-python/commit/d10e1510bd5da44ad5acc5cac638750122083fce))
+* **pydantic-ai:** migrate onto unified harness surface (PR4) ([#415](https://github.com/scaleapi/scale-agentex-python/issues/415)) ([5ec62c2](https://github.com/scaleapi/scale-agentex-python/commit/5ec62c20781d24fc3e0b92734fcd444b1e791d70))
+* **streaming:** stream tool call argument deltas in TemporalStreamingModel ([#355](https://github.com/scaleapi/scale-agentex-python/issues/355)) ([c8de1d4](https://github.com/scaleapi/scale-agentex-python/commit/c8de1d4c9c3b5b3c16ad4aaf9644c1ba0d618757))
+
+
+### Bug Fixes
+
+* **harness:** assert cross-channel (yield vs auto-send) conformance equivalence [AGX1-373] ([#414](https://github.com/scaleapi/scale-agentex-python/issues/414)) ([694960f](https://github.com/scaleapi/scale-agentex-python/commit/694960f913b8ba521d9236e876e5e00f57a3a3ff))
+
 ## 0.14.0 (2026-06-22)
 
 Full Changelog: [agentex-client-v0.13.1...agentex-client-v0.14.0](https://github.com/scaleapi/scale-agentex-python/compare/agentex-client-v0.13.1...agentex-client-v0.14.0)
diff --git a/adk/CHANGELOG.md b/adk/CHANGELOG.md
index 8c15355d9..ac7404e6b 100644
--- a/adk/CHANGELOG.md
+++ b/adk/CHANGELOG.md
@@ -1,5 +1,13 @@
 # Changelog
 
+## 0.14.0 (2026-06-23)
+
+Full Changelog: [agentex-sdk-v0.13.2...agentex-sdk-v0.14.0](https://github.com/scaleapi/scale-agentex-python/compare/agentex-sdk-v0.13.2...agentex-sdk-v0.14.0)
+
+### Features
+
+* **harness:** public adk facade + docs for the unified harness surface (PR 9) ([#423](https://github.com/scaleapi/scale-agentex-python/issues/423)) ([fa60632](https://github.com/scaleapi/scale-agentex-python/commit/fa60632f9be84315a3fdc627745ae5b605994bd8))
+
 ## 0.13.2 (2026-06-22)
 
 Full Changelog: [agentex-sdk-v0.13.1...agentex-sdk-v0.13.2](https://github.com/scaleapi/scale-agentex-python/compare/agentex-sdk-v0.13.1...agentex-sdk-v0.13.2)
diff --git a/adk/pyproject.toml b/adk/pyproject.toml
index 946367d7f..1d8c00a40 100644
--- a/adk/pyproject.toml
+++ b/adk/pyproject.toml
@@ -4,7 +4,7 @@
 # (agentex/{__init__.py, _*.py, types/, resources/}) ships from the slim
 # sibling package `agentex-client` which is pinned as a runtime dep.
 name = "agentex-sdk"
-version = "0.13.2"
+version = "0.14.0"
 description = "Agent Development Kit (ADK) overlay for the Agentex API — FastACP server, Temporal workflows, LLM provider integrations, observability"
 license = "Apache-2.0"
 authors = [
diff --git a/pyproject.toml b/pyproject.toml
index 98134d993..7ee0cf56b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,7 +3,7 @@
 # overlay (formerly `src/agentex/lib/*`) now lives in `adk/` and ships
 # as the sibling `agentex-sdk` package — see `adk/pyproject.toml`.
 name = "agentex-client"
-version = "0.14.0"
+version = "0.15.0"
 description = "The official Python REST client for the Agentex API"
 dynamic = ["readme"]
 license = "Apache-2.0"
diff --git a/src/agentex/_version.py b/src/agentex/_version.py
index 551c0dbac..c567e168b 100644
--- a/src/agentex/_version.py
+++ b/src/agentex/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "agentex"
-__version__ = "0.14.0"  # x-release-please-version
+__version__ = "0.15.0"  # x-release-please-version