scaleapi · stainless-app · Jun 22, 2026 · Jun 22, 2026 · Jun 22, 2026 · Jun 22, 2026
diff --git a/.github/workflows/agentex-tutorials-test.yml b/.github/workflows/agentex-tutorials-test.yml
@@ -49,6 +49,29 @@ jobs:
           curl -LsSf https://astral.sh/uv/install.sh | sh
           echo "$HOME/.local/bin" >> $GITHUB_PATH
 
+      # Subprocess-CLI harnesses: each CLI is installed only for its matching
+      # tutorial; both steps are skipped elsewhere. Gating lives in step-level
+      # `if:` so matrix values are never interpolated into the shell script.
+      # npm is preinstalled on ubuntu runners. Versions mirror the golden agent's
+      # sandbox image (teams/sgp/agents/golden_agent/sandbox/Dockerfile):
+      # claude-code uses the same CLAUDE_CODE_VERSION pin and codex is unpinned in
+      # both places. Bump CLAUDE_CODE_VERSION in lockstep with that Dockerfile.
+      - name: Install Claude Code CLI (claude-code tutorials only)
+        if: ${{ contains(matrix.tutorial, 'claude_code') }}
+        env:
+          CLAUDE_CODE_VERSION: "2.1.142"
+        run: |
+          echo "📦 Installing Claude Code CLI (v${CLAUDE_CODE_VERSION})..."
+          npm install -g "@anthropic-ai/claude-code@${CLAUDE_CODE_VERSION}"
+          claude --version || true
+
+      - name: Install Codex CLI (codex tutorials only)
+        if: ${{ contains(matrix.tutorial, 'codex') }}
+        run: |
+          echo "📦 Installing Codex CLI..."
+          npm install -g @openai/codex
+          codex --version || true
+
       - name: Pull latest AgentEx image
         run: |
           echo "🐳 Pulling latest Scale AgentEx Docker image..."
@@ -136,6 +159,11 @@ jobs:
         working-directory: ./examples/tutorials
         env:
           OPENAI_API_KEY: ${{ secrets.TUTORIAL_OPENAI_API_KEY }}
+          ANTHROPIC_API_KEY: ${{ secrets.TUTORIAL_ANTHROPIC_API_KEY }}
+          # Enable the gated live tests only for the matching subprocess-CLI
+          # harness tutorial (the CLI is installed for it in the step above).
+          CLAUDE_LIVE_TESTS: ${{ contains(matrix.tutorial, 'claude_code') && '1' || '' }}
+          CODEX_LIVE_TESTS: ${{ contains(matrix.tutorial, 'codex') && '1' || '' }}
           HEALTH_CHECK_PORT: 8080 # Use non-privileged port for temporal worker health checks
         run: |
           echo "Testing tutorial: ${{ matrix.tutorial }}"

diff --git a/.github/workflows/harness-integration.yml b/.github/workflows/harness-integration.yml
@@ -0,0 +1,66 @@
+name: Harness Integration
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    paths:
+      - "src/agentex/lib/core/harness/**"
+      - "src/agentex/lib/adk/_modules/**"
+      - "tests/lib/core/harness/test_harness_pydantic_ai_*.py"
+      - "tests/lib/core/harness/test_harness_langgraph_*.py"
+      - ".github/workflows/harness-integration.yml"
+
+# Least privilege: the jobs in this workflow check out the repo and run local
+# scripts, so the GITHUB_TOKEN is limited to read access on repository contents.
+permissions:
+  contents: read
+
+jobs:
+  conformance:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2
+        with:
+          version: "0.10.2"
+
+      - name: Bootstrap
+        run: ./scripts/bootstrap
+
+      # Run through scripts/test rather than calling pytest directly, so this
+      # suite uses the same invocation as the main CI test job
+      # (DEFER_PYDANTIC_BUILD=false, `uv run --isolated --all-packages
+      # --all-extras pytest`, min/max supported Python versions). A bare
+      # `uv run pytest` risks an all-extras-only dep passing locally, failing in CI.
+      - name: Conformance suite
+        run: ./scripts/test tests/lib/core/harness/ -v
+
+  # Per-harness, per-channel (sync / async / temporal) integration tests for
+  # every migrated harness. Despite the `live-matrix` job id, these run offline:
+  # they rely on fake streams / TestModel + fake streaming/tracing and need no
+  # live infrastructure. Future harness migration PRs (6-8) add their harness to
+  # the matrix below and their test paths to the triggers above.
+  live-matrix:
+    name: ${{ matrix.harness }}-${{ matrix.channel }}
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        harness: [pydantic_ai, langgraph]
+        channel: [sync, async, temporal]
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2
+        with:
+          version: "0.10.2"
+
+      - name: Bootstrap
+        run: ./scripts/bootstrap
+
+      - name: ${{ matrix.harness }} ${{ matrix.channel }} integration tests (offline)
+        run: ./scripts/test tests/lib/core/harness/test_harness_${{ matrix.harness }}_${{ matrix.channel }}.py -v
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
@@ -1,4 +1,4 @@
 {
-  ".": "0.14.0",
-  "adk": "0.13.2"
+  ".": "0.15.0",
+  "adk": "0.14.0"
 }
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,26 @@
 
 * **tracing:** emit OTel metrics for async span queue depth, batch drain, and SGP export success/failure (HTTP status labels). Disable SDK-side recording with ``AGENTEX_TRACING_METRICS=0``.
 
+## 0.15.0 (2026-06-23)
+
+Full Changelog: [agentex-client-v0.14.0...agentex-client-v0.15.0](https://github.com/scaleapi/scale-agentex-python/compare/agentex-client-v0.14.0...agentex-client-v0.15.0)
+
+### Features
+
+* **claude-code:** stream-json parser tap for the unified harness surface ([#420](https://github.com/scaleapi/scale-agentex-python/issues/420)) ([904339c](https://github.com/scaleapi/scale-agentex-python/commit/904339c21b8cd641a02d903c03d4a8730b4d7e84))
+* **codex:** event-stream parser tap for the unified harness surface ([#421](https://github.com/scaleapi/scale-agentex-python/issues/421)) ([9b2b031](https://github.com/scaleapi/scale-agentex-python/commit/9b2b03144cc67bb497e0a301686207aba2629758))
+* **harness:** public adk facade + docs for the unified harness surface (PR 9) ([#423](https://github.com/scaleapi/scale-agentex-python/issues/423)) ([fa60632](https://github.com/scaleapi/scale-agentex-python/commit/fa60632f9be84315a3fdc627745ae5b605994bd8))
+* **harness:** unified harness surface — foundation (span derivation, delivery adapters, emitter) ([#412](https://github.com/scaleapi/scale-agentex-python/issues/412)) ([a9cacf4](https://github.com/scaleapi/scale-agentex-python/commit/a9cacf4eb71697351ee658a570636f04bbf31ad5))
+* **langgraph:** migrate LangGraph harness onto unified surface ([#417](https://github.com/scaleapi/scale-agentex-python/issues/417)) ([d344228](https://github.com/scaleapi/scale-agentex-python/commit/d34422845de4b80ed69d2dccfdb0c680ef2fbca3))
+* **openai-agents:** migrate onto the unified harness surface ([#416](https://github.com/scaleapi/scale-agentex-python/issues/416)) ([d10e151](https://github.com/scaleapi/scale-agentex-python/commit/d10e1510bd5da44ad5acc5cac638750122083fce))
+* **pydantic-ai:** migrate onto unified harness surface (PR4) ([#415](https://github.com/scaleapi/scale-agentex-python/issues/415)) ([5ec62c2](https://github.com/scaleapi/scale-agentex-python/commit/5ec62c20781d24fc3e0b92734fcd444b1e791d70))
+* **streaming:** stream tool call argument deltas in TemporalStreamingModel ([#355](https://github.com/scaleapi/scale-agentex-python/issues/355)) ([c8de1d4](https://github.com/scaleapi/scale-agentex-python/commit/c8de1d4c9c3b5b3c16ad4aaf9644c1ba0d618757))
+
+
+### Bug Fixes
+
+* **harness:** assert cross-channel (yield vs auto-send) conformance equivalence [AGX1-373] ([#414](https://github.com/scaleapi/scale-agentex-python/issues/414)) ([694960f](https://github.com/scaleapi/scale-agentex-python/commit/694960f913b8ba521d9236e876e5e00f57a3a3ff))
+
 ## 0.14.0 (2026-06-22)
 
 Full Changelog: [agentex-client-v0.13.1...agentex-client-v0.14.0](https://github.com/scaleapi/scale-agentex-python/compare/agentex-client-v0.13.1...agentex-client-v0.14.0)

diff --git a/adk/CHANGELOG.md b/adk/CHANGELOG.md
@@ -1,5 +1,13 @@
 # Changelog
 
+## 0.14.0 (2026-06-23)
+
+Full Changelog: [agentex-sdk-v0.13.2...agentex-sdk-v0.14.0](https://github.com/scaleapi/scale-agentex-python/compare/agentex-sdk-v0.13.2...agentex-sdk-v0.14.0)
+
+### Features
+
+* **harness:** public adk facade + docs for the unified harness surface (PR 9) ([#423](https://github.com/scaleapi/scale-agentex-python/issues/423)) ([fa60632](https://github.com/scaleapi/scale-agentex-python/commit/fa60632f9be84315a3fdc627745ae5b605994bd8))
+
 ## 0.13.2 (2026-06-22)
 
 Full Changelog: [agentex-sdk-v0.13.1...agentex-sdk-v0.13.2](https://github.com/scaleapi/scale-agentex-python/compare/agentex-sdk-v0.13.1...agentex-sdk-v0.13.2)

diff --git a/adk/docs/harness.md b/adk/docs/harness.md
@@ -0,0 +1,196 @@
+# Unified Harness Surface
+
+The unified harness surface gives every agent harness (pydantic-ai, LangGraph, OpenAI Agents, and future parsers) a single, shared path to streaming, message persistence, and tracing. The Agentex `StreamTaskMessage*` event stream is the canonical wire format. A harness tap produces that stream once; the shared machinery delivers it and derives spans from it.
+
+All public names are re-exported from `agentex.lib.adk`:
+
+```python
+from agentex.lib.adk import (
+    UnifiedEmitter,
+    SpanTracer,
+    TurnUsage,
+    TurnResult,
+    HarnessTurn,
+    StreamTaskMessage,
+    OpenSpan,
+    CloseSpan,
+    SpanSignal,
+)
+```
+
+The implementation lives at `src/agentex/lib/core/harness/`.
+
+---
+
+## The canonical stream: `StreamTaskMessage`
+
+`StreamTaskMessage` is a union of the four wire-protocol update types:
+
+```
+StreamTaskMessageStart  - opens a content slot (text, reasoning, tool request, ...)
+StreamTaskMessageDelta  - appends a token/fragment to an open slot
+StreamTaskMessageFull   - posts a complete message in one shot (tool response, ...)
+StreamTaskMessageDone   - closes an open slot
+```
+
+Every harness tap produces a sequence of these. Everything downstream (delivery, tracing) reads the same sequence.
+
+---
+
+## Per-harness taps: `convert_<harness>_to_agentex_events`
+
+A tap is an async generator that translates the harness's native event stream into `StreamTaskMessage*` events. The currently shipped taps are:
+
+| Harness | Tap function | Exported from |
+|---|---|---|
+| pydantic-ai | `convert_pydantic_ai_to_agentex_events` | `agentex.lib.adk` |
+| LangGraph | `convert_langgraph_to_agentex_events` | `agentex.lib.adk` |
+
+Taps for claude-code and codex will be added in subsequent PRs (AGX1-420, AGX1-421) and exported from `agentex.lib.adk` in the same way.
+
+---
+
+## `HarnessTurn` protocol
+
+`HarnessTurn` is the interface a harness turn object must satisfy to plug into `UnifiedEmitter`:
+
+```python
+@runtime_checkable
+class HarnessTurn(Protocol):
+    @property
+    def events(self) -> AsyncIterator[StreamTaskMessage]: ...
+
+    def usage(self) -> TurnUsage: ...
+```
+
+`events` is the canonical stream for this turn. `usage()` is valid only after `events` is exhausted (async generators cannot cleanly return a value to the consumer, so usage travels out-of-band).
+
+---
+
+## `TurnUsage`
+
+Token counts and cost for one turn, harness-independent:
+
+```python
+class TurnUsage(BaseModel):
+    model: str | None = None
+    input_tokens: int | None = None
+    output_tokens: int | None = None
+    cached_input_tokens: int | None = None
+    reasoning_tokens: int | None = None
+    total_tokens: int | None = None
+    cost_usd: float | None = None
+    duration_ms: int | None = None
+    num_llm_calls: int = 0
+    num_tool_calls: int = 0
+    num_reasoning_blocks: int = 0
+```
+
+Field names align with `agentex.lib.core.observability.llm_metrics` for easy conversion.
+
+---
+
+## `UnifiedEmitter`
+
+`UnifiedEmitter` ties a turn's canonical stream, tracing context, and delivery mode together. Construct one per turn with the task/trace context from the request:
+
+```python
+emitter = UnifiedEmitter(
+    task_id=params.task.id,
+    trace_id=params.task.id,   # or None to disable tracing
+    parent_span_id=turn_span.id if turn_span else None,
+)
+```
+
+**Tracing is on by default** when `trace_id` is provided. To disable it explicitly, pass `tracer=False`. To inject a custom `SpanTracer` (e.g. in tests), pass it as `tracer=<instance>`.
+
+### Delivery mode 1: `yield_turn` (sync HTTP ACP)
+
+For sync ACP agents that return events directly over the HTTP response:
+
+```python
+@acp.on_message_send
+async def handle(params):
+    turn = MyHarnessTurn(params)          # implements HarnessTurn
+    async for event in emitter.yield_turn(turn):
+        yield event
+```
+
+`yield_turn` forwards each event to the caller and traces spans as a side effect. When tracing is disabled (`trace_id=None` or `tracer=False`) it is a pure passthrough.
+
+### Delivery mode 2: `auto_send_turn` (async/Temporal)
+
+For async or Temporal agents that push to the task stream via Redis:
+
+```python
+result: TurnResult = await emitter.auto_send_turn(turn, created_at=workflow.now())
+```
+
+`auto_send_turn` drives `adk.streaming` contexts for every message in the stream, derives and records spans, and returns a `TurnResult` with the final text and usage. Pass `created_at` under Temporal to back-date message timestamps deterministically.
+
+---
+
+## `TurnResult`
+
+```python
+class TurnResult(BaseModel):
+    final_text: str = ""
+    usage: TurnUsage = TurnUsage()
+```
+
+Returned by `auto_send_turn`. `final_text` is the last text segment of the turn (multi-step runs return only the final segment, matching `stream_langgraph_events` / `stream_pydantic_ai_events` semantics).
+
+---
+
+## Tracing: span derivation
+
+Spans are derived from the canonical stream by `SpanDeriver` (pure, no `adk` dependency) and dispatched to `adk.tracing` by `SpanTracer`. The mapping:
+
+- `StreamTaskMessageStart(ToolRequestContent)` + `StreamTaskMessageDone` on that index -> tool span open (keyed by `tool_call_id`)
+- `StreamTaskMessageFull(ToolResponseContent)` whose `tool_call_id` was opened -> tool span close
+- `StreamTaskMessageFull(ToolRequestContent)` (harnesses that emit tool calls as Full) -> opens a tool span; matching `Full(ToolResponseContent)` closes it
+- `StreamTaskMessageStart(ReasoningContent)` + `StreamTaskMessageDone` -> reasoning span
+
+`SpanTracer` is `SpanDeriver`'s consumer. You can inject a custom `SpanTracer` via `UnifiedEmitter(tracer=<instance>)` for advanced use or testing.
+
+---
+
+## Usage examples by channel
+
+### Sync ACP (pydantic-ai tap)
+
+```python
+import agentex.lib.adk as adk
+from agentex.lib.adk import UnifiedEmitter, convert_pydantic_ai_to_agentex_events
+
+@acp.on_message_send
+async def handle(params):
+    task_id = params.task.id
+    async with adk.tracing.span(trace_id=task_id, name="message", ...) as turn_span:
+        emitter = UnifiedEmitter(
+            task_id=task_id,
+            trace_id=task_id,
+            parent_span_id=turn_span.id if turn_span else None,
+        )
+        tap = convert_pydantic_ai_to_agentex_events(pydantic_stream)
+        # wrap tap in a HarnessTurn then yield_turn, or yield directly:
+        async for event in tap:
+            yield event
+```
+
+The example above shows the pre-unified sync path, where the tap is still yielded directly and the `emitter` is constructed but not yet used. `UnifiedEmitter.yield_turn` is the forward-looking integration point once a `HarnessTurn` wrapper is available.
+
+### Async Temporal (auto-send)
+
+```python
+from agentex.lib.adk import UnifiedEmitter
+
+emitter = UnifiedEmitter(
+    task_id=task_id,
+    trace_id=task_id,
+    parent_span_id=parent_span_id,
+)
+result = await emitter.auto_send_turn(turn, created_at=workflow.now())
+# result.final_text — last text segment
+# result.usage     — TurnUsage (tokens, cost, ...)
+```
diff --git a/adk/pyproject.toml b/adk/pyproject.toml
@@ -4,7 +4,7 @@
 # (agentex/{__init__.py, _*.py, types/, resources/}) ships from the slim
 # sibling package `agentex-client` which is pinned as a runtime dep.
 name = "agentex-sdk"
-version = "0.13.2"
+version = "0.14.0"
 description = "Agent Development Kit (ADK) overlay for the Agentex API — FastACP server, Temporal workflows, LLM provider integrations, observability"
 license = "Apache-2.0"
 authors = [